framework,version,device,op_name,kernel_source,batch_size,isl,num_heads,num_key_value_heads,head_dim,beam_width,attn_dtype,kv_cache_dtype,step,latency
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,64,64,128,1,float16,float16,1,0.034048000971476235
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,64,64,128,1,float16,float16,3,0.0347626656293869
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,64,64,128,1,float16,fp8,1,0.025920001169045765
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,64,64,128,1,float16,float16,31,0.03930133332808813
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,64,64,128,1,float16,fp8,31,0.035002666215101876
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,64,64,128,1,float16,fp8,63,0.03533866753180822
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,64,64,128,1,float16,float16,127,0.057087997595469155
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,64,64,128,1,float16,fp8,127,0.04232533276081085
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,64,64,128,1,float16,fp8,255,0.06623466809590657
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,64,64,128,1,float16,float16,255,0.08483733733495076
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,64,64,128,1,float16,fp8,3,0.026975999275843304
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,64,64,128,1,float16,float16,7,0.03495999922355016
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,64,64,128,1,float16,float16,63,0.04105599969625473
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,64,64,128,1,float16,fp8,15,0.03510399907827377
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,64,64,128,1,float16,float16,1023,0.29759466648101807
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,64,64,128,1,float16,float16,511,0.14598400394121805
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,64,64,128,1,float16,fp8,511,0.10873066385587056
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,64,64,128,1,float16,float16,15,0.03882666677236557
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,64,64,128,1,float16,fp8,7,0.027930667002995808
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,64,64,128,1,float16,fp8,1023,0.19296000401178995
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,64,64,128,1,float16,fp8,2047,0.36372800668080646
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,64,64,128,1,float16,float16,2047,0.5715359846750895
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,64,64,128,1,float16,fp8,1,0.010522666076819101
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,64,64,128,1,float16,float16,1,0.008314666648705801
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,64,64,128,1,float16,fp8,4095,0.7041386763254801
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,64,64,128,1,float16,float16,4095,1.1274933020273845
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,64,64,128,1,float16,float16,3,0.008336000144481659
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,64,64,128,1,float16,float16,7,0.008192000289758047
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,64,64,128,1,float16,fp8,3,0.009642666826645533
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,64,64,128,1,float16,fp8,7,0.009637333452701569
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,64,64,128,1,float16,float16,15,0.009328000247478485
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,64,64,128,1,float16,fp8,15,0.009685333197315535
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,64,64,128,1,float16,float16,31,0.009445333232482275
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,64,64,128,1,float16,fp8,31,0.010410666465759277
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,64,64,128,1,float16,float16,63,0.009429333110650381
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,64,64,128,1,float16,fp8,63,0.012149333953857422
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,64,64,128,1,float16,float16,127,0.009530666594703993
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,64,64,128,1,float16,fp8,127,0.012223999947309494
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,64,64,128,1,float16,float16,255,0.010602666685978571
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,64,64,128,1,float16,fp8,255,0.012149333953857422
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,64,64,128,1,float16,float16,511,0.017130666722853977
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,64,64,128,1,float16,fp8,511,0.016741332908471424
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,64,64,128,1,float16,float16,1023,0.029829333225886028
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,64,64,128,1,float16,fp8,1023,0.02941333254178365
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,64,64,128,1,float16,fp8,2047,0.03718400001525879
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,64,64,128,1,float16,float16,2047,0.037989333271980286
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,64,64,128,1,float16,float16,4095,0.05719999969005585
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,64,64,128,1,float16,float16,8191,0.09803733229637146
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,64,64,128,1,float16,fp8,4095,0.05151999990145365
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,64,64,128,1,float16,fp8,8191,0.08680533369382222
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,64,64,128,1,float16,float16,16383,0.16986133654912314
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,64,64,128,1,float16,float16,1,0.008736000085870424
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,64,64,128,1,float16,fp8,16383,0.1527253290017446
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,64,64,128,1,float16,fp8,1,0.009893333539366722
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,64,64,128,1,float16,fp8,3,0.010847999403874079
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,64,64,128,1,float16,float16,3,0.009824000298976898
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,64,64,128,1,float16,float16,7,0.008512000242869059
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,64,64,128,1,float16,fp8,7,0.009946666657924652
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,64,64,128,1,float16,float16,15,0.009952000031868616
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,64,64,128,1,float16,fp8,15,0.010117333382368088
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,64,64,128,1,float16,float16,31,0.010757333288590113
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,64,64,128,1,float16,float16,63,0.016650666793187458
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,64,64,128,1,float16,float16,127,0.009685333197315535
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,64,64,128,1,float16,fp8,31,0.01664000004529953
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,64,64,128,1,float16,fp8,255,0.012538666526476542
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,64,64,128,1,float16,float16,255,0.011226666470368704
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,64,64,128,1,float16,fp8,63,0.013477332890033722
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,64,64,128,1,float16,fp8,127,0.017786666750907898
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,64,64,128,1,float16,float16,511,0.028437333802382152
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,64,64,128,1,float16,fp8,511,0.028064000109831493
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,64,64,128,1,float16,float16,1023,0.033200000723203026
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,64,64,128,1,float16,float16,2047,0.04922133187452952
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,64,64,128,1,float16,fp8,2047,0.044256001710891724
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,64,64,128,1,float16,fp8,1023,0.03260799994071325
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,64,64,128,1,float16,fp8,4095,0.07132799923419952
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,64,64,128,1,float16,float16,4095,0.08588799834251404
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,64,64,128,1,float16,float16,8191,0.1467359960079193
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,64,64,128,1,float16,float16,16383,0.26738667488098145
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,64,64,128,1,float16,fp8,8191,0.1264906624952952
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,64,64,128,1,float16,float16,1,0.06084799766540527
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,64,64,128,1,float16,fp8,16383,0.22590400775273642
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,64,64,128,1,float16,fp8,1,0.04781866570313772
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,64,64,128,1,float16,fp8,3,0.049679999550183616
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,64,64,128,1,float16,float16,7,0.06258666515350342
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,64,64,128,1,float16,float16,3,0.06177066763242086
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,64,64,128,1,float16,fp8,7,0.052111998200416565
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,64,64,128,1,float16,float16,15,0.07218133409818013
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,64,64,128,1,float16,fp8,15,0.06507733464241028
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,64,64,128,1,float16,float16,31,0.07558933397134145
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,64,64,128,1,float16,float16,63,0.08389866352081299
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,64,64,128,1,float16,fp8,31,0.06490133206049602
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,64,64,128,1,float16,fp8,63,0.06593599915504456
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,64,64,128,1,float16,float16,127,0.10319466392199199
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,64,64,128,1,float16,float16,255,0.15587199727694193
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,64,64,128,1,float16,fp8,127,0.08285866677761078
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,64,64,128,1,float16,fp8,255,0.12441600362459819
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,64,64,128,1,float16,float16,511,0.27160000801086426
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,64,64,128,1,float16,fp8,511,0.20720533529917398
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,64,64,128,1,float16,float16,1023,0.5592373212178549
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,64,64,128,1,float16,fp8,1023,0.37353066603342694
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,64,64,128,1,float16,float16,2047,1.086687962214152
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,64,64,128,1,float16,float16,1,0.013978666315476099
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,64,64,128,1,float16,fp8,2047,0.7090506553649902
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,64,64,128,1,float16,fp8,1,0.009152000149091085
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,64,64,128,1,float16,float16,3,0.014005333185195923
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,64,64,128,1,float16,fp8,3,0.009152000149091085
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,64,64,128,1,float16,float16,7,0.01402666668097178
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,64,64,128,1,float16,float16,15,0.01434133326013883
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,64,64,128,1,float16,fp8,7,0.009103999783595404
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,64,64,128,1,float16,fp8,15,0.009957333405812582
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,64,64,128,1,float16,float16,31,0.01581866666674614
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,64,64,128,1,float16,fp8,31,0.01184533288081487
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,64,64,128,1,float16,float16,63,0.01611199975013733
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,64,64,128,1,float16,fp8,63,0.01179733375708262
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,64,64,128,1,float16,float16,127,0.01642666632930438
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,64,64,128,1,float16,fp8,127,0.011776000261306763
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,64,64,128,1,float16,fp8,255,0.013674666484196981
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,64,64,128,1,float16,float16,255,0.018543999642133713
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,64,64,128,1,float16,float16,511,0.026330667237440746
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,64,64,128,1,float16,fp8,511,0.019023999571800232
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,64,64,128,1,float16,float16,1023,0.04828799764315287
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,64,64,128,1,float16,fp8,1023,0.029493334392706554
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,64,64,128,1,float16,float16,2047,0.09616532921791077
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,64,64,128,1,float16,fp8,2047,0.05457599957784017
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,64,64,128,1,float16,float16,4095,0.17669866482416788
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,64,64,128,1,float16,fp8,4095,0.10053333640098572
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,64,64,128,1,float16,float16,8191,0.3363200028737386
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,64,64,128,1,float16,fp8,8191,0.18902933597564697
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,64,64,128,1,float16,float16,16383,0.6556959946950277
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,64,64,128,1,float16,fp8,16383,0.3657173315684001
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,64,64,128,1,float16,float16,1,0.11450133721033733
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,64,64,128,1,float16,fp8,1,0.09103999535242717
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,64,64,128,1,float16,float16,3,0.11744532982508342
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,64,64,128,1,float16,fp8,3,0.09422933061917622
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,64,64,128,1,float16,fp8,7,0.09846933682759602
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,64,64,128,1,float16,float16,7,0.11858133474985759
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,64,64,128,1,float16,fp8,15,0.12457066774368286
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,64,64,128,1,float16,float16,15,0.14644267161687216
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,64,64,128,1,float16,float16,31,0.1553386648495992
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,64,64,128,1,float16,fp8,31,0.1272160013516744
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,64,64,128,1,float16,fp8,63,0.13147200147310892
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,64,64,128,1,float16,float16,127,0.196015993754069
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,64,64,128,1,float16,fp8,127,0.15784533818562826
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,64,64,128,1,float16,fp8,255,0.23904534180959067
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,64,64,128,1,float16,float16,255,0.29708800713221234
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,64,64,128,1,float16,float16,63,0.15897599856058756
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,64,64,128,1,float16,float16,511,0.5220160086949667
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,64,64,128,1,float16,fp8,511,0.40174400806427
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,256,1,64,64,128,1,float16,float16,1,0.22415467103322348
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,256,1,64,64,128,1,float16,fp8,1,0.17656532923380533
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,64,64,128,1,float16,fp8,1023,0.7252159913380941
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,256,1,64,64,128,1,float16,float16,3,0.23147199551264444
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,64,64,128,1,float16,float16,1023,1.0856853326161702
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,256,1,64,64,128,1,float16,fp8,3,0.1829493244489034
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,256,1,64,64,128,1,float16,float16,7,0.25167999664942425
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,256,1,64,64,128,1,float16,fp8,7,0.194922665754954
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,256,1,64,64,128,1,float16,float16,15,0.29306666056315106
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,256,1,64,64,128,1,float16,fp8,15,0.25284266471862793
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,256,1,64,64,128,1,float16,float16,31,0.3038506706555684
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,256,1,64,64,128,1,float16,fp8,31,0.25442665815353394
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,256,1,64,64,128,1,float16,float16,63,0.3092533349990845
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,256,1,64,64,128,1,float16,fp8,63,0.2562826673189799
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,256,1,64,64,128,1,float16,fp8,127,0.3062346577644348
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,256,1,64,64,128,1,float16,float16,127,0.38369067509969074
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,256,1,64,64,128,1,float16,float16,255,0.5819093386332194
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,256,1,64,64,128,1,float16,fp8,255,0.4686986605326335
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,512,1,64,64,128,1,float16,float16,1,0.47709866364796955
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,512,1,64,64,128,1,float16,fp8,1,0.3621973196665446
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,512,1,64,64,128,1,float16,fp8,3,0.3855146567026774
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,512,1,64,64,128,1,float16,float16,7,0.5041066805521647
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,512,1,64,64,128,1,float16,fp8,7,0.40725866953531903
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,512,1,64,64,128,1,float16,float16,15,0.5795199871063232
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,512,1,64,64,128,1,float16,float16,3,0.4967786471048991
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,512,1,64,64,128,1,float16,fp8,15,0.502239982287089
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,512,1,64,64,128,1,float16,float16,31,0.6004906495412191
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,512,1,64,64,128,1,float16,fp8,31,0.5022720098495483
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,64,64,128,1,float16,fp8,1,0.015263999501864115
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,512,1,64,64,128,1,float16,fp8,63,0.5048480033874512
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,512,1,64,64,128,1,float16,float16,63,0.61080535252889
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,64,64,128,1,float16,float16,1,0.013637332866589228
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,512,1,64,64,128,1,float16,fp8,127,0.6055573225021362
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,512,1,64,64,128,1,float16,float16,127,0.7570827007293701
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,64,64,128,1,float16,fp8,3,0.015279999623696009
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,64,64,128,1,float16,float16,3,0.01393066719174385
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,64,64,128,1,float16,float16,7,0.013839999834696451
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,64,64,128,1,float16,fp8,7,0.015365333606799444
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,64,64,128,1,float16,float16,15,0.014357333381970724
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,64,64,128,1,float16,float16,31,0.014597332725922266
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,64,64,128,1,float16,fp8,31,0.020367999871571858
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,64,64,128,1,float16,fp8,15,0.016506666938463848
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,64,64,128,1,float16,float16,63,0.014975999792416891
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,64,64,128,1,float16,fp8,63,0.020351999749739964
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,64,64,128,1,float16,float16,127,0.017136000096797943
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,64,64,128,1,float16,fp8,127,0.020245333512624104
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,64,64,128,1,float16,float16,255,0.02533866713444392
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,64,64,128,1,float16,float16,511,0.050101334849993386
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,64,64,128,1,float16,fp8,255,0.02366400013367335
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,64,64,128,1,float16,float16,1023,0.09453333417574565
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,64,64,128,1,float16,float16,2047,0.18254399299621582
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,64,64,128,1,float16,fp8,1023,0.05855466425418854
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,64,64,128,1,float16,fp8,511,0.03505599995454153
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,64,64,128,1,float16,fp8,2047,0.10292266805966695
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,64,64,128,1,float16,fp8,4095,0.19024000565210977
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,64,64,128,1,float16,float16,4095,0.3505226771036784
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,64,64,128,1,float16,fp8,8191,0.3631306489308675
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,64,64,128,1,float16,float16,8191,0.6862293084462484
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,64,64,128,1,float16,float16,16383,1.291973352432251
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,64,64,128,1,float16,fp8,16383,0.7083199818929037
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1024,1,64,64,128,1,float16,fp8,1,0.749392032623291
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1024,1,64,64,128,1,float16,float16,1,0.9647413094838461
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1024,1,64,64,128,1,float16,float16,3,0.9864532947540283
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1024,1,64,64,128,1,float16,fp8,3,0.7693653106689453
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1024,1,64,64,128,1,float16,float16,7,1.0018293062845867
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1024,1,64,64,128,1,float16,fp8,7,0.8084853490193685
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1024,1,64,64,128,1,float16,float16,15,1.151685317357381
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1024,1,64,64,128,1,float16,fp8,15,0.9980533123016357
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1024,1,64,64,128,1,float16,float16,31,1.193450689315796
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1024,1,64,64,128,1,float16,float16,63,1.2133653163909912
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1024,1,64,64,128,1,float16,fp8,63,1.003002643585205
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1024,1,64,64,128,1,float16,fp8,31,0.9975306987762451
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2048,1,64,64,128,1,float16,fp8,1,1.4923787117004395
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2048,1,64,64,128,1,float16,float16,1,1.924181302388509
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2048,1,64,64,128,1,float16,fp8,3,1.5307092666625977
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2048,1,64,64,128,1,float16,float16,3,1.9653065999348958
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,64,64,128,1,float16,float16,1,0.019333332777023315
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,64,64,128,1,float16,fp8,1,0.015125333021084467
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2048,1,64,64,128,1,float16,float16,7,1.9944267272949219
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2048,1,64,64,128,1,float16,fp8,7,1.6094880104064941
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2048,1,64,64,128,1,float16,float16,15,2.2971253395080566
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,64,64,128,1,float16,float16,3,0.0195573332409064
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2048,1,64,64,128,1,float16,fp8,15,1.9892479578653972
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,64,64,128,1,float16,fp8,3,0.015466666469971338
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2048,1,64,64,128,1,float16,float16,31,2.380517323811849
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,64,64,128,1,float16,float16,7,0.019685332973798115
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2048,1,64,64,128,1,float16,fp8,31,1.9892053604125977
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,64,64,128,1,float16,fp8,7,0.016074666132529575
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,64,64,128,1,float16,fp8,15,0.019866666446129482
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,64,64,128,1,float16,fp8,31,0.019882666567961376
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,64,64,128,1,float16,float16,15,0.021802666286627453
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,64,64,128,1,float16,float16,31,0.02181866765022278
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,64,64,128,1,float16,fp8,63,0.01989866668979327
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,64,64,128,1,float16,float16,63,0.022298666338125866
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,64,64,128,1,float16,fp8,127,0.023408000667889912
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,64,64,128,1,float16,float16,127,0.026858667532602947
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,64,64,128,1,float16,float16,255,0.04576000074545542
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,64,64,128,1,float16,fp8,255,0.03446399917205175
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,64,64,128,1,float16,float16,511,0.07730666796366374
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,64,64,128,1,float16,fp8,511,0.058602665861447654
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,64,64,128,1,float16,float16,1023,0.15846932927767435
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,64,64,128,1,float16,fp8,1023,0.10220799843470256
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,64,64,128,1,float16,float16,2047,0.2937120000521342
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,64,64,128,1,float16,fp8,2047,0.19096000989278158
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,64,64,128,1,float16,float16,4095,0.5689386526743571
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,64,64,128,1,float16,fp8,4095,0.3675893147786458
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,64,64,128,1,float16,float16,8191,1.1218026479085286
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,64,64,128,1,float16,fp8,8191,0.7218506336212158
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,48,48,128,1,float16,float16,1,0.024688000480333965
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,48,48,128,1,float16,fp8,1,0.020661332954963047
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,48,48,128,1,float16,float16,3,0.025311999022960663
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,48,48,128,1,float16,fp8,3,0.02128533273935318
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,48,48,128,1,float16,float16,7,0.025994665920734406
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,48,48,128,1,float16,fp8,7,0.021967999637126923
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,48,48,128,1,float16,fp8,31,0.027600000301996868
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,48,48,128,1,float16,float16,15,0.030042665700117748
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,48,48,128,1,float16,fp8,15,0.02755733331044515
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,48,48,128,1,float16,float16,127,0.0421973317861557
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,48,48,128,1,float16,float16,31,0.02996266633272171
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,48,48,128,1,float16,float16,63,0.030645333230495453
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,48,48,128,1,float16,fp8,127,0.03219199925661087
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,48,48,128,1,float16,fp8,63,0.027701333165168762
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,48,48,128,1,float16,float16,255,0.06279466549555461
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,48,48,128,1,float16,fp8,255,0.05131733417510986
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,48,48,128,1,float16,fp8,511,0.08356266220410664
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,48,48,128,1,float16,float16,511,0.10819733142852783
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,48,48,128,1,float16,float16,2047,0.416042685508728
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,48,48,128,1,float16,float16,1023,0.21970132986704508
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,48,48,128,1,float16,fp8,1023,0.1474720040957133
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,48,48,128,1,float16,float16,1,0.008303999900817871
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,48,48,128,1,float16,fp8,2047,0.27749866247177124
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,48,48,128,1,float16,float16,4095,0.8112320105234782
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,48,48,128,1,float16,float16,3,0.0084906667470932
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,48,48,128,1,float16,fp8,1,0.00960533320903778
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,48,48,128,1,float16,fp8,4095,0.5376000006993612
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,48,48,128,1,float16,fp8,3,0.009514666472872099
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,48,48,128,1,float16,float16,7,0.008458666503429413
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,48,48,128,1,float16,fp8,7,0.009626666704813639
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,48,48,128,1,float16,float16,31,0.009359999870260557
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,48,48,128,1,float16,fp8,15,0.009775999933481216
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,48,48,128,1,float16,fp8,31,0.0102613332370917
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,48,48,128,1,float16,float16,63,0.00955200009047985
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,48,48,128,1,float16,fp8,63,0.012106666962305704
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,48,48,128,1,float16,float16,127,0.009359999870260557
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,48,48,128,1,float16,float16,15,0.009413333609700203
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,48,48,128,1,float16,fp8,127,0.012234666695197424
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,48,48,128,1,float16,float16,255,0.010485333700974783
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,48,48,128,1,float16,fp8,255,0.01221866657336553
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,48,48,128,1,float16,float16,511,0.016757333030303318
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,48,48,128,1,float16,fp8,1023,0.02937600016593933
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,48,48,128,1,float16,fp8,511,0.01664000004529953
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,48,48,128,1,float16,float16,1023,0.029477333029111225
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,48,48,128,1,float16,fp8,2047,0.03690666705369949
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,48,48,128,1,float16,float16,4095,0.05489600201447805
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,48,48,128,1,float16,float16,2047,0.03770133356253306
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,48,48,128,1,float16,fp8,4095,0.050944000482559204
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,48,48,128,1,float16,float16,8191,0.09575466314951579
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,48,48,128,1,float16,float16,1,0.008463999877373377
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,48,48,128,1,float16,fp8,8191,0.08473066488901775
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,48,48,128,1,float16,fp8,16383,0.15079466501871744
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,48,48,128,1,float16,float16,16383,0.1669493317604065
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,48,48,128,1,float16,fp8,1,0.009717333440979322
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,48,48,128,1,float16,fp8,3,0.009866666669646898
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,48,48,128,1,float16,float16,3,0.009546666716535887
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,48,48,128,1,float16,float16,7,0.008410666758815447
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,48,48,128,1,float16,fp8,7,0.009749333063761393
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,48,48,128,1,float16,fp8,31,0.010522666076819101
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,48,48,128,1,float16,fp8,15,0.009797333429257074
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,48,48,128,1,float16,float16,15,0.008842666943868002
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,48,48,128,1,float16,float16,31,0.009685333197315535
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,48,48,128,1,float16,float16,63,0.009642666826645533
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,48,48,128,1,float16,float16,127,0.009797333429257074
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,48,48,128,1,float16,fp8,127,0.012479999413092932
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,48,48,128,1,float16,float16,255,0.010901333143313726
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,48,48,128,1,float16,fp8,63,0.012357333054145178
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,48,48,128,1,float16,fp8,255,0.012170666207869848
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,48,48,128,1,float16,float16,511,0.02811199923356374
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,48,48,128,1,float16,fp8,511,0.028010666370391846
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,48,48,128,1,float16,fp8,2047,0.043290664752324425
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,48,48,128,1,float16,float16,2047,0.04614933331807455
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,48,48,128,1,float16,float16,1023,0.03254933406909307
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,48,48,128,1,float16,fp8,1023,0.03243733445803324
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,48,48,128,1,float16,fp8,4095,0.0692799985408783
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,48,48,128,1,float16,fp8,8191,0.12069333593050639
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,48,48,128,1,float16,float16,8191,0.1332480013370514
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,48,48,128,1,float16,float16,4095,0.07744533320267995
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,48,48,128,1,float16,float16,16383,0.2395146687825521
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,48,48,128,1,float16,fp8,16383,0.2198986609776815
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,48,48,128,1,float16,float16,1,0.04584533472855886
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,48,48,128,1,float16,fp8,1,0.037445334096749626
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,48,48,128,1,float16,float16,3,0.046666666865348816
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,48,48,128,1,float16,float16,7,0.047695999344189964
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,48,48,128,1,float16,fp8,3,0.038880000511805214
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,48,48,128,1,float16,float16,15,0.05588266750176748
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,48,48,128,1,float16,fp8,15,0.05060266455014547
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,48,48,128,1,float16,fp8,7,0.04022933294375738
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,48,48,128,1,float16,fp8,31,0.05036800106366476
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,48,48,128,1,float16,float16,31,0.05671999851862589
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,48,48,128,1,float16,float16,63,0.06393066545327504
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,48,48,128,1,float16,fp8,63,0.05074666440486908
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,48,48,128,1,float16,float16,127,0.07881600161393483
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,48,48,128,1,float16,fp8,127,0.06411733229955037
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,48,48,128,1,float16,float16,255,0.11794132987658183
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,48,48,128,1,float16,fp8,255,0.09510933359464009
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,48,48,128,1,float16,float16,511,0.2060693303743998
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,48,48,128,1,float16,fp8,511,0.15870933731396994
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,48,48,128,1,float16,fp8,1023,0.28355733553568524
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,48,48,128,1,float16,float16,1023,0.42553067207336426
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,48,48,128,1,float16,float16,1,0.013850666582584381
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,48,48,128,1,float16,float16,2047,0.8174293041229248
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,48,48,128,1,float16,fp8,2047,0.5363733371098837
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,48,48,128,1,float16,float16,3,0.013738666971524557
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,48,48,128,1,float16,fp8,1,0.009952000031868616
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,48,48,128,1,float16,fp8,3,0.009152000149091085
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,48,48,128,1,float16,float16,15,0.014277332772811254
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,48,48,128,1,float16,fp8,15,0.00984533317387104
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,48,48,128,1,float16,fp8,7,0.009130666653315226
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,48,48,128,1,float16,float16,7,0.013621332744757334
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,48,48,128,1,float16,float16,31,0.015696000307798386
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,48,48,128,1,float16,float16,127,0.01613333324591319
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,48,48,128,1,float16,fp8,31,0.011648000528415045
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,48,48,128,1,float16,fp8,63,0.011744000017642975
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,48,48,128,1,float16,float16,63,0.01590399940808614
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,48,48,128,1,float16,fp8,127,0.011882666498422623
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,48,48,128,1,float16,float16,255,0.018170667191346485
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,48,48,128,1,float16,fp8,255,0.013637332866589228
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,48,48,128,1,float16,fp8,511,0.01899733394384384
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,48,48,128,1,float16,float16,511,0.02465066562096278
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,48,48,128,1,float16,float16,1023,0.045253331462542214
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,48,48,128,1,float16,fp8,1023,0.029418667157491047
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,48,48,128,1,float16,float16,2047,0.08683733145395915
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,48,48,128,1,float16,fp8,2047,0.05197333296140035
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,48,48,128,1,float16,float16,4095,0.16503467162450156
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,48,48,128,1,float16,fp8,4095,0.09854400157928467
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,48,48,128,1,float16,float16,8191,0.31486932436625165
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,48,48,128,1,float16,fp8,16383,0.36158935228983563
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,48,48,128,1,float16,float16,16383,0.6118880112965902
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,48,48,128,1,float16,fp8,8191,0.18668800592422485
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,48,48,128,1,float16,float16,1,0.08785067001978557
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,48,48,128,1,float16,fp8,1,0.06977066894372304
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,48,48,128,1,float16,float16,3,0.08954667051633199
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,48,48,128,1,float16,fp8,3,0.07223999996980031
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,48,48,128,1,float16,float16,7,0.09059199690818787
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,48,48,128,1,float16,fp8,7,0.07533866663773854
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,48,48,128,1,float16,float16,15,0.10668266812960307
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,48,48,128,1,float16,float16,31,0.1183093289534251
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,48,48,128,1,float16,float16,63,0.12109333276748657
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,48,48,128,1,float16,fp8,15,0.09463999668757121
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,48,48,128,1,float16,fp8,31,0.09522133072217305
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,48,48,128,1,float16,fp8,63,0.10043733318646748
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,48,48,128,1,float16,float16,127,0.1497760017712911
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,48,48,128,1,float16,fp8,127,0.12001599868138631
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,48,48,128,1,float16,float16,255,0.2266613245010376
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,48,48,128,1,float16,fp8,255,0.1811786691347758
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,48,48,128,1,float16,float16,511,0.3959893385569255
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,48,48,128,1,float16,fp8,511,0.30298133691151935
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,48,48,128,1,float16,fp8,1023,0.5453066825866699
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,48,48,128,1,float16,float16,1023,0.8230026563008627
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,256,1,48,48,128,1,float16,float16,1,0.16959466536839804
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,256,1,48,48,128,1,float16,fp8,1,0.1341759959856669
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,256,1,48,48,128,1,float16,float16,3,0.17345066865285239
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,256,1,48,48,128,1,float16,fp8,3,0.13874666889508566
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,256,1,48,48,128,1,float16,fp8,7,0.1458026667435964
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,256,1,48,48,128,1,float16,float16,7,0.1827359994252523
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,256,1,48,48,128,1,float16,float16,15,0.222271998723348
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,256,1,48,48,128,1,float16,fp8,15,0.1871946652730306
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,256,1,48,48,128,1,float16,fp8,31,0.19271467129389444
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,256,1,48,48,128,1,float16,fp8,63,0.19380799929300943
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,256,1,48,48,128,1,float16,float16,63,0.23435733715693155
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,256,1,48,48,128,1,float16,float16,31,0.23012266556421915
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,256,1,48,48,128,1,float16,float16,127,0.28990934292475384
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,256,1,48,48,128,1,float16,fp8,127,0.2319200038909912
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,256,1,48,48,128,1,float16,float16,255,0.4394986629486084
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,256,1,48,48,128,1,float16,fp8,255,0.35391998291015625
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,512,1,48,48,128,1,float16,float16,1,0.33779199918111164
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,512,1,48,48,128,1,float16,fp8,1,0.2636959950129191
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,512,1,48,48,128,1,float16,float16,3,0.3694719870885213
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,512,1,48,48,128,1,float16,float16,7,0.3802773157755534
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,512,1,48,48,128,1,float16,fp8,3,0.27851200103759766
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,512,1,48,48,128,1,float16,fp8,7,0.30457599957784015
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,512,1,48,48,128,1,float16,float16,15,0.4374613364537557
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,512,1,48,48,128,1,float16,fp8,15,0.37894399960835773
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,512,1,48,48,128,1,float16,fp8,31,0.37859733899434406
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,512,1,48,48,128,1,float16,float16,31,0.4530986547470093
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,512,1,48,48,128,1,float16,float16,63,0.46056000391642254
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,512,1,48,48,128,1,float16,fp8,63,0.3805333375930786
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,48,48,128,1,float16,float16,1,0.019354666272799175
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,48,48,128,1,float16,fp8,1,0.014965333044528961
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,48,48,128,1,float16,float16,3,0.019600000232458115
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,512,1,48,48,128,1,float16,fp8,127,0.45578134059906006
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,512,1,48,48,128,1,float16,float16,127,0.5701280037562052
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,48,48,128,1,float16,float16,7,0.019359999646743137
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,48,48,128,1,float16,fp8,3,0.015061333775520325
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,48,48,128,1,float16,fp8,7,0.015322666615247726
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,48,48,128,1,float16,float16,15,0.02019199977318446
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,48,48,128,1,float16,fp8,15,0.015018666783968607
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,48,48,128,1,float16,float16,31,0.02205866575241089
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,48,48,128,1,float16,fp8,63,0.01706133286158244
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,48,48,128,1,float16,fp8,31,0.016938666502634685
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,48,48,128,1,float16,float16,63,0.022672000030676525
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,48,48,128,1,float16,fp8,127,0.017279999951521557
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,48,48,128,1,float16,float16,127,0.022944000860055287
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,48,48,128,1,float16,float16,511,0.04387733340263367
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,48,48,128,1,float16,fp8,255,0.019632000476121902
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,48,48,128,1,float16,float16,255,0.026426665484905243
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,48,48,128,1,float16,fp8,511,0.027050666511058807
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,48,48,128,1,float16,float16,1023,0.07011733452479045
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,48,48,128,1,float16,fp8,2047,0.0802400012811025
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,48,48,128,1,float16,float16,2047,0.1409386694431305
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,48,48,128,1,float16,fp8,1023,0.04582933088143667
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,48,48,128,1,float16,float16,4095,0.26068800687789917
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,48,48,128,1,float16,fp8,4095,0.1485973298549652
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,48,48,128,1,float16,float16,8191,0.5001066525777181
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,48,48,128,1,float16,fp8,8191,0.2816426753997803
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,48,48,128,1,float16,float16,16383,0.9786667029062907
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,48,48,128,1,float16,fp8,16383,0.547871987024943
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1024,1,48,48,128,1,float16,float16,1,0.7258346875508627
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1024,1,48,48,128,1,float16,float16,3,0.7427893479665121
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1024,1,48,48,128,1,float16,fp8,3,0.5797439813613892
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1024,1,48,48,128,1,float16,float16,7,0.7536266644795736
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1024,1,48,48,128,1,float16,fp8,1,0.5648693243662516
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1024,1,48,48,128,1,float16,fp8,7,0.6083199977874756
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1024,1,48,48,128,1,float16,float16,15,0.8676533699035645
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1024,1,48,48,128,1,float16,fp8,15,0.7509600321451823
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1024,1,48,48,128,1,float16,float16,31,0.8984053134918213
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1024,1,48,48,128,1,float16,fp8,31,0.7508479754130045
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1024,1,48,48,128,1,float16,float16,63,0.9134399890899658
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1024,1,48,48,128,1,float16,fp8,63,0.75382399559021
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2048,1,48,48,128,1,float16,fp8,1,1.1228426297505696
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2048,1,48,48,128,1,float16,fp8,3,1.1518452962239583
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2048,1,48,48,128,1,float16,float16,3,1.4778614044189453
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2048,1,48,48,128,1,float16,float16,7,1.4998985926310222
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2048,1,48,48,128,1,float16,float16,1,1.4456000328063965
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2048,1,48,48,128,1,float16,fp8,7,1.2106719811757405
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,48,48,128,1,float16,float16,1,0.014005333185195923
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2048,1,48,48,128,1,float16,float16,15,1.7293599446614583
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2048,1,48,48,128,1,float16,fp8,15,1.494442621866862
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,48,48,128,1,float16,fp8,1,0.014058666924635569
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,48,48,128,1,float16,float16,3,0.014453332871198654
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2048,1,48,48,128,1,float16,float16,31,1.7894879976908367
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,48,48,128,1,float16,fp8,3,0.014032000054915747
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,48,48,128,1,float16,float16,7,0.014677333335081736
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,48,48,128,1,float16,fp8,7,0.01461333284775416
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2048,1,48,48,128,1,float16,fp8,31,1.4943572680155437
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,48,48,128,1,float16,float16,15,0.01692266638080279
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,48,48,128,1,float16,fp8,15,0.016224000602960587
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,48,48,128,1,float16,float16,31,0.017018667111794155
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,48,48,128,1,float16,fp8,31,0.01621333385507266
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,48,48,128,1,float16,fp8,63,0.01621333385507266
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,48,48,128,1,float16,float16,127,0.020074666788180668
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,48,48,128,1,float16,float16,255,0.03305066625277201
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,48,48,128,1,float16,float16,63,0.017258666455745697
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,48,48,128,1,float16,fp8,127,0.01850133389234543
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,48,48,128,1,float16,fp8,255,0.026506667335828144
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,48,48,128,1,float16,fp8,511,0.045168002446492515
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,48,48,128,1,float16,fp8,1023,0.07912000020345052
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,48,48,128,1,float16,float16,1023,0.11411733428637187
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,48,48,128,1,float16,float16,511,0.05840000013510386
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,48,48,128,1,float16,float16,2047,0.21411200364430746
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,48,48,128,1,float16,fp8,4095,0.28257066011428833
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,48,48,128,1,float16,fp8,2047,0.14787200093269348
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,48,48,128,1,float16,float16,4095,0.4126933415730794
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,40,40,128,1,float16,float16,1,0.02386133372783661
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,40,40,128,1,float16,fp8,1,0.01939733326435089
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,48,48,128,1,float16,float16,8191,0.8109280268351237
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,40,40,128,1,float16,float16,3,0.02476266771554947
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,48,48,128,1,float16,fp8,8191,0.5543680191040039
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,40,40,128,1,float16,fp8,3,0.019802667200565338
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,40,40,128,1,float16,fp8,7,0.01985599969824155
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,40,40,128,1,float16,float16,7,0.02462399999300639
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,40,40,128,1,float16,fp8,15,0.023754666248957317
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,40,40,128,1,float16,float16,15,0.026693334182103474
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,40,40,128,1,float16,float16,31,0.027237333357334137
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,40,40,128,1,float16,fp8,31,0.02382933348417282
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,40,40,128,1,float16,float16,63,0.027632000545660656
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,40,40,128,1,float16,fp8,63,0.023813332120577495
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,40,40,128,1,float16,float16,127,0.03573333223660787
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,40,40,128,1,float16,fp8,127,0.027893332143624622
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,40,40,128,1,float16,float16,255,0.057333335280418396
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,40,40,128,1,float16,fp8,255,0.04233600199222565
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,40,40,128,1,float16,float16,511,0.09763733545939128
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,40,40,128,1,float16,fp8,511,0.07087466617425282
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,40,40,128,1,float16,float16,1023,0.20014933745066324
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,40,40,128,1,float16,fp8,1023,0.12485866745313008
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,40,40,128,1,float16,float16,2047,0.3824479977289836
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,40,40,128,1,float16,fp8,2047,0.23382933934529623
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,40,40,128,1,float16,float16,4095,0.7385226885477701
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,40,40,128,1,float16,fp8,4095,0.4506400028864543
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,40,40,128,1,float16,float16,1,0.008400000010927519
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,40,40,128,1,float16,fp8,1,0.009519999846816063
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,40,40,128,1,float16,fp8,3,0.00949866697192192
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,40,40,128,1,float16,float16,3,0.008576000109314919
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,40,40,128,1,float16,float16,7,0.008453333129485449
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,40,40,128,1,float16,float16,15,0.008629333227872849
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,40,40,128,1,float16,float16,63,0.009477333476146063
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,40,40,128,1,float16,fp8,15,0.009648000200589498
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,40,40,128,1,float16,float16,31,0.009285333255926767
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,40,40,128,1,float16,fp8,7,0.010474666953086853
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,40,40,128,1,float16,fp8,31,0.010426666587591171
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,40,40,128,1,float16,fp8,63,0.012266666938861212
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,40,40,128,1,float16,float16,127,0.009423999736706415
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,40,40,128,1,float16,float16,255,0.01071999967098236
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,40,40,128,1,float16,fp8,127,0.012138667205969492
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,40,40,128,1,float16,fp8,255,0.01209066684047381
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,40,40,128,1,float16,float16,1023,0.017925333231687546
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,40,40,128,1,float16,float16,511,0.016666666915019352
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,40,40,128,1,float16,fp8,511,0.01658133293191592
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,40,40,128,1,float16,fp8,1023,0.01770666614174843
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,40,40,128,1,float16,fp8,2047,0.03241066634654999
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,40,40,128,1,float16,float16,4095,0.04429866870244344
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,40,40,128,1,float16,float16,2047,0.0323786661028862
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,40,40,128,1,float16,fp8,4095,0.0432533323764801
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,40,40,128,1,float16,float16,8191,0.07719466586907704
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,40,40,128,1,float16,fp8,8191,0.06630399823188782
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,40,40,128,1,float16,float16,1,0.008442666381597519
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,40,40,128,1,float16,fp8,1,0.009589333087205887
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,40,40,128,1,float16,fp8,16383,0.12013866504033406
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,40,40,128,1,float16,float16,3,0.008394666636983553
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,40,40,128,1,float16,float16,7,0.008463999877373377
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,40,40,128,1,float16,float16,16383,0.1302293340365092
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,40,40,128,1,float16,fp8,3,0.009535999968647957
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,40,40,128,1,float16,fp8,7,0.009621333330869675
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,40,40,128,1,float16,float16,15,0.008863999818762144
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,40,40,128,1,float16,fp8,15,0.009829333052039146
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,40,40,128,1,float16,fp8,31,0.010405333091815313
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,40,40,128,1,float16,float16,31,0.009493333597977957
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,40,40,128,1,float16,float16,63,0.009370666618148485
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,40,40,128,1,float16,fp8,127,0.01228800043463707
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,40,40,128,1,float16,float16,127,0.009535999968647957
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,40,40,128,1,float16,float16,255,0.010837333897749582
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,40,40,128,1,float16,fp8,63,0.012330666184425354
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,40,40,128,1,float16,fp8,255,0.012410666793584824
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,40,40,128,1,float16,float16,511,0.027877333263556164
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,40,40,128,1,float16,float16,1023,0.03254399945338567
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,40,40,128,1,float16,fp8,511,0.027802666028340656
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,40,40,128,1,float16,fp8,1023,0.03213333338499069
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,40,40,128,1,float16,fp8,2047,0.043194666504859924
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,40,40,128,1,float16,float16,2047,0.0443200021982193
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,40,40,128,1,float16,fp8,4095,0.06594133377075195
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,40,40,128,1,float16,float16,4095,0.07668266693751018
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,40,40,128,1,float16,float16,8191,0.1304746667544047
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,40,40,128,1,float16,fp8,8191,0.119759996732076
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,40,40,128,1,float16,float16,1,0.04012266546487808
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,40,40,128,1,float16,float16,16383,0.23780800898869833
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,40,40,128,1,float16,fp8,16383,0.21406932671864828
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,40,40,128,1,float16,float16,3,0.04093866546948751
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,40,40,128,1,float16,fp8,1,0.03186666717131933
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,40,40,128,1,float16,float16,7,0.04146133363246918
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,40,40,128,1,float16,fp8,3,0.033088001112143196
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,40,40,128,1,float16,float16,15,0.0468746672074
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,40,40,128,1,float16,fp8,7,0.0341333324710528
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,40,40,128,1,float16,fp8,15,0.04301333427429199
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,40,40,128,1,float16,fp8,31,0.04302933315436045
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,40,40,128,1,float16,float16,31,0.047781333327293396
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,40,40,128,1,float16,float16,63,0.05187733471393585
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,40,40,128,1,float16,fp8,63,0.043247997760772705
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,40,40,128,1,float16,fp8,127,0.053301334381103516
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,40,40,128,1,float16,float16,255,0.1011786659558614
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,40,40,128,1,float16,float16,511,0.1753973364830017
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,40,40,128,1,float16,float16,127,0.06788266698519389
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,40,40,128,1,float16,fp8,255,0.08090666433175404
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,40,40,128,1,float16,fp8,511,0.13365333278973898
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,40,40,128,1,float16,float16,1023,0.36075735092163086
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,40,40,128,1,float16,fp8,1023,0.2384799917538961
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,40,40,128,1,float16,float16,1,0.013584000368913015
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,40,40,128,1,float16,float16,2047,0.693557341893514
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,40,40,128,1,float16,fp8,2047,0.45025599002838135
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,40,40,128,1,float16,float16,3,0.013722666849692663
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,40,40,128,1,float16,fp8,1,0.016016000260909397
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,40,40,128,1,float16,fp8,3,0.015930666277805965
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,40,40,128,1,float16,float16,7,0.013584000368913015
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,40,40,128,1,float16,float16,15,0.014159999787807465
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,40,40,128,1,float16,fp8,7,0.015909332782030106
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,40,40,128,1,float16,fp8,15,0.016496000190575916
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,40,40,128,1,float16,fp8,31,0.01793066660563151
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,40,40,128,1,float16,float16,63,0.01590399940808614
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,40,40,128,1,float16,float16,31,0.015599999576807022
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,40,40,128,1,float16,fp8,63,0.02141333371400833
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,40,40,128,1,float16,float16,127,0.01581866666674614
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,40,40,128,1,float16,fp8,127,0.021514666577180225
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,40,40,128,1,float16,float16,255,0.01820266619324684
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,40,40,128,1,float16,fp8,255,0.021551998953024547
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,40,40,128,1,float16,float16,511,0.024405332903067272
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,40,40,128,1,float16,fp8,511,0.024746666351954143
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,40,40,128,1,float16,float16,1023,0.040474665661652885
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,40,40,128,1,float16,fp8,1023,0.03489600121974945
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,40,40,128,1,float16,fp8,2047,0.05570666491985321
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,40,40,128,1,float16,float16,2047,0.08658132950464885
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,40,40,128,1,float16,float16,4095,0.15785599748293558
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,40,40,128,1,float16,fp8,4095,0.0999840001265208
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,40,40,128,1,float16,float16,8191,0.3089173237482707
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,40,40,128,1,float16,fp8,8191,0.18211734294891357
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,40,40,128,1,float16,float16,16383,0.6011733214060465
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,40,40,128,1,float16,fp8,16383,0.3457760016123454
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,40,40,128,1,float16,float16,1,0.07316266496976216
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,40,40,128,1,float16,fp8,1,0.058634668588638306
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,40,40,128,1,float16,float16,3,0.07453333338101704
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,40,40,128,1,float16,fp8,3,0.06081599990526835
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,40,40,128,1,float16,float16,7,0.07595199843247731
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,40,40,128,1,float16,fp8,7,0.06320000191529591
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,40,40,128,1,float16,float16,15,0.08920533458391826
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,40,40,128,1,float16,fp8,15,0.07976533472537994
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,40,40,128,1,float16,float16,31,0.09782933195432027
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,40,40,128,1,float16,fp8,31,0.07980800171693166
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,40,40,128,1,float16,float16,63,0.10195733110109965
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,40,40,128,1,float16,fp8,63,0.08383466800053914
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,40,40,128,1,float16,float16,127,0.12533332904179892
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,40,40,128,1,float16,fp8,127,0.10057600339253743
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,40,40,128,1,float16,float16,255,0.18915732701619467
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,40,40,128,1,float16,fp8,255,0.15173332889874777
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,40,40,128,1,float16,float16,511,0.330677330493927
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,40,40,128,1,float16,fp8,511,0.25360532601674396
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,40,40,128,1,float16,fp8,1023,0.4552053213119507
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,40,40,128,1,float16,float16,1023,0.686298688252767
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,256,1,40,40,128,1,float16,float16,1,0.14196266730626425
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,256,1,40,40,128,1,float16,fp8,1,0.11264000336329143
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,256,1,40,40,128,1,float16,float16,3,0.14532267053922018
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,256,1,40,40,128,1,float16,fp8,3,0.11659733454386394
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,256,1,40,40,128,1,float16,float16,7,0.1490239997704824
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,256,1,40,40,128,1,float16,fp8,7,0.12245866656303406
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,256,1,40,40,128,1,float16,float16,15,0.18531199296315512
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,256,1,40,40,128,1,float16,fp8,15,0.1556106706460317
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,256,1,40,40,128,1,float16,fp8,31,0.1609493295351664
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,256,1,40,40,128,1,float16,float16,31,0.19350399573644003
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,256,1,40,40,128,1,float16,float16,63,0.19720532496770224
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,256,1,40,40,128,1,float16,fp8,63,0.16272000471750894
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,256,1,40,40,128,1,float16,float16,127,0.24258132775624594
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,256,1,40,40,128,1,float16,fp8,127,0.19428267081578574
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,256,1,40,40,128,1,float16,float16,255,0.3682933251063029
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,256,1,40,40,128,1,float16,fp8,255,0.2964479923248291
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,512,1,40,40,128,1,float16,float16,1,0.27905066808064777
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,512,1,40,40,128,1,float16,fp8,1,0.22024534145991007
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,512,1,40,40,128,1,float16,float16,3,0.2990079919497172
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,512,1,40,40,128,1,float16,fp8,3,0.22915732860565186
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,512,1,40,40,128,1,float16,float16,7,0.3182239929835002
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,512,1,40,40,128,1,float16,fp8,7,0.2492426633834839
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,512,1,40,40,128,1,float16,float16,15,0.3667573531468709
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,512,1,40,40,128,1,float16,fp8,15,0.31678932905197144
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,512,1,40,40,128,1,float16,float16,31,0.37971198558807373
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,512,1,40,40,128,1,float16,fp8,31,0.3171626726786296
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,512,1,40,40,128,1,float16,float16,63,0.38572800159454346
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,512,1,40,40,128,1,float16,fp8,63,0.31833066542943317
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,512,1,40,40,128,1,float16,float16,127,0.47696534792582196
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,40,40,128,1,float16,float16,1,0.01931200052301089
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,40,40,128,1,float16,fp8,1,0.014752000570297241
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,40,40,128,1,float16,float16,3,0.01924266666173935
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,512,1,40,40,128,1,float16,fp8,127,0.381279985109965
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,40,40,128,1,float16,fp8,3,0.01431999976436297
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,40,40,128,1,float16,float16,7,0.01926933353145917
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,40,40,128,1,float16,fp8,7,0.014192000031471252
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,40,40,128,1,float16,fp8,31,0.016895999511082966
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,40,40,128,1,float16,fp8,15,0.014789332946141561
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,40,40,128,1,float16,float16,15,0.01985599969824155
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,40,40,128,1,float16,float16,31,0.021770666042963665
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,40,40,128,1,float16,float16,63,0.02222399910291036
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,40,40,128,1,float16,fp8,63,0.016890666137139004
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,40,40,128,1,float16,float16,127,0.02266666789849599
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,40,40,128,1,float16,float16,255,0.02589866767326991
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,40,40,128,1,float16,fp8,127,0.016805333395799
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,40,40,128,1,float16,fp8,255,0.019632000476121902
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,40,40,128,1,float16,float16,511,0.03917866696914037
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,40,40,128,1,float16,fp8,511,0.026911998788515728
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,40,40,128,1,float16,float16,1023,0.06859200199445088
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,40,40,128,1,float16,fp8,1023,0.044495999813079834
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,40,40,128,1,float16,float16,2047,0.13269333044687906
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,40,40,128,1,float16,fp8,2047,0.07937066753705342
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,40,40,128,1,float16,float16,4095,0.24968532721201578
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,40,40,128,1,float16,fp8,4095,0.1451573371887207
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,40,40,128,1,float16,float16,8191,0.4779520034790039
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,40,40,128,1,float16,fp8,8191,0.2775893410046895
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,40,40,128,1,float16,float16,16383,0.9352320035298666
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,40,40,128,1,float16,fp8,16383,0.5401653448740641
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1024,1,40,40,128,1,float16,float16,1,0.6066773335138956
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1024,1,40,40,128,1,float16,fp8,1,0.4697120189666748
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1024,1,40,40,128,1,float16,float16,3,0.6213440100351969
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1024,1,40,40,128,1,float16,float16,7,0.6301066478093466
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1024,1,40,40,128,1,float16,fp8,3,0.4846666653951009
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1024,1,40,40,128,1,float16,fp8,7,0.5089386701583862
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1024,1,40,40,128,1,float16,fp8,15,0.6271733442942301
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1024,1,40,40,128,1,float16,float16,15,0.7263147036234537
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1024,1,40,40,128,1,float16,float16,31,0.7521813710530599
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1024,1,40,40,128,1,float16,fp8,31,0.6271520058314005
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1024,1,40,40,128,1,float16,float16,63,0.7626079718271891
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1024,1,40,40,128,1,float16,fp8,63,0.629535992940267
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2048,1,40,40,128,1,float16,float16,1,1.2068426609039307
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2048,1,40,40,128,1,float16,fp8,1,0.9382773240407308
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2048,1,40,40,128,1,float16,float16,3,1.2363093694051106
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2048,1,40,40,128,1,float16,fp8,3,0.9613706270853678
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2048,1,40,40,128,1,float16,float16,7,1.253226677576701
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,40,40,128,1,float16,float16,1,0.013818666338920593
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2048,1,40,40,128,1,float16,fp8,15,1.2473333676656086
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2048,1,40,40,128,1,float16,fp8,7,1.0117333730061848
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2048,1,40,40,128,1,float16,float16,15,1.4460585912068684
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,40,40,128,1,float16,fp8,1,0.013738666971524557
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,40,40,128,1,float16,fp8,3,0.013690666606028875
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2048,1,40,40,128,1,float16,float16,31,1.4963040351867676
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,40,40,128,1,float16,float16,7,0.014607999473810196
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,40,40,128,1,float16,fp8,7,0.013621332744757334
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,40,40,128,1,float16,float16,3,0.014218666901191076
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,40,40,128,1,float16,fp8,15,0.014922666052977243
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2048,1,40,40,128,1,float16,fp8,31,1.2469706535339355
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,40,40,128,1,float16,float16,15,0.015322666615247726
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,40,40,128,1,float16,fp8,63,0.015322666615247726
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,40,40,128,1,float16,float16,31,0.015696000307798386
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,40,40,128,1,float16,fp8,31,0.015087999403476715
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,40,40,128,1,float16,float16,63,0.016623999923467636
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,40,40,128,1,float16,fp8,127,0.016879999389251072
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,40,40,128,1,float16,float16,255,0.02775466690460841
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,40,40,128,1,float16,float16,127,0.018133333573738735
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,40,40,128,1,float16,fp8,255,0.023887999355793
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,40,40,128,1,float16,float16,511,0.05433600147565206
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,40,40,128,1,float16,fp8,1023,0.06925333539644878
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,40,40,128,1,float16,fp8,511,0.03923200070858002
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,40,40,128,1,float16,float16,1023,0.10691733161608379
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,40,40,128,1,float16,float16,2047,0.19777067502339682
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,40,40,128,1,float16,fp8,2047,0.12850133577982584
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,40,40,128,1,float16,float16,4095,0.37542935212453205
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,40,40,128,1,float16,fp8,4095,0.24715733528137207
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,40,40,128,1,float16,float16,8191,0.7321279843648275
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,40,40,128,1,float16,fp8,8191,0.4840213457743327
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,32,32,128,1,float16,fp8,1,0.01516266663869222
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,32,32,128,1,float16,float16,3,0.019434666881958645
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,32,32,128,1,float16,float16,1,0.019194666296243668
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,32,32,128,1,float16,fp8,3,0.015461333096027374
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,32,32,128,1,float16,float16,7,0.019834666202465694
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,32,32,128,1,float16,fp8,7,0.016037333756685257
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,32,32,128,1,float16,float16,15,0.02176533391078313
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,32,32,128,1,float16,fp8,15,0.01995733380317688
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,32,32,128,1,float16,float16,31,0.02197333425283432
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,32,32,128,1,float16,fp8,31,0.019776000330845516
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,32,32,128,1,float16,float16,63,0.022319999833901722
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,32,32,128,1,float16,fp8,63,0.01969066634774208
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,32,32,128,1,float16,float16,127,0.026730666557947796
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,32,32,128,1,float16,fp8,127,0.023386667172114056
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,32,32,128,1,float16,float16,255,0.04570133487383524
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,32,32,128,1,float16,fp8,255,0.03446933378775915
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,32,32,128,1,float16,float16,511,0.07855466504891713
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,32,32,128,1,float16,fp8,511,0.05836800237496694
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,32,32,128,1,float16,float16,1023,0.15812266866366068
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,32,32,128,1,float16,fp8,1023,0.10204266508420308
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,32,32,128,1,float16,float16,2047,0.2940000096956889
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,32,32,128,1,float16,fp8,2047,0.1912213365236918
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,32,32,128,1,float16,float16,4095,0.5680853525797526
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,32,32,128,1,float16,fp8,4095,0.368064006169637
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,32,32,128,1,float16,float16,8191,1.1200640201568604
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,32,32,128,1,float16,float16,1,0.008447999755541483
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,32,32,128,1,float16,fp8,8191,0.7228106657663981
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,32,32,128,1,float16,fp8,1,0.009418666362762451
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,32,32,128,1,float16,fp8,3,0.00943999985853831
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,32,32,128,1,float16,float16,3,0.008277333031098047
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,32,32,128,1,float16,float16,7,0.008442666381597519
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,32,32,128,1,float16,fp8,7,0.009402666861812273
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,32,32,128,1,float16,float16,15,0.008522666369875273
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,32,32,128,1,float16,fp8,15,0.00966933307548364
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,32,32,128,1,float16,float16,31,0.009285333255926767
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,32,32,128,1,float16,float16,63,0.009359999870260557
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,32,32,128,1,float16,fp8,31,0.010106666634480158
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,32,32,128,1,float16,float16,127,0.00938666673998038
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,32,32,128,1,float16,fp8,63,0.012240000069141388
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,32,32,128,1,float16,fp8,127,0.012240000069141388
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,32,32,128,1,float16,float16,255,0.010378666842977205
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,32,32,128,1,float16,float16,511,0.016607999801635742
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,32,32,128,1,float16,fp8,511,0.016549333930015564
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,32,32,128,1,float16,fp8,255,0.012106666962305704
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,32,32,128,1,float16,fp8,1023,0.017386666188637417
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,32,32,128,1,float16,float16,1023,0.017749333133300144
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,32,32,128,1,float16,float16,2047,0.030005333324273426
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,32,32,128,1,float16,fp8,2047,0.029669334491093952
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,32,32,128,1,float16,float16,4095,0.04043200115362803
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,32,32,128,1,float16,float16,8191,0.06839466591676076
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,32,32,128,1,float16,fp8,4095,0.039808000127474465
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,32,32,128,1,float16,float16,16383,0.11037866274515788
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,32,32,128,1,float16,fp8,8191,0.058874666690826416
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,32,32,128,1,float16,fp8,16383,0.099781334400177
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,32,32,128,1,float16,float16,32767,0.19579199949900308
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,32,32,128,1,float16,float16,1,0.008394666636983553
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,32,32,128,1,float16,fp8,32767,0.17486933867136636
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,32,32,128,1,float16,fp8,1,0.009690666571259499
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,32,32,128,1,float16,fp8,3,0.009749333063761393
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,32,32,128,1,float16,float16,7,0.008512000242869059
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,32,32,128,1,float16,float16,3,0.008250666782259941
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,32,32,128,1,float16,fp8,15,0.00983466642598311
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,32,32,128,1,float16,fp8,7,0.009519999846816063
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,32,32,128,1,float16,float16,15,0.008687999720374743
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,32,32,128,1,float16,float16,31,0.009488000224033991
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,32,32,128,1,float16,fp8,31,0.010437333335479101
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,32,32,128,1,float16,float16,63,0.00949866697192192
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,32,32,128,1,float16,fp8,63,0.012448000411192576
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,32,32,128,1,float16,float16,127,0.009541333342591921
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,32,32,128,1,float16,fp8,127,0.012367999802033106
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,32,32,128,1,float16,fp8,255,0.012341332932313284
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,32,32,128,1,float16,float16,255,0.01055466632048289
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,32,32,128,1,float16,float16,511,0.017082666357358296
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,32,32,128,1,float16,fp8,511,0.016751999656359356
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,32,32,128,1,float16,fp8,1023,0.02956266701221466
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,32,32,128,1,float16,float16,1023,0.02993600070476532
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,32,32,128,1,float16,fp8,2047,0.03710933278004328
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,32,32,128,1,float16,float16,2047,0.037952000896135964
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,32,32,128,1,float16,float16,4095,0.057855998476346336
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,32,32,128,1,float16,float16,8191,0.09774933258692424
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,32,32,128,1,float16,fp8,4095,0.0517493337392807
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,32,32,128,1,float16,fp8,8191,0.08712533116340637
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,32,32,128,1,float16,float16,16383,0.16987200578053793
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,32,32,128,1,float16,fp8,16383,0.15265066425005594
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,32,32,128,1,float16,float16,32767,0.31467199325561523
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,32,32,128,1,float16,fp8,32767,0.28280532360076904
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,32,32,128,1,float16,float16,1,0.03430933256944021
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,32,32,128,1,float16,fp8,1,0.026330667237440746
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,32,32,128,1,float16,float16,3,0.03477866699298223
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,32,32,128,1,float16,fp8,3,0.0271519993742307
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,32,32,128,1,float16,float16,7,0.035589332381884255
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,32,32,128,1,float16,float16,15,0.039247999588648476
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,32,32,128,1,float16,fp8,7,0.027962667246659596
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,32,32,128,1,float16,fp8,15,0.035349334279696144
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,32,32,128,1,float16,float16,31,0.03962666789690653
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,32,32,128,1,float16,fp8,31,0.035455999275048576
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,32,32,128,1,float16,float16,63,0.04124800115823746
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,32,32,128,1,float16,fp8,63,0.035375999907652535
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,32,32,128,1,float16,fp8,127,0.042080000042915344
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,32,32,128,1,float16,float16,127,0.05721066892147064
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,32,32,128,1,float16,float16,255,0.0848533312479655
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,32,32,128,1,float16,fp8,255,0.06638933221499126
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,32,32,128,1,float16,fp8,511,0.1085653305053711
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,32,32,128,1,float16,float16,511,0.14662399888038635
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,32,32,128,1,float16,float16,1023,0.29811733961105347
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,32,32,128,1,float16,fp8,1023,0.19324799378712973
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,32,32,128,1,float16,float16,1,0.008810666700204214
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,32,32,128,1,float16,float16,2047,0.571232000986735
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,32,32,128,1,float16,fp8,2047,0.3643253246943156
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,32,32,128,1,float16,fp8,1,0.009930666536092758
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,32,32,128,1,float16,float16,4095,1.1278613408406575
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,32,32,128,1,float16,fp8,4095,0.7047466437021891
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,32,32,128,1,float16,fp8,3,0.009866666669646898
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,32,32,128,1,float16,float16,3,0.008538666491707167
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,32,32,128,1,float16,float16,7,0.008762666955590248
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,32,32,128,1,float16,float16,15,0.008863999818762144
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,32,32,128,1,float16,fp8,7,0.009690666571259499
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,32,32,128,1,float16,fp8,15,0.010064000263810158
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,32,32,128,1,float16,float16,31,0.009455999980370203
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,32,32,128,1,float16,fp8,31,0.0106133334338665
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,32,32,128,1,float16,float16,127,0.009919999788204828
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,32,32,128,1,float16,float16,63,0.009530666594703993
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,32,32,128,1,float16,fp8,63,0.012383999923865
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,32,32,128,1,float16,fp8,127,0.012613333761692047
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,32,32,128,1,float16,float16,255,0.011071999867757162
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,32,32,128,1,float16,fp8,255,0.01259200026591619
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,32,32,128,1,float16,float16,511,0.028789333999156952
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,32,32,128,1,float16,fp8,511,0.02815466622511546
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,32,32,128,1,float16,float16,1023,0.033258666594823204
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,32,32,128,1,float16,fp8,1023,0.03295466552178065
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,32,32,128,1,float16,fp8,2047,0.0444106658299764
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,32,32,128,1,float16,float16,2047,0.048783997694651283
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,32,32,128,1,float16,float16,4095,0.08661866188049316
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,32,32,128,1,float16,fp8,4095,0.07196266452471416
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,32,32,128,1,float16,float16,8191,0.14670399824778238
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,32,32,128,1,float16,fp8,8191,0.1260373294353485
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,32,32,128,1,float16,float16,16383,0.2678239941596985
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,32,32,128,1,float16,fp8,16383,0.22612265745798746
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,32,32,128,1,float16,float16,1,0.061109334230422974
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,32,32,128,1,float16,float16,32767,0.5117599964141846
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,32,32,128,1,float16,fp8,32767,0.4248053232828776
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,32,32,128,1,float16,float16,3,0.06223999957243601
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,32,32,128,1,float16,fp8,1,0.048058668772379555
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,32,32,128,1,float16,fp8,3,0.04971733192602793
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,32,32,128,1,float16,float16,7,0.06303466856479645
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,32,32,128,1,float16,fp8,7,0.05213866631189982
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,32,32,128,1,float16,float16,15,0.072543998559316
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,32,32,128,1,float16,fp8,15,0.06527466575304668
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,32,32,128,1,float16,float16,31,0.07603199779987335
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,32,32,128,1,float16,fp8,31,0.06514666477839152
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,32,32,128,1,float16,float16,63,0.08431999882062276
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,32,32,128,1,float16,fp8,63,0.06598933537801106
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,32,32,128,1,float16,float16,127,0.10354666908582051
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,32,32,128,1,float16,fp8,127,0.082997332016627
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,32,32,128,1,float16,float16,255,0.15586666266123453
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,32,32,128,1,float16,fp8,255,0.12434132893880208
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,32,32,128,1,float16,float16,511,0.2720426718393962
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,32,32,128,1,float16,fp8,511,0.20754132668177286
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,32,32,128,1,float16,float16,1023,0.5600159962972006
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,32,32,128,1,float16,fp8,1023,0.3734026749928792
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,32,32,128,1,float16,float16,2047,1.0859306653340657
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,32,32,128,1,float16,fp8,2047,0.7095413208007812
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,256,1,32,32,128,1,float16,fp8,1,0.09146133065223694
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,256,1,32,32,128,1,float16,float16,1,0.11461866895357768
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,256,1,32,32,128,1,float16,float16,3,0.11707733074824016
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,256,1,32,32,128,1,float16,fp8,3,0.09442133704821269
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,256,1,32,32,128,1,float16,float16,7,0.11891733606656392
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,256,1,32,32,128,1,float16,fp8,7,0.09895466764767964
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,256,1,32,32,128,1,float16,float16,15,0.14566933115323386
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,256,1,32,32,128,1,float16,fp8,15,0.12487467130025227
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,256,1,32,32,128,1,float16,float16,31,0.15636266271273294
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,256,1,32,32,128,1,float16,float16,63,0.15920533736546835
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,256,1,32,32,128,1,float16,float16,255,0.2972533305486043
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,256,1,32,32,128,1,float16,fp8,31,0.12787733475367227
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,256,1,32,32,128,1,float16,fp8,63,0.13154666622479758
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,256,1,32,32,128,1,float16,float16,127,0.19648534059524536
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,256,1,32,32,128,1,float16,fp8,127,0.15743999679883322
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,256,1,32,32,128,1,float16,fp8,255,0.23920534054438272
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,256,1,32,32,128,1,float16,float16,511,0.5231786568959554
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,256,1,32,32,128,1,float16,fp8,511,0.40249598026275635
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,512,1,32,32,128,1,float16,float16,1,0.22414400180180868
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,512,1,32,32,128,1,float16,float16,3,0.2323039968808492
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,512,1,32,32,128,1,float16,fp8,1,0.17697600523630777
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,512,1,32,32,128,1,float16,float16,7,0.25305600961049396
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,512,1,32,32,128,1,float16,fp8,3,0.18369066715240479
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,512,1,32,32,128,1,float16,fp8,7,0.1950719952583313
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,512,1,32,32,128,1,float16,float16,15,0.29530133803685504
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,512,1,32,32,128,1,float16,fp8,15,0.25300266345342
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,512,1,32,32,128,1,float16,float16,31,0.30555200576782227
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,512,1,32,32,128,1,float16,fp8,31,0.2546773354212443
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,512,1,32,32,128,1,float16,fp8,63,0.256117324034373
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,512,1,32,32,128,1,float16,float16,63,0.3102026581764221
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,512,1,32,32,128,1,float16,fp8,127,0.30690133571624756
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,32,32,128,1,float16,float16,1,0.014165333161751429
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,512,1,32,32,128,1,float16,float16,127,0.3844746748606364
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,512,1,32,32,128,1,float16,float16,255,0.5826506614685059
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,32,32,128,1,float16,fp8,1,0.009296000003814697
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,512,1,32,32,128,1,float16,fp8,255,0.46862932046254474
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,32,32,128,1,float16,float16,3,0.014159999787807465
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,32,32,128,1,float16,fp8,3,0.010304000228643417
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,32,32,128,1,float16,float16,7,0.014250667144854864
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,32,32,128,1,float16,fp8,7,0.010314666976531347
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,32,32,128,1,float16,float16,31,0.015978666643301647
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,32,32,128,1,float16,fp8,31,0.01180800050497055
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,32,32,128,1,float16,float16,15,0.014666666587193808
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,32,32,128,1,float16,fp8,15,0.010101333260536194
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,32,32,128,1,float16,float16,63,0.016016000260909397
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,32,32,128,1,float16,float16,127,0.016421332955360413
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,32,32,128,1,float16,float16,255,0.01876266673207283
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,32,32,128,1,float16,fp8,63,0.01198400060335795
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,32,32,128,1,float16,fp8,127,0.01201066623131434
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,32,32,128,1,float16,fp8,255,0.013594667116800943
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,32,32,128,1,float16,fp8,511,0.019215999792019527
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,32,32,128,1,float16,float16,511,0.02603733291228612
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,32,32,128,1,float16,float16,1023,0.04814399778842926
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,32,32,128,1,float16,float16,2047,0.0958079993724823
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,32,32,128,1,float16,fp8,1023,0.02959999938805898
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,32,32,128,1,float16,fp8,2047,0.054757331808408104
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,32,32,128,1,float16,fp8,4095,0.10032533605893452
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,32,32,128,1,float16,float16,4095,0.17553067207336426
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,32,32,128,1,float16,float16,8191,0.33497599760691327
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,32,32,128,1,float16,fp8,8191,0.18918399016062418
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,32,32,128,1,float16,fp8,16383,0.36609601974487305
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,32,32,128,1,float16,float16,16383,0.6537706851959229
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,32,32,128,1,float16,float16,32767,1.2982292970021565
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,32,32,128,1,float16,fp8,32767,0.8676479657491049
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1024,1,32,32,128,1,float16,float16,1,0.48015467325846356
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1024,1,32,32,128,1,float16,float16,3,0.4995466470718384
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1024,1,32,32,128,1,float16,float16,7,0.5066186587015787
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1024,1,32,32,128,1,float16,fp8,1,0.3633546829223633
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1024,1,32,32,128,1,float16,fp8,7,0.40917332967122394
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1024,1,32,32,128,1,float16,float16,15,0.5840053160985311
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1024,1,32,32,128,1,float16,fp8,3,0.38634665807088214
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1024,1,32,32,128,1,float16,float16,31,0.6041813294092814
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1024,1,32,32,128,1,float16,fp8,31,0.503546675046285
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1024,1,32,32,128,1,float16,fp8,15,0.5030026833216349
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1024,1,32,32,128,1,float16,fp8,63,0.5051253239313761
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1024,1,32,32,128,1,float16,float16,63,0.6127359867095947
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1024,1,32,32,128,1,float16,fp8,127,0.6069013277689616
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1024,1,32,32,128,1,float16,float16,127,0.7593759695688883
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2048,1,32,32,128,1,float16,float16,1,0.9685813585917155
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2048,1,32,32,128,1,float16,fp8,1,0.7527146339416504
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2048,1,32,32,128,1,float16,float16,3,0.9925119876861572
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2048,1,32,32,128,1,float16,float16,7,1.0058666865030925
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2048,1,32,32,128,1,float16,float16,15,1.1606773535410564
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2048,1,32,32,128,1,float16,fp8,7,0.8118560314178467
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2048,1,32,32,128,1,float16,fp8,15,0.9994186560312907
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2048,1,32,32,128,1,float16,fp8,3,0.7714613278706869
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,32,32,128,1,float16,fp8,1,0.01505600040157636
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,32,32,128,1,float16,float16,1,0.013663999736309052
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2048,1,32,32,128,1,float16,float16,31,1.2011306285858154
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,32,32,128,1,float16,float16,3,0.013776000589132309
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2048,1,32,32,128,1,float16,fp8,31,0.9995306332906088
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2048,1,32,32,128,1,float16,float16,63,1.2170506318410237
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,32,32,128,1,float16,fp8,3,0.015381333728631338
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,32,32,128,1,float16,float16,7,0.014032000054915747
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2048,1,32,32,128,1,float16,fp8,63,1.0037333170572917
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,32,32,128,1,float16,fp8,7,0.015306666493415833
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,32,32,128,1,float16,float16,15,0.014352000008026758
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,32,32,128,1,float16,fp8,15,0.01664000004529953
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,32,32,128,1,float16,float16,31,0.014538666854302088
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,32,32,128,1,float16,fp8,31,0.020245333512624104
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,32,32,128,1,float16,fp8,63,0.020293333878119785
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,32,32,128,1,float16,float16,63,0.01492799942692121
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,32,32,128,1,float16,float16,127,0.017423999806245167
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,32,32,128,1,float16,fp8,127,0.020474666108687718
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,32,32,128,1,float16,float16,255,0.025231999655564625
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,32,32,128,1,float16,fp8,255,0.023647998770078022
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,32,32,128,1,float16,float16,511,0.050586665670077004
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,32,32,128,1,float16,float16,1023,0.09502933422724406
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,32,32,128,1,float16,fp8,511,0.03503466645876566
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,32,32,128,1,float16,fp8,1023,0.058730666836102806
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,32,32,128,1,float16,float16,2047,0.18207999070485434
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,32,32,128,1,float16,fp8,2047,0.10282133022944133
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,32,32,128,1,float16,float16,4095,0.34947200616200763
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,32,32,128,1,float16,fp8,4095,0.19058134158452353
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,32,32,128,1,float16,fp8,8191,0.3629759947458903
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,32,32,128,1,float16,float16,8191,0.6850506464640299
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,24,24,128,1,float16,float16,1,0.014186666657527288
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,32,32,128,1,float16,float16,16383,1.2897013028462727
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,32,32,128,1,float16,fp8,16383,0.7086666425069174
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,24,24,128,1,float16,fp8,1,0.013957332819700241
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,24,24,128,1,float16,float16,3,0.014469332993030548
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,24,24,128,1,float16,fp8,3,0.014122666170199713
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,24,24,128,1,float16,float16,7,0.01492799942692121
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,24,24,128,1,float16,float16,15,0.01704000060757001
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,24,24,128,1,float16,fp8,7,0.014362666755914688
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,24,24,128,1,float16,float16,31,0.017136000096797943
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,24,24,128,1,float16,fp8,15,0.016362667083740234
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,24,24,128,1,float16,fp8,31,0.016229332735141117
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,24,24,128,1,float16,float16,63,0.017231999586025875
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,24,24,128,1,float16,float16,127,0.01978133370478948
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,24,24,128,1,float16,fp8,63,0.01607999950647354
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,24,24,128,1,float16,fp8,127,0.018677332748969395
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,24,24,128,1,float16,float16,255,0.03311999887228012
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,24,24,128,1,float16,fp8,255,0.026608000199000042
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,24,24,128,1,float16,fp8,511,0.04525866607824961
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,24,24,128,1,float16,float16,511,0.05840000013510386
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,24,24,128,1,float16,float16,1023,0.11435733238855998
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,24,24,128,1,float16,fp8,1023,0.07879466811815898
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,24,24,128,1,float16,float16,2047,0.21337066094080606
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,24,24,128,1,float16,fp8,2047,0.1488746702671051
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,24,24,128,1,float16,fp8,4095,0.28306132555007935
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,24,24,128,1,float16,float16,4095,0.4122026761372884
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,24,24,128,1,float16,float16,8191,0.8098933696746826
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,24,24,128,1,float16,fp8,8191,0.5550346771876017
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,24,24,128,1,float16,float16,1,0.008336000144481659
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,24,24,128,1,float16,fp8,1,0.00943999985853831
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,24,24,128,1,float16,fp8,3,0.009194666519761086
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,24,24,128,1,float16,float16,3,0.008218666538596153
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,24,24,128,1,float16,float16,7,0.008362666393319765
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,24,24,128,1,float16,float16,15,0.008469333251317343
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,24,24,128,1,float16,fp8,7,0.00919999989370505
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,24,24,128,1,float16,fp8,15,0.009722666814923286
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,24,24,128,1,float16,fp8,31,0.010175999874869982
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,24,24,128,1,float16,float16,31,0.009162666896979014
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,24,24,128,1,float16,float16,63,0.00922133338948091
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,24,24,128,1,float16,fp8,63,0.011957333733638128
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,24,24,128,1,float16,float16,127,0.009343999748428663
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,24,24,128,1,float16,fp8,127,0.012047999848922094
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,24,24,128,1,float16,fp8,255,0.012149333953857422
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,24,24,128,1,float16,float16,255,0.01032533310353756
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,24,24,128,1,float16,float16,511,0.016656000167131424
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,24,24,128,1,float16,fp8,511,0.01640533283352852
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,24,24,128,1,float16,float16,1023,0.017711999515692394
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,24,24,128,1,float16,float16,2047,0.01811733345190684
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,24,24,128,1,float16,fp8,2047,0.01759999990463257
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,24,24,128,1,float16,fp8,1023,0.017258666455745697
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,24,24,128,1,float16,float16,4095,0.037578667203585304
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,24,24,128,1,float16,fp8,4095,0.03708266715208689
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,24,24,128,1,float16,float16,8191,0.05487466851870219
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,24,24,128,1,float16,fp8,8191,0.05093333125114441
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,24,24,128,1,float16,float16,16383,0.09553600351015727
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,24,24,128,1,float16,fp8,16383,0.08522666494051616
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,24,24,128,1,float16,float16,32767,0.16703466574350992
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,24,24,128,1,float16,float16,1,0.008261333530147871
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,24,24,128,1,float16,fp8,32767,0.1509119967619578
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,24,24,128,1,float16,fp8,1,0.009296000003814697
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,24,24,128,1,float16,float16,3,0.00842666688064734
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,24,24,128,1,float16,float16,7,0.008421333506703377
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,24,24,128,1,float16,fp8,7,0.009402666861812273
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,24,24,128,1,float16,fp8,3,0.009626666704813639
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,24,24,128,1,float16,fp8,15,0.009824000298976898
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,24,24,128,1,float16,float16,15,0.00874133345981439
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,24,24,128,1,float16,float16,31,0.00927466650803884
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,24,24,128,1,float16,fp8,31,0.010309333602587381
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,24,24,128,1,float16,float16,63,0.00943999985853831
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,24,24,128,1,float16,float16,127,0.009648000200589498
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,24,24,128,1,float16,fp8,63,0.012159999459981918
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,24,24,128,1,float16,fp8,127,0.012240000069141388
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,24,24,128,1,float16,float16,255,0.01080000028014183
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,24,24,128,1,float16,fp8,255,0.012181332955757776
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,24,24,128,1,float16,float16,511,0.016522667060295742
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,24,24,128,1,float16,fp8,1023,0.029482667644818623
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,24,24,128,1,float16,float16,1023,0.02961066613594691
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,24,24,128,1,float16,fp8,511,0.01640533283352852
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,24,24,128,1,float16,float16,2047,0.03759466608365377
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,24,24,128,1,float16,fp8,2047,0.036992001036802925
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,24,24,128,1,float16,float16,4095,0.05503466725349426
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,24,24,128,1,float16,fp8,4095,0.05100266635417938
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,24,24,128,1,float16,fp8,8191,0.08543999989827473
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,24,24,128,1,float16,float16,8191,0.09542399644851685
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,24,24,128,1,float16,float16,16383,0.16675732533137003
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,24,24,128,1,float16,fp8,16383,0.1511306663354238
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,24,24,128,1,float16,float16,32767,0.31009600559870404
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,24,24,128,1,float16,fp8,32767,0.2768266598383586
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,24,24,128,1,float16,float16,1,0.024832000335057575
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,24,24,128,1,float16,fp8,1,0.0206986665725708
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,24,24,128,1,float16,float16,3,0.02518400053183238
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,24,24,128,1,float16,float16,7,0.025962665677070618
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,24,24,128,1,float16,fp8,3,0.021157334248224895
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,24,24,128,1,float16,float16,15,0.030080000559488933
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,24,24,128,1,float16,fp8,7,0.021877333521842957
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,24,24,128,1,float16,fp8,15,0.027776000400384266
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,24,24,128,1,float16,float16,31,0.03030399978160858
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,24,24,128,1,float16,fp8,31,0.02754133443037669
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,24,24,128,1,float16,float16,63,0.03044266750415166
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,24,24,128,1,float16,fp8,63,0.027727998793125153
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,24,24,128,1,float16,fp8,127,0.03249600032965342
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,24,24,128,1,float16,float16,127,0.04203199843565623
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,24,24,128,1,float16,float16,255,0.06312533219655354
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,24,24,128,1,float16,fp8,255,0.05130666494369507
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,24,24,128,1,float16,float16,511,0.10883733630180359
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,24,24,128,1,float16,fp8,511,0.08370133241017659
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,24,24,128,1,float16,float16,1023,0.21941334009170532
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,24,24,128,1,float16,fp8,1023,0.14756266276041666
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,24,24,128,1,float16,float16,2047,0.41629334290822345
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,24,24,128,1,float16,fp8,2047,0.27779199679692584
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,24,24,128,1,float16,float16,4095,0.8111519813537598
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,24,24,128,1,float16,float16,1,0.008602666358153025
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,24,24,128,1,float16,fp8,4095,0.5373333295186361
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,24,24,128,1,float16,float16,3,0.00855466661353906
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,24,24,128,1,float16,fp8,1,0.00972800018886725
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,24,24,128,1,float16,float16,7,0.008618666479984919
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,24,24,128,1,float16,fp8,3,0.009952000031868616
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,24,24,128,1,float16,fp8,7,0.009642666826645533
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,24,24,128,1,float16,fp8,15,0.009839999799927076
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,24,24,128,1,float16,float16,15,0.00873066671192646
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,24,24,128,1,float16,float16,31,0.009482666850090027
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,24,24,128,1,float16,fp8,63,0.012362666428089142
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,24,24,128,1,float16,fp8,31,0.010442666709423065
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,24,24,128,1,float16,float16,127,0.009717333440979322
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,24,24,128,1,float16,float16,63,0.009429333110650381
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,24,24,128,1,float16,float16,255,0.010751999914646149
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,24,24,128,1,float16,fp8,127,0.012229333321253458
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,24,24,128,1,float16,fp8,255,0.01227733368674914
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,24,24,128,1,float16,float16,511,0.028010666370391846
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,24,24,128,1,float16,fp8,511,0.02789866675933202
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,24,24,128,1,float16,float16,1023,0.032458665470282234
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,24,24,128,1,float16,float16,2047,0.046480000019073486
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,24,24,128,1,float16,fp8,1023,0.03251733382542928
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,24,24,128,1,float16,fp8,2047,0.04355733096599579
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,24,24,128,1,float16,float16,4095,0.07790400087833405
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,24,24,128,1,float16,fp8,4095,0.06919999917348225
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,24,24,128,1,float16,float16,8191,0.13286933302879333
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,24,24,128,1,float16,fp8,8191,0.12119999527931213
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,24,24,128,1,float16,float16,16383,0.2399359941482544
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,24,24,128,1,float16,fp8,16383,0.22084800402323404
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,24,24,128,1,float16,float16,32767,0.4542826811472575
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,24,24,128,1,float16,fp8,32767,0.4145600001017253
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,24,24,128,1,float16,float16,1,0.045850664377212524
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,24,24,128,1,float16,float16,3,0.046911999583244324
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,24,24,128,1,float16,fp8,3,0.03890133400758108
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,24,24,128,1,float16,fp8,1,0.037434667348861694
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,24,24,128,1,float16,float16,7,0.04775466521581014
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,24,24,128,1,float16,fp8,7,0.040261333187421165
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,24,24,128,1,float16,float16,15,0.05612266560395559
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,24,24,128,1,float16,fp8,15,0.05083733300367991
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,24,24,128,1,float16,float16,31,0.05709333221117655
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,24,24,128,1,float16,float16,63,0.06443200012048085
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,24,24,128,1,float16,fp8,31,0.050714666644732155
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,24,24,128,1,float16,fp8,63,0.05064000189304352
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,24,24,128,1,float16,float16,127,0.07922666768232982
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,24,24,128,1,float16,fp8,127,0.06404800216356914
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,24,24,128,1,float16,float16,255,0.11858133474985759
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,24,24,128,1,float16,fp8,255,0.09556800127029419
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,24,24,128,1,float16,float16,511,0.20658133427302042
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,24,24,128,1,float16,fp8,511,0.15869333346684775
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,24,24,128,1,float16,float16,1023,0.42533334096272785
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,24,24,128,1,float16,fp8,1023,0.28359999259312946
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,24,24,128,1,float16,float16,2047,0.817349354426066
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,24,24,128,1,float16,fp8,2047,0.5371093352635702
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,256,1,24,24,128,1,float16,float16,1,0.08770133058230083
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,256,1,24,24,128,1,float16,fp8,1,0.07002133131027222
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,256,1,24,24,128,1,float16,float16,7,0.09084266424179077
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,256,1,24,24,128,1,float16,fp8,3,0.07218666871388753
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,256,1,24,24,128,1,float16,fp8,7,0.07532266775767009
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,256,1,24,24,128,1,float16,float16,15,0.10731732845306396
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,256,1,24,24,128,1,float16,fp8,15,0.09478933612505595
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,256,1,24,24,128,1,float16,float16,3,0.0892693301041921
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,256,1,24,24,128,1,float16,float16,31,0.11874666810035706
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,256,1,24,24,128,1,float16,fp8,31,0.09563199679056804
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,256,1,24,24,128,1,float16,fp8,63,0.10054933031400044
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,256,1,24,24,128,1,float16,float16,63,0.12171733379364014
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,256,1,24,24,128,1,float16,fp8,127,0.12004266182581584
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,256,1,24,24,128,1,float16,float16,127,0.15010133385658264
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,256,1,24,24,128,1,float16,float16,255,0.22694933414459229
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,256,1,24,24,128,1,float16,float16,511,0.3961546818415324
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,256,1,24,24,128,1,float16,fp8,255,0.1814026633898417
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,256,1,24,24,128,1,float16,fp8,511,0.30353599786758423
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,512,1,24,24,128,1,float16,float16,1,0.16973867019017538
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,512,1,24,24,128,1,float16,fp8,1,0.1341973344484965
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,512,1,24,24,128,1,float16,float16,3,0.17361599206924438
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,512,1,24,24,128,1,float16,fp8,3,0.13886400063832602
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,512,1,24,24,128,1,float16,float16,7,0.1829973260561625
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,512,1,24,24,128,1,float16,fp8,7,0.14672000209490457
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,512,1,24,24,128,1,float16,float16,15,0.22327999273935953
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,512,1,24,24,128,1,float16,fp8,15,0.18734399477640787
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,512,1,24,24,128,1,float16,float16,31,0.23148266474405924
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,512,1,24,24,128,1,float16,fp8,31,0.19293334086736044
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,512,1,24,24,128,1,float16,float16,63,0.23518399397532144
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,512,1,24,24,128,1,float16,fp8,63,0.1938719948132833
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,512,1,24,24,128,1,float16,fp8,127,0.2320479949315389
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,512,1,24,24,128,1,float16,float16,127,0.29064534107844037
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,512,1,24,24,128,1,float16,float16,255,0.4394933382670085
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,24,24,128,1,float16,float16,1,0.01394133393963178
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,512,1,24,24,128,1,float16,fp8,255,0.35390400886535645
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,24,24,128,1,float16,fp8,1,0.008858666444818178
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,24,24,128,1,float16,float16,3,0.013738666971524557
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,24,24,128,1,float16,fp8,3,0.00902399979531765
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,24,24,128,1,float16,float16,7,0.013845333208640417
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,24,24,128,1,float16,fp8,7,0.009322666873534521
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,24,24,128,1,float16,float16,15,0.014159999787807465
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,24,24,128,1,float16,fp8,15,0.009808000177145004
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,24,24,128,1,float16,float16,31,0.015802666544914246
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,24,24,128,1,float16,fp8,31,0.011920000116030375
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,24,24,128,1,float16,float16,63,0.01603200038274129
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,24,24,128,1,float16,float16,127,0.016127999871969223
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,24,24,128,1,float16,fp8,63,0.011616000284751257
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,24,24,128,1,float16,fp8,127,0.011834666132926941
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,24,24,128,1,float16,float16,255,0.0183146670460701
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,24,24,128,1,float16,fp8,255,0.013584000368913015
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,24,24,128,1,float16,float16,511,0.024800000091393787
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,24,24,128,1,float16,float16,1023,0.045034666856129967
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,24,24,128,1,float16,fp8,1023,0.029472000896930695
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,24,24,128,1,float16,fp8,511,0.019018666197856266
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,24,24,128,1,float16,float16,2047,0.08782933155695598
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,24,24,128,1,float16,fp8,2047,0.05198400219281515
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,24,24,128,1,float16,float16,4095,0.16639467080434164
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,24,24,128,1,float16,fp8,4095,0.09782933195432027
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,24,24,128,1,float16,float16,8191,0.31704533100128174
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,24,24,128,1,float16,fp8,8191,0.1867520014444987
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,24,24,128,1,float16,float16,16383,0.6155946652094523
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,24,24,128,1,float16,fp8,16383,0.3614720106124878
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,24,24,128,1,float16,float16,32767,1.2208213011423747
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,24,24,128,1,float16,fp8,32767,0.8454559644063314
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1024,1,24,24,128,1,float16,float16,1,0.33879999319712323
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1024,1,24,24,128,1,float16,fp8,1,0.26524267594019574
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1024,1,24,24,128,1,float16,float16,3,0.37218133608500165
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1024,1,24,24,128,1,float16,fp8,3,0.2792373299598694
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1024,1,24,24,128,1,float16,float16,7,0.38207467397054035
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1024,1,24,24,128,1,float16,fp8,7,0.305567999680837
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1024,1,24,24,128,1,float16,float16,15,0.44013333320617676
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1024,1,24,24,128,1,float16,float16,31,0.4558986822764079
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1024,1,24,24,128,1,float16,fp8,15,0.3792426586151123
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1024,1,24,24,128,1,float16,fp8,31,0.37911999225616455
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1024,1,24,24,128,1,float16,float16,63,0.46188799540201825
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1024,1,24,24,128,1,float16,fp8,63,0.38070400555928546
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1024,1,24,24,128,1,float16,float16,127,0.5715466737747192
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1024,1,24,24,128,1,float16,fp8,127,0.4567573467890422
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2048,1,24,24,128,1,float16,fp8,1,0.5664159854253134
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2048,1,24,24,128,1,float16,fp8,3,0.5814293225606283
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2048,1,24,24,128,1,float16,float16,1,0.7282240390777588
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2048,1,24,24,128,1,float16,float16,7,0.757584015528361
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2048,1,24,24,128,1,float16,fp8,7,0.6115146478017172
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2048,1,24,24,128,1,float16,float16,15,0.8733226458231608
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2048,1,24,24,128,1,float16,fp8,15,0.7514293193817139
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2048,1,24,24,128,1,float16,float16,3,0.7475466728210449
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,24,24,128,1,float16,float16,1,0.019685332973798115
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,24,24,128,1,float16,float16,3,0.019786667078733444
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,24,24,128,1,float16,fp8,1,0.015178666760524115
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,24,24,128,1,float16,fp8,3,0.015125333021084467
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2048,1,24,24,128,1,float16,float16,31,0.9039200146993002
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2048,1,24,24,128,1,float16,fp8,31,0.7515786488850912
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,24,24,128,1,float16,float16,7,0.019498666127522785
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2048,1,24,24,128,1,float16,fp8,63,0.7544106642405192
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,24,24,128,1,float16,fp8,7,0.01505600040157636
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2048,1,24,24,128,1,float16,float16,63,0.9147786299387614
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,24,24,128,1,float16,float16,15,0.020303999384244282
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,24,24,128,1,float16,fp8,15,0.01509333277742068
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,24,24,128,1,float16,float16,31,0.022287999590237934
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,24,24,128,1,float16,fp8,31,0.017162666966517765
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,24,24,128,1,float16,float16,63,0.022533332308133442
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,24,24,128,1,float16,fp8,63,0.017157333592573803
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,24,24,128,1,float16,fp8,255,0.01961600035429001
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,24,24,128,1,float16,float16,127,0.02295999974012375
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,24,24,128,1,float16,float16,255,0.02649066597223282
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,24,24,128,1,float16,float16,511,0.04366933306058248
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,24,24,128,1,float16,fp8,511,0.027098665634791057
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,24,24,128,1,float16,float16,1023,0.07047999898592631
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,24,24,128,1,float16,fp8,127,0.0170666662355264
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,24,24,128,1,float16,fp8,1023,0.04572799801826477
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,24,24,128,1,float16,float16,2047,0.14198933045069376
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,24,24,128,1,float16,fp8,2047,0.07969066500663757
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,24,24,128,1,float16,float16,4095,0.2616533239682515
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,24,24,128,1,float16,fp8,4095,0.1479626695315043
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,24,24,128,1,float16,float16,8191,0.5017973184585571
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,24,24,128,1,float16,fp8,8191,0.2811253269513448
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,16,16,128,1,float16,float16,1,0.013717333475748697
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,24,24,128,1,float16,fp8,16383,0.546341339747111
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,16,16,128,1,float16,float16,3,0.013967999567588171
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,16,16,128,1,float16,fp8,1,0.015295999745527903
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,16,16,128,1,float16,fp8,3,0.01540800059835116
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,16,16,128,1,float16,float16,7,0.014085333794355392
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,24,24,128,1,float16,float16,16383,0.9832906723022461
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,16,16,128,1,float16,fp8,7,0.015594666202863058
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,16,16,128,1,float16,fp8,15,0.01666133354107539
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,16,16,128,1,float16,float16,15,0.01443733274936676
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,16,16,128,1,float16,float16,31,0.01492799942692121
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,16,16,128,1,float16,fp8,31,0.02029866725206375
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,16,16,128,1,float16,fp8,63,0.020432000358899433
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,16,16,128,1,float16,float16,63,0.014922666052977243
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,16,16,128,1,float16,float16,255,0.025301332275072735
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,16,16,128,1,float16,fp8,127,0.02037866661945979
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,16,16,128,1,float16,fp8,255,0.02369066576162974
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,16,16,128,1,float16,float16,511,0.05130666494369507
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,16,16,128,1,float16,float16,1023,0.09489599863688152
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,16,16,128,1,float16,fp8,511,0.03491200009981791
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,16,16,128,1,float16,fp8,1023,0.05872533222039541
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,16,16,128,1,float16,float16,127,0.017210666090250015
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,16,16,128,1,float16,float16,2047,0.18253332376480103
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,16,16,128,1,float16,fp8,2047,0.1027786632378896
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,16,16,128,1,float16,float16,4095,0.3505386511484782
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,16,16,128,1,float16,fp8,4095,0.19074134031931558
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,16,16,128,1,float16,fp8,8191,0.36347198486328125
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,16,16,128,1,float16,float16,8191,0.6868906815846761
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,16,16,128,1,float16,float16,16383,1.2901066939036052
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,16,16,128,1,float16,float16,3,0.00808533343176047
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,16,16,128,1,float16,float16,1,0.007983999947706858
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,16,16,128,1,float16,fp8,1,0.009413333609700203
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,16,16,128,1,float16,fp8,16383,0.7097280025482178
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,16,16,128,1,float16,fp8,3,0.009546666716535887
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,16,16,128,1,float16,fp8,7,0.009322666873534521
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,16,16,128,1,float16,float16,7,0.008383999889095625
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,16,16,128,1,float16,float16,15,0.008314666648705801
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,16,16,128,1,float16,fp8,15,0.009482666850090027
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,16,16,128,1,float16,float16,31,0.00927466650803884
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,16,16,128,1,float16,float16,63,0.009381333366036415
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,16,16,128,1,float16,fp8,31,0.010170666500926018
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,16,16,128,1,float16,float16,127,0.00922133338948091
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,16,16,128,1,float16,fp8,127,0.012063999970753988
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,16,16,128,1,float16,float16,255,0.010277333358923594
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,16,16,128,1,float16,fp8,63,0.011999999483426413
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,16,16,128,1,float16,fp8,255,0.012015999605258306
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,16,16,128,1,float16,float16,511,0.01646399994691213
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,16,16,128,1,float16,fp8,511,0.016410666207472484
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,16,16,128,1,float16,float16,1023,0.017621333400408428
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,16,16,128,1,float16,float16,2047,0.01782400036851565
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,16,16,128,1,float16,fp8,1023,0.01728533332546552
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,16,16,128,1,float16,fp8,2047,0.01746133342385292
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,16,16,128,1,float16,fp8,4095,0.029461334149042766
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,16,16,128,1,float16,float16,8191,0.041162667175134025
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,16,16,128,1,float16,fp8,8191,0.04060266663630804
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,16,16,128,1,float16,float16,16383,0.07135466734568278
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,16,16,128,1,float16,fp8,16383,0.06067200005054474
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,16,16,128,1,float16,float16,4095,0.02977599948644638
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,16,16,128,1,float16,float16,32767,0.12166933218638103
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,16,16,128,1,float16,fp8,32767,0.11097600062688191
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,16,16,128,1,float16,float16,65535,0.21599467595418295
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,16,16,128,1,float16,float16,1,0.008117333054542542
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,16,16,128,1,float16,fp8,65535,0.19392534097035727
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,16,16,128,1,float16,fp8,1,0.009610666582981745
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,16,16,128,1,float16,float16,3,0.008458666503429413
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,16,16,128,1,float16,float16,7,0.008223999912540117
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,16,16,128,1,float16,fp8,7,0.009359999870260557
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,16,16,128,1,float16,float16,15,0.008656000097592672
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,16,16,128,1,float16,fp8,15,0.009786666681369146
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,16,16,128,1,float16,fp8,3,0.009455999980370203
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,16,16,128,1,float16,fp8,31,0.010464000205198923
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,16,16,128,1,float16,float16,31,0.009296000003814697
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,16,16,128,1,float16,float16,63,0.009493333597977957
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,16,16,128,1,float16,fp8,63,0.012186666329701742
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,16,16,128,1,float16,float16,127,0.009461333354314169
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,16,16,128,1,float16,fp8,127,0.012154666086037954
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,16,16,128,1,float16,float16,255,0.010656000425418219
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,16,16,128,1,float16,float16,511,0.016757333030303318
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,16,16,128,1,float16,fp8,511,0.016480000068744022
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,16,16,128,1,float16,float16,1023,0.017909333109855652
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,16,16,128,1,float16,fp8,1023,0.017642666896184284
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,16,16,128,1,float16,float16,2047,0.029743999242782593
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,16,16,128,1,float16,float16,4095,0.040789333482583366
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,16,16,128,1,float16,fp8,255,0.012175999581813812
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,16,16,128,1,float16,fp8,2047,0.02959999938805898
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,16,16,128,1,float16,fp8,4095,0.03997866561015447
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,16,16,128,1,float16,float16,8191,0.06773866713047028
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,16,16,128,1,float16,fp8,8191,0.05876266459623972
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,16,16,128,1,float16,float16,16383,0.11027200023333232
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,16,16,128,1,float16,fp8,16383,0.10078932841618855
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,16,16,128,1,float16,float16,32767,0.19476799170176187
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,16,16,128,1,float16,float16,65535,0.36804266770680744
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,16,16,128,1,float16,float16,1,0.01918399954835574
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,16,16,128,1,float16,fp8,65535,0.3296533425649007
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,16,16,128,1,float16,fp8,1,0.015114666273196539
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,16,16,128,1,float16,float16,3,0.019621333728233974
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,16,16,128,1,float16,fp8,32767,0.1753973364830017
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,16,16,128,1,float16,fp8,3,0.015413332730531693
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,16,16,128,1,float16,float16,7,0.019893333315849304
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,16,16,128,1,float16,fp8,7,0.01597333326935768
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,16,16,128,1,float16,float16,15,0.02165333429972331
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,16,16,128,1,float16,fp8,15,0.02004266654451688
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,16,16,128,1,float16,float16,31,0.0220320001244545
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,16,16,128,1,float16,fp8,31,0.019930666933457058
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,16,16,128,1,float16,fp8,63,0.019717333217461903
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,16,16,128,1,float16,float16,127,0.0269813338915507
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,16,16,128,1,float16,fp8,127,0.023423999547958374
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,16,16,128,1,float16,float16,255,0.04569066564242045
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,16,16,128,1,float16,fp8,255,0.03435199956099192
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,16,16,128,1,float16,float16,511,0.07826133569081624
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,16,16,128,1,float16,fp8,511,0.05819733440876007
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,16,16,128,1,float16,float16,63,0.02214933435122172
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,16,16,128,1,float16,float16,1023,0.1590986649195353
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,16,16,128,1,float16,fp8,1023,0.10218666990598042
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,16,16,128,1,float16,fp8,2047,0.19110933939615884
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,16,16,128,1,float16,float16,2047,0.29446399211883545
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,16,16,128,1,float16,fp8,4095,0.36747201283772785
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,16,16,128,1,float16,float16,8191,1.122431993484497
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,16,16,128,1,float16,float16,4095,0.5694400072097778
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,16,16,128,1,float16,float16,1,0.008410666758815447
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,16,16,128,1,float16,fp8,1,0.00972800018886725
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,16,16,128,1,float16,fp8,8191,0.7229173183441162
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,16,16,128,1,float16,float16,3,0.00843733362853527
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,16,16,128,1,float16,fp8,3,0.009514666472872099
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,16,16,128,1,float16,fp8,7,0.009701333319147428
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,16,16,128,1,float16,float16,7,0.008373333141207695
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,16,16,128,1,float16,float16,15,0.00955200009047985
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,16,16,128,1,float16,float16,31,0.009322666873534521
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,16,16,128,1,float16,fp8,15,0.009701333319147428
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,16,16,128,1,float16,float16,63,0.009450666606426239
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,16,16,128,1,float16,fp8,63,0.012400000045696894
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,16,16,128,1,float16,float16,127,0.009568000212311745
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,16,16,128,1,float16,fp8,127,0.01239466667175293
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,16,16,128,1,float16,fp8,31,0.010405333091815313
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,16,16,128,1,float16,fp8,511,0.016837333639462788
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,16,16,128,1,float16,float16,255,0.010885333021481832
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,16,16,128,1,float16,float16,511,0.01699200024207433
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,16,16,128,1,float16,fp8,1023,0.029674666623274486
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,16,16,128,1,float16,float16,1023,0.02993600070476532
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,16,16,128,1,float16,fp8,255,0.012432000289360682
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,16,16,128,1,float16,float16,2047,0.03791466603676478
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,16,16,128,1,float16,float16,4095,0.05672533313433329
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,16,16,128,1,float16,fp8,8191,0.08771199981371562
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,16,16,128,1,float16,float16,8191,0.09825066725413005
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,16,16,128,1,float16,fp8,4095,0.0516480008761088
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,16,16,128,1,float16,float16,16383,0.17005334297815958
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,16,16,128,1,float16,fp8,16383,0.15318399667739868
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,16,16,128,1,float16,fp8,2047,0.03699733316898346
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,16,16,128,1,float16,float16,32767,0.3139999906222026
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,16,16,128,1,float16,fp8,32767,0.28362133105595905
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,16,16,128,1,float16,float16,65535,0.5990933179855347
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,16,16,128,1,float16,float16,3,0.03505066782236099
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,16,16,128,1,float16,fp8,1,0.026335999369621277
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,16,16,128,1,float16,fp8,65535,0.5352746645609537
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,16,16,128,1,float16,float16,1,0.03440533330043157
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,16,16,128,1,float16,float16,7,0.03535466641187668
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,16,16,128,1,float16,fp8,3,0.027232001225153606
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,16,16,128,1,float16,fp8,7,0.02808533360560735
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,16,16,128,1,float16,float16,15,0.03915733347336451
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,16,16,128,1,float16,fp8,15,0.03534399966398875
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,16,16,128,1,float16,fp8,31,0.03533866753180822
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,16,16,128,1,float16,float16,31,0.03977066775163015
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,16,16,128,1,float16,fp8,63,0.03552533437808355
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,16,16,128,1,float16,float16,63,0.04125333329041799
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,16,16,128,1,float16,fp8,127,0.04200533529122671
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,16,16,128,1,float16,float16,255,0.08524266878763835
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,16,16,128,1,float16,fp8,255,0.06629333396752675
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,16,16,128,1,float16,float16,511,0.1466506620248159
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,16,16,128,1,float16,float16,127,0.057760000228881836
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,16,16,128,1,float16,fp8,511,0.10871466994285583
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,16,16,128,1,float16,float16,1023,0.29892265796661377
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,16,16,128,1,float16,fp8,1023,0.19293866554896036
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,16,16,128,1,float16,float16,2047,0.5721280177434286
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,16,16,128,1,float16,fp8,2047,0.3641546567281087
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,16,16,128,1,float16,float16,4095,1.1301546891530354
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,256,1,16,16,128,1,float16,fp8,1,0.04811733464399973
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,16,16,128,1,float16,fp8,4095,0.7049492994944254
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,256,1,16,16,128,1,float16,float16,3,0.062181333700815834
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,256,1,16,16,128,1,float16,float16,7,0.0629013329744339
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,256,1,16,16,128,1,float16,fp8,3,0.04987733562787374
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,256,1,16,16,128,1,float16,float16,1,0.06091199815273285
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,256,1,16,16,128,1,float16,fp8,7,0.052069331208864846
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,256,1,16,16,128,1,float16,float16,15,0.07277866701285045
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,256,1,16,16,128,1,float16,fp8,15,0.06539200246334076
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,256,1,16,16,128,1,float16,fp8,31,0.0653599997361501
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,256,1,16,16,128,1,float16,float16,63,0.084197332461675
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,256,1,16,16,128,1,float16,fp8,63,0.06594666838645935
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,256,1,16,16,128,1,float16,fp8,127,0.08223466575145721
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,256,1,16,16,128,1,float16,float16,255,0.15585600336392721
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,256,1,16,16,128,1,float16,fp8,255,0.1244533360004425
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,256,1,16,16,128,1,float16,float16,511,0.27250667413075763
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,256,1,16,16,128,1,float16,float16,31,0.07658666869004567
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,256,1,16,16,128,1,float16,fp8,511,0.2075200080871582
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,256,1,16,16,128,1,float16,float16,127,0.10371733705202739
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,256,1,16,16,128,1,float16,fp8,1023,0.3732000192006429
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,512,1,16,16,128,1,float16,float16,1,0.11453866958618164
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,256,1,16,16,128,1,float16,float16,1023,0.5607946713765463
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,512,1,16,16,128,1,float16,fp8,1,0.09165866176287334
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,512,1,16,16,128,1,float16,float16,3,0.11690133810043335
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,512,1,16,16,128,1,float16,float16,7,0.11900267004966736
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,512,1,16,16,128,1,float16,fp8,3,0.09444800019264221
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,512,1,16,16,128,1,float16,float16,15,0.14551466703414917
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,512,1,16,16,128,1,float16,fp8,15,0.12520533800125122
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,512,1,16,16,128,1,float16,fp8,7,0.09904533624649048
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,512,1,16,16,128,1,float16,fp8,31,0.12869866689046225
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,512,1,16,16,128,1,float16,float16,31,0.15659733613332114
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,512,1,16,16,128,1,float16,float16,63,0.15973333517710367
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,512,1,16,16,128,1,float16,fp8,63,0.13166933258374533
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,16,16,128,1,float16,float16,1,0.008698666468262672
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,512,1,16,16,128,1,float16,fp8,127,0.15743999679883322
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,512,1,16,16,128,1,float16,float16,255,0.2977280020713806
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,512,1,16,16,128,1,float16,float16,127,0.19653334220250449
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,512,1,16,16,128,1,float16,float16,511,0.5232853492101034
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,512,1,16,16,128,1,float16,fp8,511,0.4025866587956746
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,16,16,128,1,float16,fp8,1,0.009797333429257074
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,16,16,128,1,float16,float16,3,0.008559999987483025
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,512,1,16,16,128,1,float16,fp8,255,0.23896533250808716
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,16,16,128,1,float16,float16,7,0.008853333070874214
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,16,16,128,1,float16,fp8,3,0.009888000165422758
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,16,16,128,1,float16,float16,15,0.008858666444818178
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,16,16,128,1,float16,fp8,15,0.010181333248813948
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,16,16,128,1,float16,float16,31,0.009557333464423815
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,16,16,128,1,float16,fp8,31,0.010645333677530289
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,16,16,128,1,float16,fp8,7,0.009765333185593287
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,16,16,128,1,float16,fp8,63,0.01246400053302447
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,16,16,128,1,float16,float16,127,0.009786666681369146
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,16,16,128,1,float16,fp8,255,0.012448000411192576
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,16,16,128,1,float16,fp8,127,0.012458667159080505
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,16,16,128,1,float16,float16,255,0.011125333607196808
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,16,16,128,1,float16,float16,63,0.00973866693675518
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,16,16,128,1,float16,float16,511,0.02870933214823405
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,16,16,128,1,float16,float16,1023,0.033359999457995095
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,16,16,128,1,float16,float16,2047,0.04900800188382467
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,16,16,128,1,float16,fp8,1023,0.03301866600910822
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,16,16,128,1,float16,fp8,2047,0.04423999786376953
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,16,16,128,1,float16,float16,4095,0.08636800448099773
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,16,16,128,1,float16,float16,8191,0.1470026671886444
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,16,16,128,1,float16,fp8,511,0.028149334092934925
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,16,16,128,1,float16,fp8,4095,0.07237333556016286
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,16,16,128,1,float16,fp8,8191,0.12638933459917703
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,16,16,128,1,float16,float16,16383,0.26737066109975177
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,16,16,128,1,float16,fp8,16383,0.22564800580342612
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,16,16,128,1,float16,float16,32767,0.5114880005518595
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,16,16,128,1,float16,fp8,32767,0.4245599905649821
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,16,16,128,1,float16,float16,65535,1.0066133340199788
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1024,1,16,16,128,1,float16,fp8,1,0.17722666263580322
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1024,1,16,16,128,1,float16,float16,3,0.23332800467809042
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1024,1,16,16,128,1,float16,float16,1,0.22403732935587564
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,16,16,128,1,float16,fp8,65535,0.8332693576812744
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1024,1,16,16,128,1,float16,fp8,3,0.18360533316930136
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1024,1,16,16,128,1,float16,float16,7,0.25361599524815875
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1024,1,16,16,128,1,float16,fp8,7,0.19564799467722574
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1024,1,16,16,128,1,float16,float16,15,0.2956266601880391
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1024,1,16,16,128,1,float16,fp8,15,0.2534986734390259
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1024,1,16,16,128,1,float16,float16,31,0.306549330552419
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1024,1,16,16,128,1,float16,fp8,63,0.25628799200057983
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1024,1,16,16,128,1,float16,fp8,127,0.3068693280220032
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1024,1,16,16,128,1,float16,float16,127,0.3845013380050659
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1024,1,16,16,128,1,float16,fp8,31,0.2552479902903239
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1024,1,16,16,128,1,float16,float16,63,0.3104213277498881
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1024,1,16,16,128,1,float16,float16,255,0.5833866596221924
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1024,1,16,16,128,1,float16,fp8,255,0.46889599164326984
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2048,1,16,16,128,1,float16,float16,1,0.48042134443918866
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2048,1,16,16,128,1,float16,fp8,1,0.3641599814097087
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2048,1,16,16,128,1,float16,float16,3,0.5010826587677002
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2048,1,16,16,128,1,float16,fp8,3,0.38708265622456867
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2048,1,16,16,128,1,float16,float16,15,0.5846026738484701
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2048,1,16,16,128,1,float16,float16,7,0.5078666607538859
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2048,1,16,16,128,1,float16,fp8,15,0.5031893253326416
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2048,1,16,16,128,1,float16,fp8,7,0.4097226858139038
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2048,1,16,16,128,1,float16,float16,31,0.6054720083872477
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2048,1,16,16,128,1,float16,fp8,31,0.5034240086873373
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,16,16,128,1,float16,float16,1,0.014271999398867289
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2048,1,16,16,128,1,float16,float16,63,0.6124320030212402
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2048,1,16,16,128,1,float16,fp8,63,0.5051786502202352
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2048,1,16,16,128,1,float16,float16,127,0.7600693702697754
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,16,16,128,1,float16,float16,7,0.014266667266686758
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,16,16,128,1,float16,float16,3,0.014261333892742792
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,16,16,128,1,float16,fp8,3,0.009381333366036415
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,16,16,128,1,float16,fp8,1,0.009935999910036722
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2048,1,16,16,128,1,float16,fp8,127,0.606602668762207
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,16,16,128,1,float16,fp8,7,0.009312000125646591
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,16,16,128,1,float16,float16,15,0.014576000471909841
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,16,16,128,1,float16,float16,31,0.01597333326935768
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,16,16,128,1,float16,fp8,15,0.009999999776482582
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,16,16,128,1,float16,float16,63,0.01626666635274887
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,16,16,128,1,float16,fp8,31,0.011871999750534693
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,16,16,128,1,float16,float16,127,0.016490666816631954
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,16,16,128,1,float16,float16,255,0.01870399961868922
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,16,16,128,1,float16,fp8,63,0.01258133351802826
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,16,16,128,1,float16,fp8,127,0.011994666109482447
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,16,16,128,1,float16,fp8,255,0.013818666338920593
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,16,16,128,1,float16,float16,511,0.026634665826956432
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,16,16,128,1,float16,fp8,511,0.019071999937295914
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,16,16,128,1,float16,float16,1023,0.04814933240413666
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,16,16,128,1,float16,fp8,1023,0.029845332105954487
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,16,16,128,1,float16,float16,2047,0.09587732950846355
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,16,16,128,1,float16,float16,8191,0.33506667613983154
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,16,16,128,1,float16,fp8,4095,0.10050666332244873
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,16,16,128,1,float16,fp8,8191,0.1891040007273356
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,16,16,128,1,float16,float16,16383,0.653498649597168
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,16,16,128,1,float16,float16,4095,0.1753066579500834
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,16,16,128,1,float16,fp8,16383,0.36655465761820477
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,16,16,128,1,float16,fp8,2047,0.05482133229573568
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,16,16,128,1,float16,float16,32767,1.3012746969858806
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,12,12,128,1,float16,float16,1,0.019727999965349834
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,12,12,128,1,float16,fp8,1,0.015103999525308609
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,12,12,128,1,float16,float16,3,0.01970133309563001
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,16,16,128,1,float16,fp8,32767,0.8716533184051514
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,12,12,128,1,float16,float16,7,0.019658666104078293
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,12,12,128,1,float16,fp8,3,0.014959999670584997
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,12,12,128,1,float16,float16,15,0.02019199977318446
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,12,12,128,1,float16,fp8,15,0.01504533365368843
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,12,12,128,1,float16,float16,31,0.022272000710169475
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,12,12,128,1,float16,fp8,31,0.017050666113694508
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,12,12,128,1,float16,float16,63,0.022677332162857056
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,12,12,128,1,float16,fp8,63,0.017114666601022083
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,12,12,128,1,float16,fp8,7,0.015013333410024643
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,12,12,128,1,float16,fp8,127,0.01717866708834966
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,12,12,128,1,float16,float16,127,0.022997332115968067
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,12,12,128,1,float16,float16,255,0.026320000489552815
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,12,12,128,1,float16,fp8,255,0.0198186660806338
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,12,12,128,1,float16,float16,511,0.04321600000063578
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,12,12,128,1,float16,fp8,511,0.0269813338915507
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,12,12,128,1,float16,float16,1023,0.07001600166161855
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,12,12,128,1,float16,float16,2047,0.14010133345921835
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,12,12,128,1,float16,float16,4095,0.26072533925374347
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,12,12,128,1,float16,fp8,4095,0.1483573317527771
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,12,12,128,1,float16,float16,8191,0.4990453322728475
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,12,12,128,1,float16,fp8,1023,0.045610666275024414
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,12,12,128,1,float16,fp8,8191,0.2810720006624858
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,12,12,128,1,float16,fp8,2047,0.08025066554546356
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,12,12,128,1,float16,float16,16383,0.9769386450449625
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,12,12,128,1,float16,float16,1,0.008186666915814081
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,12,12,128,1,float16,float16,3,0.008042666440208754
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,12,12,128,1,float16,fp8,1,0.009285333255926767
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,12,12,128,1,float16,fp8,16383,0.5470293362935384
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,12,12,128,1,float16,float16,7,0.008373333141207695
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,12,12,128,1,float16,fp8,15,0.009637333452701569
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,12,12,128,1,float16,fp8,7,0.009119999905427298
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,12,12,128,1,float16,float16,15,0.008341333518425623
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,12,12,128,1,float16,float16,31,0.009008000294367472
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,12,12,128,1,float16,fp8,31,0.010197333370645842
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,12,12,128,1,float16,fp8,3,0.01032533310353756
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,12,12,128,1,float16,float16,63,0.009343999748428663
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,12,12,128,1,float16,fp8,63,0.012149333953857422
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,12,12,128,1,float16,float16,127,0.00921066664159298
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,12,12,128,1,float16,fp8,255,0.011909333368142446
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,12,12,128,1,float16,float16,255,0.010389333590865135
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,12,12,128,1,float16,float16,511,0.016229332735141117
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,12,12,128,1,float16,fp8,127,0.0120319997270902
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,12,12,128,1,float16,float16,1023,0.017632000148296356
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,12,12,128,1,float16,float16,2047,0.017850667238235474
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,12,12,128,1,float16,fp8,1023,0.01740266631046931
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,12,12,128,1,float16,fp8,2047,0.01725333308180173
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,12,12,128,1,float16,float16,4095,0.018016000588734944
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,12,12,128,1,float16,fp8,511,0.01616000011563301
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,12,12,128,1,float16,fp8,4095,0.017535999417304993
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,12,12,128,1,float16,float16,16383,0.030586667358875275
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,12,12,128,1,float16,fp8,8191,0.02163200080394745
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,12,12,128,1,float16,float16,8191,0.02207999924818675
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,12,12,128,1,float16,fp8,16383,0.028917332490285236
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,12,12,128,1,float16,float16,32767,0.060736000537872314
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,12,12,128,1,float16,fp8,32767,0.0469760000705719
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,12,12,128,1,float16,float16,1,0.008127999802430471
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,12,12,128,1,float16,fp8,65535,0.08877333005269368
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,12,12,128,1,float16,fp8,1,0.009296000003814697
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,12,12,128,1,float16,fp8,3,0.009578666960199675
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,12,12,128,1,float16,float16,7,0.008378666515151659
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,12,12,128,1,float16,float16,3,0.0081386665503184
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,12,12,128,1,float16,float16,65535,0.10706667105356853
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,12,12,128,1,float16,float16,15,0.00842666688064734
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,12,12,128,1,float16,fp8,7,0.009349333122372627
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,12,12,128,1,float16,fp8,15,0.009418666362762451
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,12,12,128,1,float16,float16,31,0.009039999917149544
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,12,12,128,1,float16,fp8,31,0.010255999863147736
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,12,12,128,1,float16,float16,63,0.009322666873534521
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,12,12,128,1,float16,fp8,127,0.01192533348997434
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,12,12,128,1,float16,fp8,63,0.011994666109482447
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,12,12,128,1,float16,fp8,255,0.01191466674208641
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,12,12,128,1,float16,float16,255,0.010474666953086853
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,12,12,128,1,float16,float16,511,0.01661866654952367
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,12,12,128,1,float16,float16,1023,0.01759999990463257
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,12,12,128,1,float16,fp8,511,0.016528000434239704
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,12,12,128,1,float16,fp8,1023,0.017632000148296356
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,12,12,128,1,float16,fp8,2047,0.017674667139848072
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,12,12,128,1,float16,float16,127,0.010426666587591171
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,12,12,128,1,float16,float16,4095,0.03759466608365377
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,12,12,128,1,float16,float16,2047,0.017866666118303936
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,12,12,128,1,float16,float16,8191,0.05426666637261709
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,12,12,128,1,float16,fp8,8191,0.05118933320045471
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,12,12,128,1,float16,fp8,4095,0.03700266778469086
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,12,12,128,1,float16,float16,16383,0.09618666768074036
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,12,12,128,1,float16,fp8,16383,0.08562133709589641
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,12,12,128,1,float16,fp8,32767,0.1507200002670288
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,12,12,128,1,float16,fp8,1,0.013861333330472311
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,12,12,128,1,float16,float16,3,0.014533333480358124
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,12,12,128,1,float16,float16,1,0.01421333352724711
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,12,12,128,1,float16,fp8,65535,0.2786666750907898
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,12,12,128,1,float16,float16,32767,0.16614400347073874
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,12,12,128,1,float16,float16,65535,0.3095093369483948
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,12,12,128,1,float16,fp8,3,0.0143306665122509
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,12,12,128,1,float16,float16,7,0.014837333311637243
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,12,12,128,1,float16,fp8,7,0.01431999976436297
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,12,12,128,1,float16,float16,15,0.01682666689157486
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,12,12,128,1,float16,fp8,15,0.01629866659641266
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,12,12,128,1,float16,float16,31,0.01704000060757001
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,12,12,128,1,float16,fp8,63,0.016362667083740234
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,12,12,128,1,float16,float16,127,0.019808000574509304
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,12,12,128,1,float16,float16,63,0.017258666455745697
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,12,12,128,1,float16,fp8,127,0.018474667022625606
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,12,12,128,1,float16,float16,255,0.03317866722742716
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,12,12,128,1,float16,fp8,31,0.016250666230916977
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,12,12,128,1,float16,fp8,255,0.026629333694775898
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,12,12,128,1,float16,fp8,511,0.04490133126576742
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,12,12,128,1,float16,float16,511,0.05884266893068949
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,12,12,128,1,float16,float16,1023,0.1142080028851827
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,12,12,128,1,float16,float16,2047,0.2134880026181539
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,12,12,128,1,float16,fp8,2047,0.14801067113876343
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,12,12,128,1,float16,fp8,4095,0.2823893427848816
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,12,12,128,1,float16,fp8,1023,0.07922666768232982
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,12,12,128,1,float16,float16,8191,0.8101226488749186
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,12,12,128,1,float16,float16,1,0.008458666503429413
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,12,12,128,1,float16,float16,4095,0.4118560155232747
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,12,12,128,1,float16,fp8,1,0.009519999846816063
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,12,12,128,1,float16,float16,3,0.008463999877373377
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,12,12,128,1,float16,fp8,8191,0.5534400145212809
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,12,12,128,1,float16,fp8,3,0.009717333440979322
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,12,12,128,1,float16,fp8,7,0.009733333562811216
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,12,12,128,1,float16,float16,15,0.00871999996403853
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,12,12,128,1,float16,float16,7,0.008181333541870117
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,12,12,128,1,float16,float16,31,0.009232000137368837
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,12,12,128,1,float16,fp8,31,0.010362666721145311
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,12,12,128,1,float16,fp8,63,0.012330666184425354
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,12,12,128,1,float16,fp8,15,0.009648000200589498
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,12,12,128,1,float16,float16,63,0.010597333312034607
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,12,12,128,1,float16,fp8,127,0.012186666329701742
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,12,12,128,1,float16,float16,127,0.009573333586255709
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,12,12,128,1,float16,fp8,255,0.012106666962305704
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,12,12,128,1,float16,float16,255,0.010666667173306147
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,12,12,128,1,float16,float16,511,0.016757333030303318
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,12,12,128,1,float16,fp8,511,0.016634666671355564
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,12,12,128,1,float16,float16,2047,0.03763733307520548
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,12,12,128,1,float16,fp8,2047,0.0369759996732076
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,12,12,128,1,float16,fp8,1023,0.02943466603755951
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,12,12,128,1,float16,float16,4095,0.054527997970581055
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,12,12,128,1,float16,float16,1023,0.02943466603755951
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,12,12,128,1,float16,float16,8191,0.0960586667060852
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,12,12,128,1,float16,fp8,8191,0.08450667063395183
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,12,12,128,1,float16,fp8,4095,0.05109333495299021
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,12,12,128,1,float16,float16,16383,0.16685332854588827
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,12,12,128,1,float16,fp8,16383,0.151119997104009
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,12,12,128,1,float16,fp8,32767,0.2765760024388631
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,12,12,128,1,float16,float16,32767,0.3094453414281209
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,12,12,128,1,float16,float16,1,0.024645333488782246
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,12,12,128,1,float16,fp8,65535,0.5256693363189697
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,12,12,128,1,float16,float16,3,0.025274666647116344
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,12,12,128,1,float16,float16,65535,0.593999981880188
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,12,12,128,1,float16,fp8,3,0.021269333859284718
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,12,12,128,1,float16,fp8,1,0.020645332833131153
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,12,12,128,1,float16,float16,7,0.02588266630967458
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,12,12,128,1,float16,fp8,7,0.02199999988079071
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,12,12,128,1,float16,float16,15,0.03012266755104065
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,12,12,128,1,float16,fp8,15,0.027935999135176342
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,12,12,128,1,float16,float16,31,0.03019733230272929
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,12,12,128,1,float16,float16,63,0.030623999734719593
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,12,12,128,1,float16,float16,127,0.0424586683511734
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,12,12,128,1,float16,float16,255,0.06323199967543285
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,12,12,128,1,float16,fp8,63,0.027674667537212372
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,12,12,128,1,float16,fp8,255,0.05146666864554087
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,12,12,128,1,float16,fp8,31,0.027615999182065327
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,12,12,128,1,float16,fp8,127,0.03252266595760981
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,12,12,128,1,float16,float16,511,0.10814399520556132
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,12,12,128,1,float16,fp8,511,0.08388266960779826
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,12,12,128,1,float16,float16,1023,0.2192373275756836
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,12,12,128,1,float16,fp8,1023,0.1479626695315043
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,12,12,128,1,float16,fp8,2047,0.27808000644048053
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,256,1,12,12,128,1,float16,float16,1,0.046069333950678505
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,256,1,12,12,128,1,float16,fp8,1,0.037658666570981346
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,12,12,128,1,float16,float16,2047,0.4166239897410075
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,12,12,128,1,float16,fp8,4095,0.5364266633987427
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,12,12,128,1,float16,float16,4095,0.81058669090271
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,256,1,12,12,128,1,float16,float16,3,0.04682666560014089
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,256,1,12,12,128,1,float16,fp8,3,0.038912000755469
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,256,1,12,12,128,1,float16,float16,7,0.047797332207361855
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,256,1,12,12,128,1,float16,fp8,7,0.04029866556326548
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,256,1,12,12,128,1,float16,float16,15,0.05609600245952606
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,256,1,12,12,128,1,float16,fp8,15,0.050581331054369606
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,256,1,12,12,128,1,float16,float16,31,0.05707733333110809
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,256,1,12,12,128,1,float16,float16,63,0.06435200075308482
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,256,1,12,12,128,1,float16,fp8,63,0.0509493350982666
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,256,1,12,12,128,1,float16,float16,127,0.07941866914431255
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,256,1,12,12,128,1,float16,fp8,127,0.06404266754786174
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,256,1,12,12,128,1,float16,float16,255,0.1188320020834605
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,256,1,12,12,128,1,float16,fp8,31,0.05054933329423269
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,256,1,12,12,128,1,float16,fp8,511,0.1590773363908132
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,256,1,12,12,128,1,float16,float16,1023,0.42501334349314374
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,256,1,12,12,128,1,float16,fp8,1023,0.28381866216659546
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,256,1,12,12,128,1,float16,fp8,255,0.0956106682618459
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,512,1,12,12,128,1,float16,fp8,1,0.07006399830182393
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,256,1,12,12,128,1,float16,float16,511,0.20595200856526694
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,512,1,12,12,128,1,float16,float16,1,0.08770666519800822
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,512,1,12,12,128,1,float16,float16,3,0.08914666374524434
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,512,1,12,12,128,1,float16,fp8,3,0.07227199772993724
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,512,1,12,12,128,1,float16,float16,7,0.09061866998672485
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,512,1,12,12,128,1,float16,fp8,7,0.07532800237337749
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,512,1,12,12,128,1,float16,fp8,15,0.09494400024414062
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,512,1,12,12,128,1,float16,float16,15,0.10717866818110149
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,512,1,12,12,128,1,float16,float16,31,0.11865066488583882
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,512,1,12,12,128,1,float16,fp8,31,0.09554133812586467
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,512,1,12,12,128,1,float16,fp8,63,0.10082667072614034
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,512,1,12,12,128,1,float16,float16,127,0.15030399958292642
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,512,1,12,12,128,1,float16,fp8,127,0.12029332915941875
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,512,1,12,12,128,1,float16,float16,63,0.12131200234095256
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,512,1,12,12,128,1,float16,float16,255,0.22744532426198324
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,512,1,12,12,128,1,float16,float16,511,0.3957706689834595
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,512,1,12,12,128,1,float16,fp8,255,0.1813760002454122
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,12,12,128,1,float16,float16,1,0.008474666625261307
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,12,12,128,1,float16,fp8,1,0.009626666704813639
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,12,12,128,1,float16,float16,3,0.008623999853928884
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,512,1,12,12,128,1,float16,fp8,511,0.30394667387008667
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,12,12,128,1,float16,fp8,7,0.009775999933481216
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,12,12,128,1,float16,float16,7,0.009423999736706415
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,12,12,128,1,float16,fp8,3,0.009712000067035357
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,12,12,128,1,float16,float16,31,0.009525333220760027
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,12,12,128,1,float16,fp8,15,0.009712000067035357
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,12,12,128,1,float16,fp8,31,0.010709332923094431
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,12,12,128,1,float16,float16,15,0.008757333581646284
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,12,12,128,1,float16,fp8,63,0.012378666549921036
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,12,12,128,1,float16,float16,63,0.009573333586255709
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,12,12,128,1,float16,float16,127,0.010618666807810465
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,12,12,128,1,float16,float16,255,0.010949333508809408
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,12,12,128,1,float16,fp8,255,0.0124746672809124
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,12,12,128,1,float16,float16,511,0.02809600035349528
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,12,12,128,1,float16,fp8,127,0.01227733368674914
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,12,12,128,1,float16,float16,1023,0.03267733256022135
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,12,12,128,1,float16,fp8,1023,0.03249066571394602
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,12,12,128,1,float16,fp8,511,0.02779199928045273
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,12,12,128,1,float16,fp8,2047,0.0436106671889623
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,12,12,128,1,float16,float16,2047,0.04638933142026266
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,12,12,128,1,float16,float16,4095,0.07758399844169617
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,12,12,128,1,float16,fp8,8191,0.12111999591191609
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,12,12,128,1,float16,fp8,4095,0.06899199883143108
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,12,12,128,1,float16,float16,16383,0.23959465821584067
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,12,12,128,1,float16,float16,8191,0.13267733653386435
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,12,12,128,1,float16,fp8,16383,0.22028799851735434
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,12,12,128,1,float16,float16,32767,0.454645315806071
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,12,12,128,1,float16,float16,65535,0.8876746495564779
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,12,12,128,1,float16,fp8,32767,0.41365333398183185
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,12,12,128,1,float16,fp8,65535,0.8040746847788492
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1024,1,12,12,128,1,float16,float16,1,0.17012800772984824
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1024,1,12,12,128,1,float16,fp8,1,0.13436800241470337
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1024,1,12,12,128,1,float16,float16,3,0.17392534017562866
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1024,1,12,12,128,1,float16,float16,7,0.1825173298517863
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1024,1,12,12,128,1,float16,fp8,3,0.13910933335622153
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1024,1,12,12,128,1,float16,fp8,15,0.18738667170206705
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1024,1,12,12,128,1,float16,fp8,7,0.14657066265741983
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1024,1,12,12,128,1,float16,float16,31,0.2319413423538208
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1024,1,12,12,128,1,float16,float16,15,0.2230666677157084
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1024,1,12,12,128,1,float16,fp8,63,0.19427200158437094
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1024,1,12,12,128,1,float16,fp8,31,0.19324266910552979
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1024,1,12,12,128,1,float16,fp8,127,0.23228800296783447
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1024,1,12,12,128,1,float16,float16,63,0.23501867055892944
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1024,1,12,12,128,1,float16,float16,127,0.2913173238436381
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1024,1,12,12,128,1,float16,float16,255,0.44090131918589276
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1024,1,12,12,128,1,float16,fp8,255,0.35447998841603595
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2048,1,12,12,128,1,float16,float16,1,0.3410773277282715
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2048,1,12,12,128,1,float16,fp8,1,0.2640426754951477
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2048,1,12,12,128,1,float16,fp8,3,0.28067733844121295
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2048,1,12,12,128,1,float16,float16,15,0.44013333320617676
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2048,1,12,12,128,1,float16,fp8,7,0.3060906728108724
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2048,1,12,12,128,1,float16,float16,3,0.37218666076660156
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2048,1,12,12,128,1,float16,float16,7,0.38275198141733807
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2048,1,12,12,128,1,float16,fp8,15,0.37914665540059406
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2048,1,12,12,128,1,float16,fp8,31,0.37908267974853516
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,12,12,128,1,float16,fp8,1,0.009061333412925402
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,12,12,128,1,float16,float16,1,0.013781332721312841
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2048,1,12,12,128,1,float16,float16,31,0.4556373357772827
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2048,1,12,12,128,1,float16,fp8,63,0.3806826670964559
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2048,1,12,12,128,1,float16,float16,63,0.46195733547210693
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,12,12,128,1,float16,float16,3,0.013754667093356451
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,12,12,128,1,float16,fp8,3,0.00916800027092298
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2048,1,12,12,128,1,float16,float16,127,0.5727200110753378
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,12,12,128,1,float16,fp8,7,0.01002133327225844
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,12,12,128,1,float16,float16,15,0.014352000008026758
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2048,1,12,12,128,1,float16,fp8,127,0.45680534839630127
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,12,12,128,1,float16,float16,7,0.013701333353916803
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,12,12,128,1,float16,fp8,15,0.01055466632048289
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,12,12,128,1,float16,float16,31,0.015728000551462173
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,12,12,128,1,float16,fp8,31,0.011882666498422623
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,12,12,128,1,float16,float16,63,0.015962666521469753
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,12,12,128,1,float16,float16,127,0.01602666700879733
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,12,12,128,1,float16,fp8,63,0.011877333124478659
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,12,12,128,1,float16,float16,511,0.024735999604066212
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,12,12,128,1,float16,float16,255,0.018346666047970455
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,12,12,128,1,float16,fp8,511,0.018874666343132656
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,12,12,128,1,float16,fp8,255,0.013573333621025085
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,12,12,128,1,float16,fp8,127,0.011999999483426413
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,12,12,128,1,float16,fp8,1023,0.02961066613594691
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,12,12,128,1,float16,float16,2047,0.08673600355784099
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,12,12,128,1,float16,float16,1023,0.04478933413823446
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,12,12,128,1,float16,fp8,2047,0.05153599878152212
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,12,12,128,1,float16,fp8,4095,0.09829866886138916
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,12,12,128,1,float16,float16,4095,0.16570132970809937
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,12,12,128,1,float16,float16,8191,0.315285325050354
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,12,12,128,1,float16,fp8,8191,0.1860533356666565
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,12,12,128,1,float16,float16,16383,0.6128693421681722
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,8,8,128,1,float16,float16,1,0.014245333770910898
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,8,8,128,1,float16,fp8,1,0.009194666519761086
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,12,12,128,1,float16,fp8,16383,0.36079466342926025
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,12,12,128,1,float16,fp8,32767,0.8463466962178549
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,12,12,128,1,float16,float16,32767,1.2235573132832844
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,8,8,128,1,float16,float16,3,0.014218666901191076
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,8,8,128,1,float16,float16,7,0.01413333291808764
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,8,8,128,1,float16,fp8,3,0.010159999753038088
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,8,8,128,1,float16,fp8,7,0.009242666885256767
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,8,8,128,1,float16,float16,15,0.014453332871198654
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,8,8,128,1,float16,float16,31,0.016106666376193363
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,8,8,128,1,float16,fp8,15,0.010015999898314476
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,8,8,128,1,float16,fp8,31,0.011834666132926941
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,8,8,128,1,float16,fp8,63,0.011893333246310553
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,8,8,128,1,float16,float16,63,0.01617066686352094
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,8,8,128,1,float16,fp8,127,0.011941333611806234
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,8,8,128,1,float16,float16,127,0.01647466669480006
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,8,8,128,1,float16,fp8,255,0.013653332988421122
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,8,8,128,1,float16,float16,255,0.01869333287080129
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,8,8,128,1,float16,float16,511,0.02622399975856145
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,8,8,128,1,float16,float16,1023,0.04827199876308441
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,8,8,128,1,float16,float16,2047,0.09641066193580627
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,8,8,128,1,float16,fp8,511,0.01903466631968816
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,8,8,128,1,float16,float16,4095,0.1760853330294291
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,8,8,128,1,float16,fp8,1023,0.029525332152843475
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,8,8,128,1,float16,fp8,2047,0.054661333560943604
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,8,8,128,1,float16,fp8,4095,0.10073600212732951
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,8,8,128,1,float16,float16,8191,0.3346560001373291
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,8,8,128,1,float16,fp8,8191,0.18925867478052774
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,8,8,128,1,float16,float16,16383,0.6538986762364706
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,8,8,128,1,float16,float16,1,0.009082666908701261
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,8,8,128,1,float16,float16,32767,1.3083093166351318
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,8,8,128,1,float16,fp8,16383,0.3670080105463664
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,8,8,128,1,float16,fp8,1,0.009039999917149544
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,8,8,128,1,float16,fp8,3,0.009285333255926767
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,8,8,128,1,float16,fp8,32767,0.8643946647644043
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,8,8,128,1,float16,float16,3,0.008976000050703684
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,8,8,128,1,float16,float16,7,0.008293333152929941
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,8,8,128,1,float16,float16,15,0.008320000022649765
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,8,8,128,1,float16,fp8,7,0.01032533310353756
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,8,8,128,1,float16,fp8,15,0.009306666751702627
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,8,8,128,1,float16,fp8,31,0.010069333637754122
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,8,8,128,1,float16,float16,31,0.009088000282645226
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,8,8,128,1,float16,float16,63,0.010090666512648264
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,8,8,128,1,float16,fp8,127,0.012223999947309494
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,8,8,128,1,float16,float16,127,0.009285333255926767
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,8,8,128,1,float16,fp8,63,0.011861333002646765
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,8,8,128,1,float16,float16,511,0.016176000237464905
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,8,8,128,1,float16,float16,255,0.010565333068370819
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,8,8,128,1,float16,fp8,255,0.011786667009194693
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,8,8,128,1,float16,fp8,1023,0.01725333308180173
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,8,8,128,1,float16,fp8,511,0.01599466676513354
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,8,8,128,1,float16,float16,2047,0.01746133342385292
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,8,8,128,1,float16,float16,1023,0.017573333034912746
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,8,8,128,1,float16,float16,4095,0.01801066721479098
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,8,8,128,1,float16,fp8,2047,0.01743999992807706
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,8,8,128,1,float16,fp8,4095,0.01764800027012825
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,8,8,128,1,float16,float16,8191,0.029685333371162415
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,8,8,128,1,float16,fp8,8191,0.02961066613594691
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,8,8,128,1,float16,fp8,16383,0.04038933416207632
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,8,8,128,1,float16,float16,16383,0.040991999208927155
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,8,8,128,1,float16,fp8,32767,0.062080000837643944
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,8,8,128,1,float16,float16,32767,0.07211733361085255
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,8,8,128,1,float16,fp8,65535,0.11337600151697795
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,8,8,128,1,float16,float16,65535,0.12443733215332031
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,8,8,128,1,float16,float16,131071,0.22870934009552002
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,8,8,128,1,float16,float16,1,0.008165333420038223
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,8,8,128,1,float16,fp8,131071,0.2059146761894226
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,8,8,128,1,float16,float16,3,0.0081386665503184
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,8,8,128,1,float16,float16,7,0.008240000034372011
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,8,8,128,1,float16,fp8,1,0.009242666885256767
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,8,8,128,1,float16,fp8,3,0.010106666634480158
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,8,8,128,1,float16,float16,15,0.008442666381597519
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,8,8,128,1,float16,fp8,7,0.010015999898314476
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,8,8,128,1,float16,fp8,15,0.009455999980370203
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,8,8,128,1,float16,float16,31,0.009269333134094873
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,8,8,128,1,float16,fp8,31,0.010101333260536194
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,8,8,128,1,float16,float16,63,0.009232000137368837
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,8,8,128,1,float16,fp8,127,0.01192533348997434
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,8,8,128,1,float16,float16,127,0.010144000252087912
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,8,8,128,1,float16,fp8,63,0.01190399999419848
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,8,8,128,1,float16,fp8,255,0.01201066623131434
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,8,8,128,1,float16,float16,255,0.01044800008336703
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,8,8,128,1,float16,float16,511,0.016447999825080235
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,8,8,128,1,float16,float16,1023,0.01736533393462499
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,8,8,128,1,float16,fp8,511,0.016415999581416447
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,8,8,128,1,float16,fp8,1023,0.017301333447297413
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,8,8,128,1,float16,float16,4095,0.029722665747006733
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,8,8,128,1,float16,fp8,4095,0.029706666866938274
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,8,8,128,1,float16,float16,2047,0.017722666263580322
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,8,8,128,1,float16,fp8,2047,0.01758933315674464
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,8,8,128,1,float16,float16,8191,0.04098666707674662
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,8,8,128,1,float16,fp8,8191,0.04019733270009359
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,8,8,128,1,float16,float16,16383,0.07094933092594147
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,8,8,128,1,float16,fp8,16383,0.0610346645116806
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,8,8,128,1,float16,float16,32767,0.12128000458081563
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,8,8,128,1,float16,fp8,32767,0.11126400033632915
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,8,8,128,1,float16,float16,1,0.013722666849692663
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,8,8,128,1,float16,float16,65535,0.21559999386469522
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,8,8,128,1,float16,fp8,65535,0.1938719948132833
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,8,8,128,1,float16,float16,131071,0.4081706603368123
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,8,8,128,1,float16,fp8,131071,0.3612373272577922
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,8,8,128,1,float16,fp8,1,0.015024000157912573
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,8,8,128,1,float16,float16,3,0.014053333550691605
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,8,8,128,1,float16,fp8,3,0.015360000232855478
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,8,8,128,1,float16,float16,7,0.013866666704416275
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,8,8,128,1,float16,fp8,15,0.016650666793187458
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,8,8,128,1,float16,float16,31,0.014725333700577417
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,8,8,128,1,float16,fp8,7,0.015509333461523056
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,8,8,128,1,float16,float16,15,0.014479999740918478
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,8,8,128,1,float16,float16,63,0.015216000378131866
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,8,8,128,1,float16,fp8,31,0.020213333268960316
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,8,8,128,1,float16,fp8,63,0.02037866661945979
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,8,8,128,1,float16,float16,127,0.016869333883126576
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,8,8,128,1,float16,fp8,127,0.02042666698495547
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,8,8,128,1,float16,float16,255,0.02516799916823705
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,8,8,128,1,float16,fp8,255,0.02363733450571696
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,8,8,128,1,float16,fp8,511,0.034858666360378265
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,8,8,128,1,float16,fp8,1023,0.05861333509286245
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,8,8,128,1,float16,float16,1023,0.0953439970811208
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,8,8,128,1,float16,float16,511,0.050426666935284935
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,8,8,128,1,float16,float16,2047,0.18294399976730347
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,8,8,128,1,float16,fp8,2047,0.10311999917030334
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,8,8,128,1,float16,float16,4095,0.3509013255437215
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,8,8,128,1,float16,fp8,4095,0.19092265764872232
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,8,8,128,1,float16,float16,8191,0.6894240379333496
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,8,8,128,1,float16,fp8,8191,0.3631360133488973
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,8,8,128,1,float16,float16,1,0.008298666526873907
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,8,8,128,1,float16,fp8,1,0.009759999811649323
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,8,8,128,1,float16,float16,16383,1.2996533711751301
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,8,8,128,1,float16,float16,3,0.00854399986565113
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,8,8,128,1,float16,fp8,16383,0.7090720335642496
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,8,8,128,1,float16,fp8,3,0.009765333185593287
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,8,8,128,1,float16,float16,7,0.008453333129485449
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,8,8,128,1,float16,fp8,7,0.009546666716535887
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,8,8,128,1,float16,float16,15,0.008463999877373377
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,8,8,128,1,float16,fp8,15,0.009679999823371569
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,8,8,128,1,float16,fp8,63,0.012213333199421564
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,8,8,128,1,float16,float16,31,0.009306666751702627
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,8,8,128,1,float16,fp8,31,0.010293333480755487
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,8,8,128,1,float16,float16,127,0.009455999980370203
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,8,8,128,1,float16,float16,63,0.009301333377758661
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,8,8,128,1,float16,float16,255,0.01051733394463857
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,8,8,128,1,float16,fp8,127,0.012133333832025528
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,8,8,128,1,float16,fp8,255,0.012304000556468964
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,8,8,128,1,float16,float16,511,0.016602666427691776
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,8,8,128,1,float16,fp8,511,0.016544000556071598
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,8,8,128,1,float16,float16,1023,0.017818666994571686
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,8,8,128,1,float16,fp8,1023,0.01747200017174085
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,8,8,128,1,float16,fp8,2047,0.029530666768550873
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,8,8,128,1,float16,float16,4095,0.04077333211898804
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,8,8,128,1,float16,fp8,4095,0.04002666721741358
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,8,8,128,1,float16,float16,8191,0.06808533271153767
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,8,8,128,1,float16,float16,2047,0.029839999973773956
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,8,8,128,1,float16,fp8,8191,0.05898133416970571
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,8,8,128,1,float16,float16,16383,0.11024000247319539
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,8,8,128,1,float16,fp8,16383,0.10045866171518962
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,8,8,128,1,float16,float16,32767,0.19502399365107217
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,8,8,128,1,float16,fp8,32767,0.17504000663757324
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,8,8,128,1,float16,fp8,65535,0.32889066139856976
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,8,8,128,1,float16,float16,65535,0.3677813212076823
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,8,8,128,1,float16,float16,1,0.019178666174411774
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,8,8,128,1,float16,fp8,131071,0.6401973168055216
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,8,8,128,1,float16,fp8,1,0.01524266724785169
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,8,8,128,1,float16,float16,3,0.01959466685851415
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,8,8,128,1,float16,float16,131071,0.7174133459726969
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,8,8,128,1,float16,fp8,3,0.015568000574906668
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,8,8,128,1,float16,float16,7,0.0198186660806338
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,8,8,128,1,float16,fp8,7,0.016021333634853363
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,8,8,128,1,float16,float16,15,0.021664001047611237
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,8,8,128,1,float16,float16,31,0.022143999735514324
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,8,8,128,1,float16,fp8,31,0.01982933282852173
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,8,8,128,1,float16,fp8,15,0.020021333048741024
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,8,8,128,1,float16,float16,63,0.02218666672706604
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,8,8,128,1,float16,fp8,63,0.019861333072185516
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,8,8,128,1,float16,float16,127,0.02683199942111969
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,8,8,128,1,float16,fp8,127,0.023237332701683044
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,8,8,128,1,float16,fp8,255,0.03469333300987879
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,8,8,128,1,float16,float16,255,0.045941332976023354
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,8,8,128,1,float16,float16,511,0.0773173322280248
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,8,8,128,1,float16,fp8,511,0.05856533348560333
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,8,8,128,1,float16,float16,1023,0.15803733468055725
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,8,8,128,1,float16,fp8,1023,0.10218133529027303
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,8,8,128,1,float16,float16,2047,0.29685332377751666
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,8,8,128,1,float16,fp8,2047,0.19144533077875772
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,8,8,128,1,float16,float16,4095,0.571727991104126
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,8,8,128,1,float16,fp8,4095,0.36824532349904376
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,256,1,8,8,128,1,float16,float16,1,0.03425066669782003
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,256,1,8,8,128,1,float16,fp8,1,0.026464000344276428
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,256,1,8,8,128,1,float16,float16,3,0.0347626656293869
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,8,8,128,1,float16,float16,8191,1.1250666777292888
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,256,1,8,8,128,1,float16,fp8,3,0.027322667340437572
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,8,8,128,1,float16,fp8,8191,0.7233546574910482
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,256,1,8,8,128,1,float16,float16,7,0.0354666660229365
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,256,1,8,8,128,1,float16,float16,15,0.03917866696914037
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,256,1,8,8,128,1,float16,fp8,7,0.027930667002995808
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,256,1,8,8,128,1,float16,float16,31,0.03982933362325033
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,256,1,8,8,128,1,float16,float16,63,0.04126933217048645
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,256,1,8,8,128,1,float16,fp8,31,0.03551466763019562
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,256,1,8,8,128,1,float16,fp8,15,0.03549866626660029
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,256,1,8,8,128,1,float16,fp8,63,0.03545066714286804
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,256,1,8,8,128,1,float16,float16,127,0.05793066819508871
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,256,1,8,8,128,1,float16,fp8,127,0.04201599955558777
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,256,1,8,8,128,1,float16,fp8,255,0.0662720004717509
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,256,1,8,8,128,1,float16,float16,255,0.08523733417193095
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,256,1,8,8,128,1,float16,float16,511,0.14596266547838846
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,256,1,8,8,128,1,float16,fp8,511,0.10898666580518086
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,256,1,8,8,128,1,float16,fp8,1023,0.19319466749827066
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,256,1,8,8,128,1,float16,float16,1023,0.2991466720898946
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,256,1,8,8,128,1,float16,float16,2047,0.5735946496327718
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,512,1,8,8,128,1,float16,float16,1,0.060933331648508705
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,512,1,8,8,128,1,float16,fp8,1,0.04806933303674062
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,512,1,8,8,128,1,float16,float16,3,0.06228800117969513
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,256,1,8,8,128,1,float16,fp8,2047,0.3646080096562703
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,512,1,8,8,128,1,float16,float16,7,0.06284266710281372
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,512,1,8,8,128,1,float16,fp8,3,0.050000001986821495
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,512,1,8,8,128,1,float16,fp8,7,0.05212266743183136
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,512,1,8,8,128,1,float16,float16,15,0.07292266686757405
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,512,1,8,8,128,1,float16,fp8,15,0.06539733211199443
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,512,1,8,8,128,1,float16,float16,31,0.0767146646976471
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,512,1,8,8,128,1,float16,fp8,31,0.06541866560777028
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,512,1,8,8,128,1,float16,fp8,63,0.06609599788983662
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,512,1,8,8,128,1,float16,float16,127,0.10404800375302632
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,512,1,8,8,128,1,float16,float16,63,0.0842026670773824
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,512,1,8,8,128,1,float16,fp8,127,0.08256533245245616
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,512,1,8,8,128,1,float16,fp8,255,0.12426132957140605
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,512,1,8,8,128,1,float16,float16,511,0.2704373399416606
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,512,1,8,8,128,1,float16,float16,255,0.15709333618481955
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,512,1,8,8,128,1,float16,float16,1023,0.5606880187988281
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,512,1,8,8,128,1,float16,fp8,511,0.2076266606648763
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,8,8,128,1,float16,float16,1,0.009253333633144697
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,512,1,8,8,128,1,float16,fp8,1023,0.3734346628189087
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,8,8,128,1,float16,float16,3,0.008421333506703377
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,8,8,128,1,float16,fp8,1,0.010506667196750641
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,8,8,128,1,float16,fp8,3,0.009573333586255709
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,8,8,128,1,float16,float16,7,0.008602666358153025
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,8,8,128,1,float16,fp8,7,0.009509333098928133
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,8,8,128,1,float16,float16,15,0.008703999842206636
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,8,8,128,1,float16,fp8,15,0.010586666564146677
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,8,8,128,1,float16,float16,31,0.009418666362762451
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,8,8,128,1,float16,fp8,31,0.010464000205198923
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,8,8,128,1,float16,float16,127,0.009589333087205887
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,8,8,128,1,float16,fp8,63,0.012181332955757776
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,8,8,128,1,float16,float16,63,0.010608000059922537
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,8,8,128,1,float16,fp8,127,0.012383999923865
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,8,8,128,1,float16,float16,255,0.010543999572594961
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,8,8,128,1,float16,fp8,255,0.012080000092585882
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,8,8,128,1,float16,fp8,511,0.016869333883126576
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,8,8,128,1,float16,float16,511,0.01704000060757001
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,8,8,128,1,float16,float16,1023,0.029994666576385498
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,8,8,128,1,float16,fp8,1023,0.02935466667016347
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,8,8,128,1,float16,float16,4095,0.05719466507434845
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,8,8,128,1,float16,fp8,2047,0.03719999889532725
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,8,8,128,1,float16,float16,2047,0.03797333439191183
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,8,8,128,1,float16,float16,8191,0.09806399544080098
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,8,8,128,1,float16,fp8,4095,0.05173333485921224
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,8,8,128,1,float16,fp8,8191,0.08705600102742513
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,8,8,128,1,float16,float16,16383,0.1706506609916687
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,8,8,128,1,float16,fp8,16383,0.15335999925931296
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,8,8,128,1,float16,float16,32767,0.31333333253860474
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,8,8,128,1,float16,fp8,32767,0.2834666570027669
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,8,8,128,1,float16,fp8,65535,0.5353493293126425
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,8,8,128,1,float16,float16,65535,0.6001280148824056
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,8,8,128,1,float16,fp8,131071,0.8381706873575846
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1024,1,8,8,128,1,float16,float16,1,0.11467199524243672
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1024,1,8,8,128,1,float16,fp8,1,0.09129599730173747
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,8,8,128,1,float16,float16,131071,1.0119360287984211
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1024,1,8,8,128,1,float16,float16,3,0.1167039970556895
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1024,1,8,8,128,1,float16,fp8,3,0.0946560005346934
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1024,1,8,8,128,1,float16,float16,7,0.11880532900492351
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1024,1,8,8,128,1,float16,fp8,7,0.09920533498128255
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1024,1,8,8,128,1,float16,float16,15,0.14592533310254416
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1024,1,8,8,128,1,float16,fp8,15,0.1250879963239034
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1024,1,8,8,128,1,float16,float16,31,0.15666666626930237
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1024,1,8,8,128,1,float16,float16,63,0.15930666526158652
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1024,1,8,8,128,1,float16,fp8,63,0.13177067041397095
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1024,1,8,8,128,1,float16,fp8,31,0.1283199985822042
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1024,1,8,8,128,1,float16,float16,127,0.19711466630299887
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1024,1,8,8,128,1,float16,float16,255,0.298581341902415
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1024,1,8,8,128,1,float16,fp8,255,0.23945599794387817
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1024,1,8,8,128,1,float16,fp8,127,0.15754666924476624
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1024,1,8,8,128,1,float16,fp8,511,0.4023840030034383
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2048,1,8,8,128,1,float16,float16,1,0.22429867585500082
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1024,1,8,8,128,1,float16,float16,511,0.5215040047963461
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2048,1,8,8,128,1,float16,float16,3,0.23371734221776327
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2048,1,8,8,128,1,float16,fp8,1,0.1771413286526998
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2048,1,8,8,128,1,float16,fp8,3,0.1837600072224935
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2048,1,8,8,128,1,float16,float16,7,0.254314661026001
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2048,1,8,8,128,1,float16,fp8,7,0.1958613395690918
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2048,1,8,8,128,1,float16,float16,15,0.29597334067026776
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2048,1,8,8,128,1,float16,fp8,15,0.25336533784866333
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2048,1,8,8,128,1,float16,float16,63,0.3107786575953166
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2048,1,8,8,128,1,float16,fp8,31,0.2550560037295024
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2048,1,8,8,128,1,float16,float16,31,0.3064639965693156
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2048,1,8,8,128,1,float16,fp8,63,0.2563199996948242
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2048,1,8,8,128,1,float16,float16,127,0.3854186534881592
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,8,8,128,1,float16,float16,1,0.010010666524370512
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,8,8,128,1,float16,fp8,1,0.009850666547815004
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,8,8,128,1,float16,float16,3,0.008586666857202848
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2048,1,8,8,128,1,float16,fp8,127,0.3071146607398987
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,8,8,128,1,float16,fp8,3,0.009930666536092758
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,8,8,128,1,float16,fp8,7,0.009786666681369146
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,8,8,128,1,float16,float16,7,0.00854399986565113
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2048,1,8,8,128,1,float16,fp8,255,0.4689919948577881
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2048,1,8,8,128,1,float16,float16,255,0.5841120084126791
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,8,8,128,1,float16,float16,15,0.008896000062425932
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,8,8,128,1,float16,fp8,15,0.009957333405812582
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,8,8,128,1,float16,fp8,31,0.010735999792814255
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,8,8,128,1,float16,float16,31,0.009712000067035357
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,8,8,128,1,float16,float16,63,0.00961599995692571
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,8,8,128,1,float16,float16,127,0.009818666925032934
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,8,8,128,1,float16,fp8,63,0.012533333152532578
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,8,8,128,1,float16,fp8,255,0.012469333906968435
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,8,8,128,1,float16,float16,255,0.011066666493813196
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,8,8,128,1,float16,fp8,127,0.01251199965675672
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,8,8,128,1,float16,float16,511,0.02861333390076955
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,8,8,128,1,float16,fp8,511,0.02834133307139079
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,8,8,128,1,float16,float16,1023,0.03313066562016805
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,8,8,128,1,float16,fp8,1023,0.0329120010137558
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,8,8,128,1,float16,fp8,2047,0.0441599984963735
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,8,8,128,1,float16,float16,2047,0.04971733192602793
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,8,8,128,1,float16,float16,4095,0.08636266986529033
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,8,8,128,1,float16,fp8,4095,0.0726506660381953
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,8,8,128,1,float16,float16,8191,0.147189329067866
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,8,8,128,1,float16,fp8,8191,0.12677866220474243
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,8,8,128,1,float16,float16,16383,0.2677173415819804
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,8,8,128,1,float16,fp8,16383,0.22564800580342612
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,8,8,128,1,float16,fp8,32767,0.4242773453394572
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,8,8,128,1,float16,float16,32767,0.5099253257115682
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,4,4,128,1,float16,float16,1,0.008703999842206636
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,8,8,128,1,float16,fp8,65535,0.8268746534983317
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,8,8,128,1,float16,float16,65535,1.0079519748687744
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,4,4,128,1,float16,fp8,1,0.009850666547815004
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,4,4,128,1,float16,float16,3,0.008517333616813024
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,4,4,128,1,float16,fp8,3,0.009930666536092758
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,4,4,128,1,float16,float16,7,0.008816000074148178
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,4,4,128,1,float16,fp8,7,0.009712000067035357
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,4,4,128,1,float16,float16,15,0.00902399979531765
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,4,4,128,1,float16,fp8,15,0.010144000252087912
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,4,4,128,1,float16,float16,63,0.009642666826645533
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,4,4,128,1,float16,fp8,31,0.01073066641887029
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,4,4,128,1,float16,float16,31,0.009568000212311745
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,4,4,128,1,float16,fp8,63,0.012448000411192576
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,4,4,128,1,float16,fp8,127,0.012448000411192576
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,4,4,128,1,float16,float16,127,0.009877333417534828
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,4,4,128,1,float16,float16,255,0.011061333119869232
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,4,4,128,1,float16,fp8,255,0.012586666891972223
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,4,4,128,1,float16,float16,511,0.028543998797734577
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,4,4,128,1,float16,fp8,511,0.028373333315054577
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,4,4,128,1,float16,float16,1023,0.03331200033426285
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,4,4,128,1,float16,fp8,1023,0.03296533226966858
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,4,4,128,1,float16,float16,2047,0.05018133421738943
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,4,4,128,1,float16,fp8,2047,0.04442666471004486
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,4,4,128,1,float16,float16,4095,0.08214933176835378
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,4,4,128,1,float16,fp8,4095,0.07285866638024648
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,4,4,128,1,float16,float16,8191,0.14758933583895364
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,4,4,128,1,float16,fp8,8191,0.12452266613642375
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,4,4,128,1,float16,float16,16383,0.26746666431427
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,4,4,128,1,float16,fp8,16383,0.22613867123921713
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,4,4,128,1,float16,float16,32767,0.5103840033213297
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,4,4,128,1,float16,fp8,32767,0.42402132352193195
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,4,4,128,1,float16,fp8,1,0.00926399976015091
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,4,4,128,1,float16,float16,1,0.007962666451931
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,4,4,128,1,float16,float16,3,0.008143999924262365
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,4,4,128,1,float16,float16,65535,1.0079092979431152
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,4,4,128,1,float16,fp8,65535,0.8261013031005859
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,4,4,128,1,float16,fp8,3,0.009301333377758661
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,4,4,128,1,float16,float16,7,0.007978666573762894
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,4,4,128,1,float16,float16,15,0.008261333530147871
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,4,4,128,1,float16,fp8,15,0.009317333499590555
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,4,4,128,1,float16,fp8,7,0.00919999989370505
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,4,4,128,1,float16,float16,31,0.008896000062425932
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,4,4,128,1,float16,float16,63,0.009226666763424873
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,4,4,128,1,float16,fp8,31,0.010133333504199982
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,4,4,128,1,float16,fp8,63,0.012005332857370377
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,4,4,128,1,float16,float16,127,0.009125333279371262
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,4,4,128,1,float16,fp8,127,0.011999999483426413
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,4,4,128,1,float16,float16,255,0.010128000130256018
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,4,4,128,1,float16,fp8,255,0.011823999385039011
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,4,4,128,1,float16,float16,511,0.0162773331006368
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,4,4,128,1,float16,float16,2047,0.0174346665541331
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,4,4,128,1,float16,fp8,511,0.015930666277805965
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,4,4,128,1,float16,fp8,1023,0.01720533271630605
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,4,4,128,1,float16,float16,1023,0.017370666066805523
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,4,4,128,1,float16,fp8,2047,0.017338667064905167
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,4,4,128,1,float16,float16,4095,0.017498667041460674
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,4,4,128,1,float16,fp8,4095,0.017535999417304993
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,4,4,128,1,float16,fp8,8191,0.01747200017174085
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,4,4,128,1,float16,float16,8191,0.017727999637524288
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,4,4,128,1,float16,fp8,16383,0.017583999782800674
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,4,4,128,1,float16,float16,16383,0.018165333817402523
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,4,4,128,1,float16,float16,32767,0.024149333437283833
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,4,4,128,1,float16,fp8,32767,0.023183998962243397
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,4,4,128,1,float16,float16,65535,0.04553066690762838
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,4,4,128,1,float16,float16,1,0.007994666695594788
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,4,4,128,1,float16,fp8,65535,0.03389866650104523
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,4,4,128,1,float16,fp8,1,0.009248000259200731
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,4,4,128,1,float16,float16,3,0.008101333553592363
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,4,4,128,1,float16,float16,131071,0.07857066889603932
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,4,4,128,1,float16,fp8,131071,0.06629333396752675
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,4,4,128,1,float16,fp8,3,0.009109333157539368
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,4,4,128,1,float16,float16,7,0.008463999877373377
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,4,4,128,1,float16,fp8,7,0.009408000235756239
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,4,4,128,1,float16,float16,31,0.00926399976015091
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,4,4,128,1,float16,float16,15,0.008234666660428047
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,4,4,128,1,float16,fp8,15,0.009573333586255709
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,4,4,128,1,float16,fp8,31,0.009952000031868616
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,4,4,128,1,float16,float16,63,0.009290666629870733
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,4,4,128,1,float16,float16,127,0.00919999989370505
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,4,4,128,1,float16,fp8,63,0.011968000481526056
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,4,4,128,1,float16,fp8,127,0.01219733307758967
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,4,4,128,1,float16,float16,255,0.010368000095089277
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,4,4,128,1,float16,fp8,511,0.016106666376193363
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,4,4,128,1,float16,fp8,255,0.012080000092585882
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,4,4,128,1,float16,float16,511,0.016336000214020412
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,4,4,128,1,float16,float16,1023,0.017317333569129307
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,4,4,128,1,float16,fp8,1023,0.017103999853134155
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,4,4,128,1,float16,float16,2047,0.01756799966096878
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,4,4,128,1,float16,fp8,2047,0.01757866640885671
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,4,4,128,1,float16,float16,4095,0.017797333498795826
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,4,4,128,1,float16,fp8,4095,0.017632000148296356
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,4,4,128,1,float16,float16,8191,0.029882666965325672
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,4,4,128,1,float16,fp8,8191,0.029685333371162415
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,4,4,128,1,float16,fp8,16383,0.040287998815377556
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,4,4,128,1,float16,float16,16383,0.040949332217375435
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,4,4,128,1,float16,float16,32767,0.07161599894364674
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,4,4,128,1,float16,fp8,32767,0.06182933350404104
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,4,4,128,1,float16,float16,65535,0.12405332922935486
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,4,4,128,1,float16,fp8,65535,0.11317867040634155
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,4,4,128,1,float16,float16,1,0.013957332819700241
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,4,4,128,1,float16,fp8,1,0.009130666653315226
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,4,4,128,1,float16,float16,3,0.014277332772811254
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,4,4,128,1,float16,fp8,131071,0.20609599351882935
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,4,4,128,1,float16,float16,131071,0.22907733917236328
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,4,4,128,1,float16,fp8,3,0.009354666496316591
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,4,4,128,1,float16,float16,7,0.01404800017674764
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,4,4,128,1,float16,fp8,7,0.009077333534757296
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,4,4,128,1,float16,float16,15,0.014501333236694336
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,4,4,128,1,float16,fp8,15,0.0099093330403169
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,4,4,128,1,float16,float16,31,0.015957333147525787
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,4,4,128,1,float16,fp8,31,0.011978667229413986
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,4,4,128,1,float16,float16,63,0.0162773331006368
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,4,4,128,1,float16,fp8,63,0.011957333733638128
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,4,4,128,1,float16,fp8,127,0.011871999750534693
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,4,4,128,1,float16,float16,127,0.016597333053747814
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,4,4,128,1,float16,float16,255,0.018725333114465077
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,4,4,128,1,float16,fp8,255,0.01358933374285698
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,4,4,128,1,float16,float16,511,0.02647999922434489
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,4,4,128,1,float16,fp8,511,0.019061333189407986
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,4,4,128,1,float16,float16,1023,0.04896533489227295
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,4,4,128,1,float16,fp8,1023,0.029487999776999157
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,4,4,128,1,float16,float16,2047,0.09659199913342793
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,4,4,128,1,float16,fp8,2047,0.054671997825304665
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,4,4,128,1,float16,float16,4095,0.1768959959348043
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,4,4,128,1,float16,fp8,4095,0.10018666585286458
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,4,4,128,1,float16,float16,8191,0.33641600608825684
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,4,4,128,1,float16,fp8,8191,0.18917866547902426
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,4,4,128,1,float16,float16,16383,0.6554613510767618
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,4,4,128,1,float16,fp8,16383,0.36638931433359784
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,4,4,128,1,float16,float16,1,0.00816000004609426
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,4,4,128,1,float16,fp8,1,0.009296000003814697
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,4,4,128,1,float16,float16,3,0.008154666672150293
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,4,4,128,1,float16,fp8,3,0.00955200009047985
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,4,4,128,1,float16,float16,32767,1.3191680113474529
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,4,4,128,1,float16,float16,7,0.008234666660428047
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,4,4,128,1,float16,fp8,7,0.009285333255926767
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,4,4,128,1,float16,fp8,32767,0.8625280062357584
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,4,4,128,1,float16,float16,15,0.008250666782259941
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,4,4,128,1,float16,fp8,15,0.009290666629870733
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,4,4,128,1,float16,float16,31,0.009039999917149544
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,4,4,128,1,float16,fp8,31,0.010293333480755487
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,4,4,128,1,float16,float16,63,0.009136000027259191
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,4,4,128,1,float16,fp8,63,0.01202133297920227
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,4,4,128,1,float16,float16,127,0.009423999736706415
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,4,4,128,1,float16,fp8,127,0.012122667084137598
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,4,4,128,1,float16,fp8,255,0.012149333953857422
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,4,4,128,1,float16,float16,255,0.010309333602587381
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,4,4,128,1,float16,float16,511,0.01643199970324834
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,4,4,128,1,float16,float16,1023,0.017477333545684814
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,4,4,128,1,float16,fp8,1023,0.017162666966517765
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,4,4,128,1,float16,fp8,511,0.016517333686351776
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,4,4,128,1,float16,float16,2047,0.017792000124851864
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,4,4,128,1,float16,fp8,2047,0.017637333522240322
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,4,4,128,1,float16,fp8,4095,0.029680001238981884
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,4,4,128,1,float16,float16,4095,0.03002133220434189
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,4,4,128,1,float16,float16,8191,0.04106133431196213
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,4,4,128,1,float16,fp8,8191,0.04020266731580099
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,4,4,128,1,float16,float16,16383,0.0709440012772878
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,4,4,128,1,float16,fp8,16383,0.06131733457247416
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,4,4,128,1,float16,float16,32767,0.12139733632405598
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,4,4,128,1,float16,fp8,32767,0.11154133081436157
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,4,4,128,1,float16,float16,65535,0.2160373330116272
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,4,4,128,1,float16,fp8,65535,0.1938613255818685
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,4,4,128,1,float16,fp8,1,0.015311999867359797
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,4,4,128,1,float16,float16,1,0.013749333719412485
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,4,4,128,1,float16,float16,3,0.013946666071812311
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,4,4,128,1,float16,float16,131071,0.40760000546773273
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,4,4,128,1,float16,fp8,131071,0.36080535252888996
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,4,4,128,1,float16,fp8,3,0.015477333217859268
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,4,4,128,1,float16,float16,7,0.013797332843144735
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,4,4,128,1,float16,fp8,7,0.015578666081031164
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,4,4,128,1,float16,float16,15,0.014490666488806406
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,4,4,128,1,float16,fp8,15,0.01672533278663953
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,4,4,128,1,float16,float16,31,0.01470400020480156
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,4,4,128,1,float16,fp8,31,0.02042666698495547
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,4,4,128,1,float16,float16,63,0.014869333555301031
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,4,4,128,1,float16,fp8,63,0.02027200038234393
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,4,4,128,1,float16,float16,127,0.01695466662446658
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,4,4,128,1,float16,fp8,127,0.02037866661945979
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,4,4,128,1,float16,float16,255,0.02492800106604894
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,4,4,128,1,float16,float16,511,0.05089066425959269
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,4,4,128,1,float16,fp8,255,0.023610666394233704
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,4,4,128,1,float16,fp8,511,0.03498133271932602
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,4,4,128,1,float16,float16,1023,0.09657067060470581
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,4,4,128,1,float16,fp8,1023,0.05881066620349884
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,4,4,128,1,float16,float16,2047,0.18317866325378418
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,4,4,128,1,float16,fp8,2047,0.102783997853597
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,4,4,128,1,float16,float16,4095,0.35237868626912433
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,4,4,128,1,float16,fp8,4095,0.1905919909477234
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,4,4,128,1,float16,fp8,8191,0.3630239963531494
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,4,4,128,1,float16,float16,8191,0.6911946932474772
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,256,1,4,4,128,1,float16,float16,1,0.01912533367673556
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,4,4,128,1,float16,float16,16383,1.3041813373565674
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,256,1,4,4,128,1,float16,fp8,1,0.015040000279744467
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,256,1,4,4,128,1,float16,float16,3,0.01952533299724261
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,256,1,4,4,128,1,float16,fp8,3,0.015530666957298914
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,4,4,128,1,float16,fp8,16383,0.7079626719156901
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,256,1,4,4,128,1,float16,float16,7,0.020058666666348774
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,256,1,4,4,128,1,float16,fp8,7,0.01599466676513354
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,256,1,4,4,128,1,float16,float16,15,0.021733333667119343
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,256,1,4,4,128,1,float16,fp8,15,0.019882666567961376
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,256,1,4,4,128,1,float16,float16,31,0.022015998760859173
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,256,1,4,4,128,1,float16,fp8,31,0.019893333315849304
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,256,1,4,4,128,1,float16,float16,63,0.022074667116006214
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,256,1,4,4,128,1,float16,fp8,63,0.019962667177120846
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,256,1,4,4,128,1,float16,float16,127,0.026954665780067444
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,256,1,4,4,128,1,float16,fp8,127,0.023402666052182514
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,256,1,4,4,128,1,float16,float16,255,0.045935998360315956
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,256,1,4,4,128,1,float16,fp8,255,0.03456533451875051
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,256,1,4,4,128,1,float16,float16,511,0.0772159993648529
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,256,1,4,4,128,1,float16,fp8,511,0.05865600208441416
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,256,1,4,4,128,1,float16,float16,1023,0.15985066692034403
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,256,1,4,4,128,1,float16,fp8,1023,0.10225600004196167
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,256,1,4,4,128,1,float16,float16,2047,0.29788267612457275
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,256,1,4,4,128,1,float16,fp8,2047,0.19103467464447021
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,512,1,4,4,128,1,float16,float16,1,0.03430933256944021
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,256,1,4,4,128,1,float16,float16,4095,0.5720959901809692
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,512,1,4,4,128,1,float16,fp8,1,0.02632533262173335
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,512,1,4,4,128,1,float16,float16,3,0.03494933247566223
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,256,1,4,4,128,1,float16,fp8,4095,0.36820268630981445
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,512,1,4,4,128,1,float16,fp8,3,0.027306665976842243
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,512,1,4,4,128,1,float16,float16,7,0.035402665535608925
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,512,1,4,4,128,1,float16,fp8,7,0.028069332242012024
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,512,1,4,4,128,1,float16,float16,15,0.039066667358080544
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,512,1,4,4,128,1,float16,fp8,15,0.035386666655540466
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,512,1,4,4,128,1,float16,float16,31,0.039808000127474465
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,512,1,4,4,128,1,float16,fp8,31,0.0354666660229365
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,512,1,4,4,128,1,float16,float16,63,0.04148799926042557
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,512,1,4,4,128,1,float16,fp8,63,0.03551466763019562
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,512,1,4,4,128,1,float16,float16,127,0.05807466804981232
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,512,1,4,4,128,1,float16,fp8,127,0.04214400053024292
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,512,1,4,4,128,1,float16,fp8,255,0.06633066634337108
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,512,1,4,4,128,1,float16,float16,255,0.08545066912968953
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,512,1,4,4,128,1,float16,float16,511,0.14550933241844177
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,512,1,4,4,128,1,float16,fp8,511,0.1088053286075592
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,512,1,4,4,128,1,float16,float16,1023,0.3002293308575948
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,512,1,4,4,128,1,float16,fp8,1023,0.19335466623306274
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,512,1,4,4,128,1,float16,float16,2047,0.5745013157526652
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,4,4,128,1,float16,float16,1,0.00810666692753633
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,4,4,128,1,float16,fp8,1,0.009482666850090027
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,512,1,4,4,128,1,float16,fp8,2047,0.3647040128707886
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,4,4,128,1,float16,float16,3,0.008522666369875273
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,4,4,128,1,float16,fp8,3,0.009690666571259499
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,4,4,128,1,float16,float16,7,0.008442666381597519
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,4,4,128,1,float16,fp8,7,0.009541333342591921
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,4,4,128,1,float16,fp8,15,0.009594666461149851
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,4,4,128,1,float16,float16,31,0.009205333267649015
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,4,4,128,1,float16,float16,15,0.008623999853928884
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,4,4,128,1,float16,float16,63,0.009381333366036415
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,4,4,128,1,float16,fp8,31,0.010341333225369453
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,4,4,128,1,float16,fp8,127,0.012181332955757776
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,4,4,128,1,float16,float16,127,0.009514666472872099
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,4,4,128,1,float16,fp8,63,0.01228800043463707
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,4,4,128,1,float16,float16,255,0.01051733394463857
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,4,4,128,1,float16,fp8,255,0.01202133297920227
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,4,4,128,1,float16,float16,1023,0.01777600000301997
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,4,4,128,1,float16,fp8,511,0.016506666938463848
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,4,4,128,1,float16,float16,511,0.016693333784739178
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,4,4,128,1,float16,fp8,1023,0.017498667041460674
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,4,4,128,1,float16,float16,2047,0.029850666721661884
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,4,4,128,1,float16,fp8,2047,0.02959466725587845
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,4,4,128,1,float16,fp8,4095,0.03995199998219808
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,4,4,128,1,float16,float16,8191,0.06668800115585327
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,4,4,128,1,float16,float16,4095,0.040607998768488564
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,4,4,128,1,float16,fp8,8191,0.058970664938290916
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,4,4,128,1,float16,float16,16383,0.10957866907119751
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,4,4,128,1,float16,fp8,16383,0.1004960040251414
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,4,4,128,1,float16,fp8,32767,0.1751306653022766
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,4,4,128,1,float16,float16,32767,0.19575466712315878
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,4,4,128,1,float16,fp8,65535,0.33001067241032916
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,4,4,128,1,float16,float16,65535,0.3678400119145711
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1024,1,4,4,128,1,float16,float16,1,0.06113600234190623
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,4,4,128,1,float16,float16,131071,0.7173866430918375
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,4,4,128,1,float16,fp8,131071,0.6426613330841064
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1024,1,4,4,128,1,float16,fp8,1,0.04822933177153269
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1024,1,4,4,128,1,float16,float16,3,0.0620959997177124
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1024,1,4,4,128,1,float16,float16,7,0.06279466549555461
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1024,1,4,4,128,1,float16,fp8,3,0.04982399940490723
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1024,1,4,4,128,1,float16,fp8,7,0.052069331208864846
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1024,1,4,4,128,1,float16,fp8,15,0.06541333099206288
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1024,1,4,4,128,1,float16,float16,31,0.0762613316377004
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1024,1,4,4,128,1,float16,float16,15,0.07275199890136719
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1024,1,4,4,128,1,float16,fp8,31,0.06557333469390869
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1024,1,4,4,128,1,float16,float16,63,0.08478933572769165
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1024,1,4,4,128,1,float16,fp8,63,0.06630399823188782
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1024,1,4,4,128,1,float16,float16,127,0.1042080024878184
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1024,1,4,4,128,1,float16,fp8,127,0.0831520011027654
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1024,1,4,4,128,1,float16,float16,255,0.1570026675860087
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1024,1,4,4,128,1,float16,fp8,255,0.12461333473523457
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1024,1,4,4,128,1,float16,float16,511,0.2711626688639323
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1024,1,4,4,128,1,float16,fp8,511,0.20779200394948324
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1024,1,4,4,128,1,float16,float16,1023,0.5625600020090739
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2048,1,4,4,128,1,float16,float16,1,0.11467199524243672
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2048,1,4,4,128,1,float16,fp8,1,0.0913866659005483
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1024,1,4,4,128,1,float16,fp8,1023,0.3739733298619588
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2048,1,4,4,128,1,float16,fp8,3,0.09459199508031209
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2048,1,4,4,128,1,float16,float16,3,0.11657599608103435
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2048,1,4,4,128,1,float16,float16,7,0.11878933509190877
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2048,1,4,4,128,1,float16,fp8,7,0.0993280013402303
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2048,1,4,4,128,1,float16,float16,15,0.14683733383814493
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2048,1,4,4,128,1,float16,fp8,15,0.1251466671625773
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2048,1,4,4,128,1,float16,float16,31,0.15690666437149048
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2048,1,4,4,128,1,float16,fp8,31,0.12875200311342874
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2048,1,4,4,128,1,float16,fp8,63,0.13205333550771078
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2048,1,4,4,128,1,float16,float16,63,0.15989866852760315
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2048,1,4,4,128,1,float16,float16,127,0.19762667020161948
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2048,1,4,4,128,1,float16,fp8,127,0.15807466705640158
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,4,4,128,1,float16,float16,1,0.008186666915814081
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,4,4,128,1,float16,float16,3,0.008496000121037165
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,4,4,128,1,float16,fp8,1,0.009733333562811216
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,4,4,128,1,float16,fp8,3,0.009749333063761393
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2048,1,4,4,128,1,float16,float16,255,0.29950932661692303
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2048,1,4,4,128,1,float16,fp8,255,0.23940267165501913
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,4,4,128,1,float16,float16,7,0.008303999900817871
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,4,4,128,1,float16,fp8,7,0.009733333562811216
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,4,4,128,1,float16,float16,15,0.008629333227872849
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,4,4,128,1,float16,fp8,15,0.00984533317387104
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2048,1,4,4,128,1,float16,float16,511,0.5230506658554077
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2048,1,4,4,128,1,float16,fp8,511,0.4026346604029338
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,4,4,128,1,float16,float16,31,0.009525333220760027
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,4,4,128,1,float16,fp8,31,0.010512000570694605
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,4,4,128,1,float16,float16,63,0.009434666484594345
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,4,4,128,1,float16,fp8,63,0.012250666817029318
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,4,4,128,1,float16,float16,127,0.009557333464423815
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,4,4,128,1,float16,fp8,127,0.01227733368674914
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,4,4,128,1,float16,fp8,255,0.012373333175977072
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,4,4,128,1,float16,float16,255,0.010703999549150467
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,4,4,128,1,float16,float16,511,0.017114666601022083
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,4,4,128,1,float16,fp8,511,0.016778666526079178
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,4,4,128,1,float16,float16,1023,0.029802667597929638
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,4,4,128,1,float16,fp8,1023,0.029706666866938274
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,4,4,128,1,float16,fp8,2047,0.03722666700681051
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,4,4,128,1,float16,float16,2047,0.03791466603676478
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,4,4,128,1,float16,fp8,4095,0.051514665285746254
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,4,4,128,1,float16,float16,8191,0.09735999504725139
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,4,4,128,1,float16,fp8,8191,0.08752000331878662
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,4,4,128,1,float16,float16,4095,0.0574239989121755
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,4,4,128,1,float16,float16,16383,0.17044800519943237
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,4,4,128,1,float16,fp8,16383,0.1536746621131897
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,4,4,128,1,float16,float16,32767,0.3136533300081889
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,4,4,128,1,float16,fp8,32767,0.2827413280804952
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,128,1,128,1,float16,float16,1,0.0595360000928243
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,4,4,128,1,float16,fp8,65535,0.5350293318430582
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,4,4,128,1,float16,float16,65535,0.5984746615091959
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,128,1,128,1,float16,fp8,1,0.04664533336957296
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,128,1,128,1,float16,float16,3,0.060906668504079185
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,4,4,128,1,float16,float16,131071,1.0121813615163167
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,128,1,128,1,float16,fp8,3,0.04808533191680908
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,4,4,128,1,float16,fp8,131071,0.8377760251363119
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,128,1,128,1,float16,float16,7,0.0614026685555776
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,128,1,128,1,float16,fp8,7,0.04994133114814758
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,128,1,128,1,float16,float16,15,0.07039466500282288
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,128,1,128,1,float16,fp8,15,0.06326400240262349
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,128,1,128,1,float16,float16,31,0.07092800239721934
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,128,1,128,1,float16,fp8,31,0.0633653352657954
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,128,1,128,1,float16,float16,63,0.07160533467928569
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,128,1,128,1,float16,fp8,63,0.063509335120519
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,128,1,128,1,float16,float16,127,0.08501866459846497
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,128,1,128,1,float16,fp8,127,0.07578133543332417
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,128,1,128,1,float16,float16,255,0.12660800417264303
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,128,1,128,1,float16,fp8,255,0.11575466394424438
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,128,1,128,1,float16,float16,511,0.21092800299326578
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,128,1,128,1,float16,fp8,511,0.19646400213241577
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,128,1,128,1,float16,fp8,1023,0.35976000626881915
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,128,1,128,1,float16,float16,1023,0.3802880048751831
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,128,1,128,1,float16,float16,2047,0.7156000137329102
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,128,1,128,1,float16,fp8,2047,0.6838133335113525
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,128,1,128,1,float16,float16,4095,1.4034080505371094
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,128,1,128,1,float16,fp8,4095,1.3262346585591633
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,128,2,128,1,float16,fp8,1,0.046666666865348816
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,128,2,128,1,float16,float16,1,0.05978666742642721
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,128,2,128,1,float16,float16,3,0.060778667529424034
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,128,2,128,1,float16,fp8,3,0.04807466765244802
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,128,2,128,1,float16,float16,7,0.06111466884613037
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,128,2,128,1,float16,fp8,7,0.0506933331489563
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,128,2,128,1,float16,float16,15,0.07097599903742473
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,128,2,128,1,float16,fp8,15,0.06372266511122386
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,128,2,128,1,float16,float16,31,0.07181866466999054
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,128,2,128,1,float16,fp8,31,0.06379733482996623
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,128,2,128,1,float16,float16,63,0.07223466535409291
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,128,2,128,1,float16,fp8,63,0.06406400104363759
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,128,2,128,1,float16,float16,127,0.08577066659927368
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,128,2,128,1,float16,fp8,127,0.07594666878382365
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,128,2,128,1,float16,float16,255,0.12877333164215088
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,128,2,128,1,float16,fp8,255,0.11597866813341777
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,128,2,128,1,float16,float16,511,0.21237866083780924
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,128,2,128,1,float16,fp8,511,0.1971786618232727
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,128,2,128,1,float16,float16,1023,0.38175467650095624
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,128,2,128,1,float16,fp8,1023,0.3602026700973511
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,128,2,128,1,float16,float16,2047,0.7286773522694906
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,128,2,128,1,float16,fp8,2047,0.6838293075561523
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,128,2,128,1,float16,float16,4095,1.413050651550293
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,128,2,128,1,float16,fp8,4095,1.3282506465911865
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,128,4,128,1,float16,fp8,15,0.009408000235756239
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,128,4,128,1,float16,fp8,1,0.009328000247478485
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,128,4,128,1,float16,fp8,3,0.009322666873534521
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,128,4,128,1,float16,fp8,7,0.00938666673998038
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,128,4,128,1,float16,fp8,31,0.009589333087205887
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,128,4,128,1,float16,fp8,63,0.009375999992092451
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,128,4,128,1,float16,fp8,127,0.010079999764760336
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,128,4,128,1,float16,float16,1,0.013280000537633896
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,128,4,128,1,float16,float16,7,0.012858666479587555
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,128,4,128,1,float16,float16,3,0.01313599944114685
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,128,4,128,1,float16,float16,255,0.026122666895389557
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,128,4,128,1,float16,float16,15,0.013167999684810638
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,128,4,128,1,float16,float16,31,0.013210666676362356
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,128,4,128,1,float16,float16,511,0.03450666616360346
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,128,4,128,1,float16,float16,63,0.01313599944114685
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,128,4,128,1,float16,float16,1023,0.05759466687838236
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,128,4,128,1,float16,float16,2047,0.08294933537642162
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,128,4,128,1,float16,float16,127,0.01301866645614306
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,128,4,128,1,float16,fp8,255,0.021189334491888683
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,128,4,128,1,float16,float16,4095,0.10677867134412129
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,128,4,128,1,float16,fp8,4095,0.13615467151006064
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,128,4,128,1,float16,fp8,511,0.03972266614437103
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,128,4,128,1,float16,fp8,1023,0.07388266424338023
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,128,4,128,1,float16,fp8,2047,0.10604266325632732
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,128,8,128,1,float16,fp8,15,0.00761600024998188
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,128,8,128,1,float16,fp8,3,0.007786666974425316
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,128,8,128,1,float16,fp8,1,0.008656000097592672
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,128,8,128,1,float16,fp8,7,0.007770666852593422
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,128,8,128,1,float16,fp8,31,0.007781333600481351
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,128,8,128,1,float16,fp8,63,0.007743999982873599
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,128,8,128,1,float16,fp8,127,0.008645333349704742
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,128,8,128,1,float16,float16,1,0.016762666404247284
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,128,8,128,1,float16,float16,3,0.016229332735141117
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,128,8,128,1,float16,float16,127,0.016656000167131424
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,128,8,128,1,float16,float16,7,0.01634666696190834
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,128,8,128,1,float16,float16,15,0.016037333756685257
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,128,8,128,1,float16,float16,511,0.04829333225886027
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,128,8,128,1,float16,float16,1023,0.06249066690603892
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,128,8,128,1,float16,fp8,511,0.03196800003449122
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,128,8,128,1,float16,float16,31,0.016330666840076447
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,128,8,128,1,float16,float16,63,0.0162773331006368
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,128,8,128,1,float16,float16,255,0.031184000273545582
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,128,8,128,1,float16,float16,2047,0.08442667126655579
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,128,8,128,1,float16,fp8,255,0.017829333742459614
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,128,8,128,1,float16,float16,4095,0.12133866548538208
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,128,1,128,1,float16,float16,1,0.008597333605090777
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,128,8,128,1,float16,fp8,4095,0.0867199997107188
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,128,1,128,1,float16,fp8,1,0.009621333330869675
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,128,1,128,1,float16,float16,3,0.008522666369875273
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,128,8,128,1,float16,fp8,1023,0.042266666889190674
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,128,1,128,1,float16,float16,7,0.008559999987483025
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,128,1,128,1,float16,fp8,3,0.009648000200589498
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,128,1,128,1,float16,fp8,7,0.009717333440979322
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,128,1,128,1,float16,float16,15,0.00873066671192646
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,128,8,128,1,float16,fp8,2047,0.05899199843406677
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,128,1,128,1,float16,fp8,15,0.00966933307548364
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,128,1,128,1,float16,float16,31,0.009509333098928133
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,128,1,128,1,float16,float16,63,0.009472000102202097
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,128,1,128,1,float16,fp8,63,0.012362666428089142
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,128,1,128,1,float16,fp8,31,0.010277333358923594
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,128,1,128,1,float16,float16,127,0.009690666571259499
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,128,1,128,1,float16,float16,255,0.01073066641887029
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,128,1,128,1,float16,fp8,127,0.012240000069141388
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,128,1,128,1,float16,fp8,255,0.012421333541472753
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,128,1,128,1,float16,fp8,511,0.028138667345046997
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,128,1,128,1,float16,float16,511,0.028223998844623566
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,128,1,128,1,float16,float16,1023,0.032858667274316154
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,128,1,128,1,float16,float16,2047,0.04413333535194397
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,128,1,128,1,float16,fp8,1023,0.03258133431275686
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,128,1,128,1,float16,fp8,2047,0.043824002146720886
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,128,1,128,1,float16,float16,4095,0.06631466746330261
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,128,1,128,1,float16,fp8,4095,0.06557866434256236
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,128,1,128,1,float16,float16,8191,0.11016533772150676
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,128,1,128,1,float16,fp8,8191,0.10787199934323628
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,128,1,128,1,float16,float16,16383,0.19840532541275024
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,128,1,128,1,float16,fp8,16383,0.19316265980402628
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,128,2,128,1,float16,float16,1,0.008405333384871483
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,128,2,128,1,float16,fp8,1,0.009397333487868309
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,128,2,128,1,float16,float16,3,0.008469333251317343
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,128,2,128,1,float16,fp8,3,0.009530666594703993
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,128,2,128,1,float16,float16,7,0.008549333239595095
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,128,2,128,1,float16,fp8,7,0.00949866697192192
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,128,2,128,1,float16,float16,15,0.00879466657837232
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,128,2,128,1,float16,fp8,15,0.009866666669646898
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,128,2,128,1,float16,float16,31,0.009322666873534521
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,128,2,128,1,float16,fp8,31,0.01044800008336703
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,128,2,128,1,float16,float16,63,0.009477333476146063
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,128,2,128,1,float16,fp8,63,0.012319999436537424
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,128,2,128,1,float16,float16,127,0.00955200009047985
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,128,2,128,1,float16,float16,255,0.010858666151762009
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,128,2,128,1,float16,fp8,127,0.012266666938861212
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,128,2,128,1,float16,fp8,255,0.012437333663304647
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,128,2,128,1,float16,float16,511,0.028250666956106823
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,128,2,128,1,float16,float16,1023,0.0328053335348765
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,128,2,128,1,float16,fp8,511,0.02805333336194356
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,128,2,128,1,float16,fp8,1023,0.03270933280388514
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,128,2,128,1,float16,float16,2047,0.044138665000597634
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,128,2,128,1,float16,fp8,2047,0.04369066655635834
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,128,2,128,1,float16,float16,4095,0.06614399949709575
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,128,2,128,1,float16,float16,8191,0.11020800471305847
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,128,2,128,1,float16,fp8,4095,0.06555733581384023
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,128,2,128,1,float16,fp8,8191,0.10774399836858113
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,128,2,128,1,float16,float16,16383,0.1980266571044922
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,128,2,128,1,float16,fp8,16383,0.19276267290115356
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,128,4,128,1,float16,float16,1,0.008559999987483025
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,128,4,128,1,float16,fp8,1,0.009712000067035357
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,128,4,128,1,float16,float16,3,0.008597333605090777
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,128,4,128,1,float16,fp8,3,0.009525333220760027
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,128,4,128,1,float16,fp8,7,0.009546666716535887
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,128,4,128,1,float16,float16,7,0.008672000219424566
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,128,4,128,1,float16,fp8,15,0.009663999701539675
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,128,4,128,1,float16,float16,15,0.00879466657837232
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,128,4,128,1,float16,fp8,31,0.010469333579142889
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,128,4,128,1,float16,float16,31,0.009658666948477427
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,128,4,128,1,float16,float16,63,0.009583999713261923
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,128,4,128,1,float16,float16,127,0.009535999968647957
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,128,4,128,1,float16,fp8,63,0.012191999703645706
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,128,4,128,1,float16,fp8,127,0.012309333930412928
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,128,4,128,1,float16,float16,255,0.010703999549150467
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,128,4,128,1,float16,float16,511,0.028442665934562683
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,128,4,128,1,float16,fp8,511,0.028181334336598713
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,128,4,128,1,float16,fp8,255,0.012453333785136541
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,128,4,128,1,float16,fp8,1023,0.03252800057331721
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,128,4,128,1,float16,float16,1023,0.03294933338960012
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,128,4,128,1,float16,float16,2047,0.01762666677435239
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,128,4,128,1,float16,fp8,2047,0.015893333901961643
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,128,4,128,1,float16,fp8,4095,0.018090666582187016
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,128,4,128,1,float16,float16,8191,0.023050665855407715
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,128,4,128,1,float16,float16,4095,0.01844266677896182
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,128,4,128,1,float16,fp8,8191,0.022197333474953968
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,128,4,128,1,float16,float16,16383,0.025061334172884624
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,128,4,128,1,float16,fp8,16383,0.026949333647886913
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,128,8,128,1,float16,fp8,1,0.009690666571259499
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,128,8,128,1,float16,float16,1,0.008389333263039589
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,128,8,128,1,float16,float16,3,0.0085333331177632
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,128,8,128,1,float16,fp8,3,0.00960533320903778
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,128,8,128,1,float16,float16,7,0.008597333605090777
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,128,8,128,1,float16,float16,15,0.008661333471536636
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,128,8,128,1,float16,fp8,7,0.009717333440979322
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,128,8,128,1,float16,float16,31,0.009312000125646591
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,128,8,128,1,float16,fp8,63,0.012213333199421564
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,128,8,128,1,float16,float16,63,0.00960533320903778
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,128,8,128,1,float16,fp8,31,0.010319999729593595
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,128,8,128,1,float16,float16,127,0.009695999945203463
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,128,8,128,1,float16,fp8,127,0.012154666086037954
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,128,8,128,1,float16,float16,255,0.010757333288590113
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,128,8,128,1,float16,fp8,255,0.012309333930412928
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,128,8,128,1,float16,float16,511,0.0281333327293396
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,128,8,128,1,float16,fp8,511,0.028250666956106823
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,128,8,128,1,float16,fp8,15,0.009882666791478792
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,128,8,128,1,float16,float16,1023,0.013909333695967993
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,128,8,128,1,float16,fp8,1023,0.010053333515922228
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,128,8,128,1,float16,float16,2047,0.015013333410024643
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,128,8,128,1,float16,float16,4095,0.01640533283352852
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,128,8,128,1,float16,fp8,2047,0.011535999675591787
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,128,8,128,1,float16,fp8,4095,0.013450667262077332
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,128,8,128,1,float16,float16,8191,0.026234666506449383
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,128,8,128,1,float16,fp8,8191,0.018522666146357853
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,128,8,128,1,float16,float16,16383,0.03029866764942805
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,128,1,128,1,float16,float16,1,0.013760000467300415
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,128,8,128,1,float16,fp8,16383,0.022570667167504627
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,128,1,128,1,float16,fp8,1,0.009093333035707474
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,128,1,128,1,float16,fp8,3,0.008853333070874214
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,128,1,128,1,float16,float16,3,0.013850666582584381
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,128,1,128,1,float16,float16,7,0.014074667046467463
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,128,1,128,1,float16,fp8,7,0.008943999807039896
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,128,1,128,1,float16,float16,15,0.014165333161751429
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,128,1,128,1,float16,fp8,15,0.009621333330869675
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,128,1,128,1,float16,float16,31,0.015546667079130808
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,128,1,128,1,float16,fp8,31,0.011839999506870905
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,128,1,128,1,float16,float16,63,0.01609066625436147
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,128,1,128,1,float16,fp8,63,0.01180800050497055
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,128,1,128,1,float16,float16,127,0.016021333634853363
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,128,1,128,1,float16,float16,255,0.0180479995906353
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,128,1,128,1,float16,fp8,127,0.011717333147923151
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,128,1,128,1,float16,fp8,255,0.013562666873137156
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,128,1,128,1,float16,fp8,511,0.018805333723624546
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,128,1,128,1,float16,float16,511,0.024288001159826916
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,128,1,128,1,float16,float16,1023,0.03701333453257879
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,128,1,128,1,float16,fp8,1023,0.02951466788848241
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,128,1,128,1,float16,float16,2047,0.06147199869155884
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,128,1,128,1,float16,fp8,4095,0.09168533484141032
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,128,1,128,1,float16,fp8,2047,0.05014933149019877
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,128,1,128,1,float16,float16,4095,0.11029332876205444
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,128,1,128,1,float16,float16,8191,0.20805333058039346
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,128,1,128,1,float16,fp8,8191,0.1742239991823832
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,128,1,128,1,float16,float16,16383,0.4031306505203247
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,128,2,128,1,float16,float16,1,0.013967999567588171
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,128,1,128,1,float16,fp8,16383,0.33931199709574383
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,128,2,128,1,float16,float16,3,0.013712000101804733
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,128,2,128,1,float16,fp8,1,0.00884799969693025
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,128,2,128,1,float16,fp8,3,0.009088000282645226
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,128,2,128,1,float16,float16,7,0.013978666315476099
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,128,2,128,1,float16,fp8,7,0.009103999783595404
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,128,2,128,1,float16,fp8,15,0.009701333319147428
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,128,2,128,1,float16,float16,15,0.014335999886194864
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,128,2,128,1,float16,float16,31,0.015685333559910457
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,128,2,128,1,float16,fp8,31,0.01163200040658315
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,128,2,128,1,float16,float16,63,0.01571200042963028
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,128,2,128,1,float16,fp8,63,0.011760000139474869
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,128,2,128,1,float16,float16,127,0.016069332758585613
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,128,2,128,1,float16,fp8,127,0.011674666156371435
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,128,2,128,1,float16,float16,255,0.018351999421914417
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,128,2,128,1,float16,fp8,255,0.013568000247081121
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,128,2,128,1,float16,float16,511,0.024538666009902954
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,128,2,128,1,float16,float16,2047,0.06142933170000712
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,128,2,128,1,float16,float16,1023,0.03686933219432831
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,128,2,128,1,float16,fp8,1023,0.029333333174387615
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,128,2,128,1,float16,fp8,511,0.01869333287080129
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,128,2,128,1,float16,fp8,2047,0.0503359983364741
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,128,2,128,1,float16,float16,4095,0.11026133100191753
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,128,2,128,1,float16,fp8,4095,0.091648002465566
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,128,2,128,1,float16,float16,8191,0.20769067605336508
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,128,2,128,1,float16,fp8,8191,0.174127995967865
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,128,2,128,1,float16,float16,16383,0.4023253520329793
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,128,2,128,1,float16,fp8,16383,0.33932801087697345
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,128,4,128,1,float16,float16,1,0.013616000612576803
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,128,4,128,1,float16,fp8,1,0.009066666786869368
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,128,4,128,1,float16,float16,3,0.013855999956528345
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,128,4,128,1,float16,fp8,3,0.008965333302815756
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,128,4,128,1,float16,fp8,7,0.009119999905427298
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,128,4,128,1,float16,float16,7,0.0138026662170887
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,128,4,128,1,float16,float16,15,0.014101333916187286
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,128,4,128,1,float16,fp8,15,0.009808000177145004
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,128,4,128,1,float16,float16,31,0.015477333217859268
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,128,4,128,1,float16,fp8,31,0.011701333026091257
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,128,4,128,1,float16,float16,63,0.015925332903862
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,128,4,128,1,float16,fp8,63,0.011839999506870905
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,128,4,128,1,float16,float16,127,0.01611199975013733
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,128,4,128,1,float16,fp8,127,0.011744000017642975
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,128,4,128,1,float16,float16,255,0.018133333573738735
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,128,4,128,1,float16,fp8,255,0.013536000003417334
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,128,4,128,1,float16,float16,511,0.0242399995525678
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,128,4,128,1,float16,fp8,511,0.018794666975736618
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,128,4,128,1,float16,float16,1023,0.016303999970356624
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,128,4,128,1,float16,fp8,1023,0.013690666606028875
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,128,4,128,1,float16,float16,2047,0.018778666853904724
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,128,4,128,1,float16,fp8,2047,0.015541333705186844
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,128,4,128,1,float16,float16,4095,0.020213333268960316
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,128,4,128,1,float16,fp8,4095,0.01836799954374631
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,128,4,128,1,float16,float16,8191,0.03186666717131933
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,128,4,128,1,float16,fp8,8191,0.03278400003910065
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,128,4,128,1,float16,float16,16383,0.039173332353432976
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,128,8,128,1,float16,float16,1,0.01394133393963178
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,128,4,128,1,float16,fp8,16383,0.04037333279848099
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,128,8,128,1,float16,fp8,1,0.00897066667675972
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,128,8,128,1,float16,fp8,3,0.008997333546479544
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,128,8,128,1,float16,float16,3,0.01381333296497663
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,128,8,128,1,float16,float16,7,0.013728000223636627
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,128,8,128,1,float16,fp8,15,0.009722666814923286
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,128,8,128,1,float16,fp8,7,0.008997333546479544
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,128,8,128,1,float16,float16,15,0.014245333770910898
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,128,8,128,1,float16,float16,31,0.015829333414634068
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,128,8,128,1,float16,fp8,31,0.011920000116030375
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,128,8,128,1,float16,float16,63,0.01586666703224182
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,128,8,128,1,float16,fp8,63,0.011802667131026586
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,128,8,128,1,float16,fp8,127,0.011941333611806234
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,128,8,128,1,float16,float16,127,0.015925332903862
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,128,8,128,1,float16,fp8,255,0.01369599997997284
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,128,8,128,1,float16,float16,255,0.018170667191346485
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,128,8,128,1,float16,float16,511,0.013199999928474426
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,128,8,128,1,float16,float16,1023,0.01403733342885971
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,128,8,128,1,float16,float16,2047,0.021002667645613354
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,128,8,128,1,float16,fp8,511,0.009642666826645533
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,128,8,128,1,float16,fp8,1023,0.010293333480755487
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,128,8,128,1,float16,fp8,2047,0.012965332716703415
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,128,8,128,1,float16,float16,4095,0.024495999018351238
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,128,8,128,1,float16,fp8,4095,0.015461333096027374
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,128,8,128,1,float16,fp8,8191,0.02738133321205775
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,128,8,128,1,float16,float16,8191,0.035504000882307686
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,128,8,128,1,float16,float16,16383,0.05118933320045471
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,128,8,128,1,float16,fp8,16383,0.035402665535608925
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,128,1,128,1,float16,float16,1,0.11204800009727478
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,128,1,128,1,float16,fp8,1,0.088128000497818
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,128,1,128,1,float16,float16,3,0.11457600196202596
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,128,1,128,1,float16,float16,7,0.11641066273053487
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,128,1,128,1,float16,fp8,3,0.09074667096138
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,128,1,128,1,float16,fp8,7,0.09507200121879578
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,128,1,128,1,float16,float16,15,0.13614400227864584
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,128,1,128,1,float16,fp8,15,0.12144533793131511
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,128,1,128,1,float16,float16,31,0.13701333602269491
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,128,1,128,1,float16,fp8,31,0.12085866928100586
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,128,1,128,1,float16,float16,63,0.13803733388582864
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,128,1,128,1,float16,fp8,63,0.12144533793131511
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,128,1,128,1,float16,float16,127,0.16427200039227804
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,128,1,128,1,float16,fp8,127,0.1450399955113729
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,128,1,128,1,float16,float16,255,0.24593599637349448
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,128,1,128,1,float16,fp8,255,0.22316267093022665
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,128,1,128,1,float16,float16,511,0.41072531541188556
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,128,1,128,1,float16,fp8,511,0.3816426595052083
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,128,1,128,1,float16,float16,1023,0.7422719796498617
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,128,1,128,1,float16,fp8,1023,0.7015573183695475
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,128,1,128,1,float16,float16,2047,1.413333257039388
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,128,2,128,1,float16,float16,1,0.11239467064539592
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,128,1,128,1,float16,fp8,2047,1.336858590443929
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,128,2,128,1,float16,fp8,1,0.08834667007128398
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,128,2,128,1,float16,float16,3,0.11455999811490376
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,128,2,128,1,float16,fp8,3,0.09097066521644592
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,128,2,128,1,float16,float16,7,0.11642133196194966
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,128,2,128,1,float16,fp8,7,0.09553066889444987
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,128,2,128,1,float16,float16,15,0.13685866196950278
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,128,2,128,1,float16,fp8,15,0.12178666392962138
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,128,2,128,1,float16,float16,31,0.13857600092887878
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,128,2,128,1,float16,float16,63,0.13940266768137613
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,128,2,128,1,float16,fp8,31,0.12160000205039978
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,128,2,128,1,float16,fp8,63,0.12192533413569133
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,128,2,128,1,float16,float16,127,0.16597867012023926
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,128,2,128,1,float16,fp8,127,0.14502400159835815
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,128,2,128,1,float16,float16,255,0.24946133295694986
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,128,2,128,1,float16,fp8,255,0.22339733441670737
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,128,2,128,1,float16,float16,511,0.41238399346669513
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,128,2,128,1,float16,fp8,1023,0.7019039789835612
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,128,2,128,1,float16,fp8,511,0.38227200508117676
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,128,2,128,1,float16,float16,1023,0.7561013698577881
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,128,2,128,1,float16,float16,2047,1.4212640126546223
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,128,2,128,1,float16,fp8,2047,1.3388214111328125
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,128,4,128,1,float16,float16,1,0.020410666863123577
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,128,4,128,1,float16,fp8,1,0.01626666635274887
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,128,4,128,1,float16,float16,3,0.020799999435742695
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,128,4,128,1,float16,fp8,3,0.016085332880417507
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,128,4,128,1,float16,float16,7,0.020389333367347717
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,128,4,128,1,float16,float16,15,0.0206133338312308
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,128,4,128,1,float16,fp8,15,0.01611199975013733
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,128,4,128,1,float16,fp8,7,0.016176000237464905
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,128,4,128,1,float16,float16,31,0.02024000013868014
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,128,4,128,1,float16,float16,63,0.02075200031201045
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,128,4,128,1,float16,fp8,31,0.016410666207472484
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,128,4,128,1,float16,fp8,63,0.01594666639963786
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,128,4,128,1,float16,fp8,127,0.017551999539136887
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,128,4,128,1,float16,float16,255,0.03894400099913279
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,128,4,128,1,float16,float16,127,0.020069333414236706
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,128,4,128,1,float16,fp8,511,0.07353599866231282
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,128,4,128,1,float16,float16,1023,0.07804800073305766
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,128,4,128,1,float16,fp8,255,0.038933334251244865
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,128,4,128,1,float16,float16,511,0.05969599882761637
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,128,4,128,1,float16,fp8,1023,0.10572800040245056
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,128,4,128,1,float16,float16,2047,0.10806399583816528
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,128,4,128,1,float16,fp8,2047,0.1374666690826416
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,128,8,128,1,float16,fp8,1,0.012815999488035837
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,128,8,128,1,float16,float16,3,0.027050666511058807
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,128,8,128,1,float16,float16,1,0.026922665536403656
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,128,8,128,1,float16,fp8,3,0.012826666235923767
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,128,8,128,1,float16,float16,7,0.02714666724205017
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,128,8,128,1,float16,fp8,7,0.012869333227475485
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,128,8,128,1,float16,float16,15,0.027493332823117573
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,128,8,128,1,float16,fp8,15,0.012879999975363413
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,128,8,128,1,float16,float16,31,0.027322667340437572
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,128,8,128,1,float16,fp8,31,0.012810666114091873
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,128,8,128,1,float16,float16,63,0.02681066592534383
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,128,8,128,1,float16,fp8,127,0.01462399959564209
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,128,8,128,1,float16,float16,127,0.027274665733178455
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,128,8,128,1,float16,fp8,63,0.013445333888133367
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,128,8,128,1,float16,float16,255,0.05240533252557119
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,128,8,128,1,float16,fp8,255,0.03156800071398417
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,128,8,128,1,float16,fp8,511,0.043098668257395424
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,128,8,128,1,float16,float16,511,0.0666720022757848
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,128,8,128,1,float16,float16,1023,0.08316799998283386
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,128,8,128,1,float16,fp8,1023,0.059994667768478394
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,128,8,128,1,float16,float16,2047,0.12294399738311768
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,128,1,128,1,float16,float16,1,0.013306666165590286
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,128,8,128,1,float16,fp8,2047,0.08799999952316284
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,128,1,128,1,float16,float16,3,0.013584000368913015
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,128,1,128,1,float16,fp8,1,0.014837333311637243
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,128,1,128,1,float16,fp8,3,0.014938666174809137
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,128,1,128,1,float16,fp8,7,0.01488000030318896
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,128,1,128,1,float16,float16,7,0.013552000125249227
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,128,1,128,1,float16,fp8,15,0.016176000237464905
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,128,1,128,1,float16,fp8,31,0.020010666300853092
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,128,1,128,1,float16,float16,15,0.014111999422311783
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,128,1,128,1,float16,fp8,63,0.019797333826621372
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,128,1,128,1,float16,float16,31,0.014474666366974512
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,128,1,128,1,float16,float16,63,0.014378666877746582
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,128,1,128,1,float16,float16,127,0.016970666746298473
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,128,1,128,1,float16,float16,255,0.022511998812357586
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,128,1,128,1,float16,fp8,255,0.02342933416366577
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,128,1,128,1,float16,fp8,127,0.020031999796628952
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,128,1,128,1,float16,float16,511,0.03482666611671448
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,128,1,128,1,float16,fp8,511,0.033861334125200905
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,128,1,128,1,float16,float16,1023,0.059232001503308616
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,128,1,128,1,float16,fp8,1023,0.05402666827042898
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,128,1,128,1,float16,float16,2047,0.10754666725794475
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,128,1,128,1,float16,fp8,2047,0.09456533193588257
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,128,1,128,1,float16,float16,4095,0.20454933245976767
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,128,1,128,1,float16,fp8,8191,0.33817601203918457
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,128,1,128,1,float16,float16,8191,0.39948801199595135
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,128,1,128,1,float16,fp8,4095,0.17552000284194946
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,128,2,128,1,float16,float16,1,0.013386666774749756
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,128,1,128,1,float16,fp8,16383,0.662559986114502
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,128,2,128,1,float16,fp8,1,0.014741333822409311
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,128,2,128,1,float16,float16,3,0.013765333841244379
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,128,1,128,1,float16,float16,16383,0.7998560269673666
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,128,2,128,1,float16,fp8,3,0.015034666905800501
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,128,2,128,1,float16,float16,7,0.013482666263977686
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,128,2,128,1,float16,float16,15,0.013904000322024027
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,128,2,128,1,float16,fp8,7,0.014943999548753103
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,128,2,128,1,float16,fp8,31,0.02000533292690913
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,128,2,128,1,float16,float16,31,0.014346666634082794
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,128,2,128,1,float16,fp8,15,0.01621333385507266
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,128,2,128,1,float16,float16,63,0.014357333381970724
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,128,2,128,1,float16,float16,127,0.016597333053747814
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,128,2,128,1,float16,fp8,127,0.019962667177120846
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,128,2,128,1,float16,fp8,255,0.023226665953795116
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,128,2,128,1,float16,fp8,63,0.019797333826621372
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,128,2,128,1,float16,float16,511,0.03482666611671448
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,128,2,128,1,float16,float16,255,0.022410665949185688
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,128,2,128,1,float16,fp8,511,0.03392533212900162
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,128,2,128,1,float16,float16,1023,0.05936533212661743
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,128,2,128,1,float16,fp8,1023,0.0543039987484614
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,128,2,128,1,float16,float16,2047,0.10755200187365214
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,128,2,128,1,float16,fp8,2047,0.09448533256848653
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,128,2,128,1,float16,fp8,4095,0.17554134130477905
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,128,2,128,1,float16,float16,4095,0.2069279948870341
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,128,2,128,1,float16,float16,8191,0.4055413405100505
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,128,2,128,1,float16,fp8,8191,0.33843199412027997
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,128,2,128,1,float16,float16,16383,0.8871733347574869
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,128,4,128,1,float16,float16,1,0.013301332791646322
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,128,4,128,1,float16,fp8,1,0.014933332800865173
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,128,2,128,1,float16,fp8,16383,0.6630560159683228
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,128,4,128,1,float16,float16,3,0.013823999712864557
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,128,4,128,1,float16,fp8,3,0.015072000523408255
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,128,4,128,1,float16,float16,7,0.013429333766301474
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,128,4,128,1,float16,float16,31,0.014378666877746582
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,128,4,128,1,float16,float16,15,0.014218666901191076
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,128,4,128,1,float16,fp8,31,0.01993600030740102
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,128,4,128,1,float16,fp8,15,0.016255999604860943
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,128,4,128,1,float16,fp8,7,0.015050667027632395
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,128,4,128,1,float16,float16,63,0.014592000593741735
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,128,4,128,1,float16,fp8,63,0.0199946661790212
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,128,4,128,1,float16,fp8,127,0.020117333779732387
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,128,4,128,1,float16,float16,127,0.01648533344268799
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,128,4,128,1,float16,fp8,255,0.02348800003528595
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,128,4,128,1,float16,float16,255,0.022650666534900665
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,128,4,128,1,float16,float16,511,0.015274666249752045
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,128,4,128,1,float16,fp8,511,0.012624000509579977
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,128,4,128,1,float16,float16,1023,0.016362667083740234
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,128,4,128,1,float16,fp8,1023,0.013845333208640417
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,128,4,128,1,float16,float16,2047,0.025573333104451496
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,128,4,128,1,float16,float16,4095,0.03018666555484136
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,128,4,128,1,float16,fp8,4095,0.028304000695546467
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,128,4,128,1,float16,fp8,2047,0.024266667664051056
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,128,4,128,1,float16,float16,8191,0.045328001181284584
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,128,4,128,1,float16,fp8,8191,0.04450666904449463
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,128,4,128,1,float16,float16,16383,0.06671466430028279
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,128,4,128,1,float16,fp8,16383,0.07109333574771881
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,128,8,128,1,float16,fp8,1,0.014767999450365702
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,128,8,128,1,float16,float16,1,0.013461332768201828
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,128,8,128,1,float16,float16,3,0.013717333475748697
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,128,8,128,1,float16,fp8,3,0.014981333166360855
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,128,8,128,1,float16,float16,7,0.013487999637921652
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,128,8,128,1,float16,fp8,15,0.01626666635274887
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,128,8,128,1,float16,float16,31,0.014309333016475042
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,128,8,128,1,float16,fp8,7,0.015125333021084467
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,128,8,128,1,float16,float16,15,0.01421333352724711
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,128,8,128,1,float16,float16,63,0.01440000037352244
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,128,8,128,1,float16,fp8,31,0.02015999952952067
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,128,8,128,1,float16,fp8,63,0.019839999576409657
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,128,8,128,1,float16,fp8,127,0.020069333414236706
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,128,8,128,1,float16,float16,255,0.013232000172138214
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,128,8,128,1,float16,float16,127,0.016506666938463848
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,128,8,128,1,float16,fp8,511,0.009717333440979322
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,128,8,128,1,float16,fp8,255,0.01002133327225844
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,128,8,128,1,float16,float16,511,0.01381333296497663
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,128,8,128,1,float16,float16,1023,0.019738666713237762
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,128,8,128,1,float16,float16,2047,0.02923733244339625
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,128,8,128,1,float16,fp8,2047,0.020330666253964107
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,128,8,128,1,float16,fp8,1023,0.01293333371480306
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,128,8,128,1,float16,fp8,4095,0.023733332753181458
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,128,8,128,1,float16,float16,4095,0.0409706657131513
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,128,8,128,1,float16,float16,8191,0.05876266459623972
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,128,8,128,1,float16,fp8,8191,0.037402667105197906
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,128,8,128,1,float16,fp8,16383,0.06243200103441874
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,128,8,128,1,float16,float16,16383,0.08782399694124858
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,128,1,128,1,float16,float16,1,0.21951999266942343
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,128,1,128,1,float16,float16,3,0.2246613303820292
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,128,1,128,1,float16,fp8,1,0.17116800944010416
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,128,1,128,1,float16,fp8,3,0.17628266414006552
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,128,1,128,1,float16,float16,15,0.2672106623649597
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,128,1,128,1,float16,float16,7,0.22709866364796957
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,128,1,128,1,float16,fp8,7,0.18421334028244019
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,128,1,128,1,float16,float16,31,0.26878400643666583
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,128,1,128,1,float16,fp8,15,0.23721067110697427
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,128,1,128,1,float16,fp8,31,0.23675199349721274
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,128,1,128,1,float16,float16,63,0.27034666140874225
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,128,1,128,1,float16,float16,127,0.3226826588312785
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,128,1,128,1,float16,fp8,63,0.23761065800984701
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,128,1,128,1,float16,fp8,255,0.4377280076344808
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,128,1,128,1,float16,fp8,127,0.28410132726033527
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,128,1,128,1,float16,float16,255,0.4853760004043579
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,128,1,128,1,float16,fp8,511,0.7525973320007324
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,128,1,128,1,float16,float16,511,0.8114559650421143
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,128,1,128,1,float16,float16,1023,1.4826614061991374
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,128,1,128,1,float16,fp8,1023,1.3853227297465007
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,128,2,128,1,float16,fp8,1,0.17160000403722128
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,128,2,128,1,float16,float16,1,0.21935999393463135
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,128,2,128,1,float16,float16,3,0.22450133164723715
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,128,2,128,1,float16,float16,7,0.22713599602381387
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,128,2,128,1,float16,fp8,3,0.1763040026028951
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,128,2,128,1,float16,float16,15,0.2682186762491862
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,128,2,128,1,float16,fp8,15,0.23810132344563803
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,128,2,128,1,float16,fp8,7,0.18589866161346436
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,128,2,128,1,float16,float16,31,0.2716746727625529
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,128,2,128,1,float16,float16,63,0.27355732520421344
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,128,2,128,1,float16,fp8,63,0.23891200621922812
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,128,2,128,1,float16,fp8,31,0.23742934068044028
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,128,2,128,1,float16,fp8,127,0.2839413285255432
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,128,2,128,1,float16,float16,255,0.49133865038553876
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,128,2,128,1,float16,float16,127,0.3263733386993408
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,128,2,128,1,float16,float16,511,0.8310026327768961
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,128,2,128,1,float16,fp8,255,0.4378186861673991
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,128,2,128,1,float16,fp8,511,0.7529066403706869
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,128,2,128,1,float16,float16,1023,1.4936480522155762
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,128,4,128,1,float16,float16,1,0.03505066782236099
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,128,4,128,1,float16,fp8,1,0.029322666426499683
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,128,2,128,1,float16,fp8,1023,1.3892426490783691
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,128,4,128,1,float16,fp8,3,0.029232000311215717
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,128,4,128,1,float16,float16,3,0.03518400092919668
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,128,4,128,1,float16,fp8,7,0.02937600016593933
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,128,4,128,1,float16,float16,7,0.035071998834609985
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,128,4,128,1,float16,float16,15,0.03492266684770584
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,128,4,128,1,float16,fp8,15,0.029029332101345062
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,128,4,128,1,float16,float16,31,0.035114665826161705
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,128,4,128,1,float16,float16,63,0.03489600121974945
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,128,4,128,1,float16,fp8,31,0.02922133356332779
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,128,4,128,1,float16,float16,127,0.03470933437347412
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,128,4,128,1,float16,fp8,63,0.02922666569550832
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,128,4,128,1,float16,fp8,255,0.07563733557860057
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,128,4,128,1,float16,float16,255,0.06668266654014587
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,128,4,128,1,float16,float16,511,0.08424533406893413
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,128,4,128,1,float16,fp8,127,0.03249600032965342
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,128,4,128,1,float16,fp8,511,0.11373333136240642
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,128,4,128,1,float16,fp8,1023,0.14063466588656107
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,128,4,128,1,float16,float16,1023,0.10628799597422282
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,128,8,128,1,float16,fp8,1,0.022437334060668945
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,128,8,128,1,float16,float16,3,0.04816000163555145
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,128,8,128,1,float16,float16,1,0.04816000163555145
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,128,8,128,1,float16,float16,7,0.04818133513132731
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,128,8,128,1,float16,fp8,3,0.022495999932289124
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,128,8,128,1,float16,float16,15,0.048197334011395775
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,128,8,128,1,float16,float16,31,0.047925333182017006
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,128,8,128,1,float16,fp8,7,0.022474666436513264
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,128,8,128,1,float16,fp8,31,0.022431999444961548
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,128,8,128,1,float16,fp8,15,0.022389332453409832
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,128,8,128,1,float16,fp8,63,0.022645334402720135
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,128,8,128,1,float16,fp8,127,0.02640533447265625
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,128,8,128,1,float16,float16,63,0.048026666045188904
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,128,8,128,1,float16,float16,127,0.04920533299446106
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,128,8,128,1,float16,float16,255,0.056426664193471275
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,128,8,128,1,float16,float16,511,0.07338133454322815
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,128,8,128,1,float16,fp8,255,0.03702933341264725
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,128,8,128,1,float16,fp8,511,0.05486933390299479
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,128,8,128,1,float16,float16,1023,0.10800533493359883
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,128,8,128,1,float16,fp8,1023,0.08299200236797333
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,256,1,128,1,128,1,float16,float16,3,0.4430933396021525
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,256,1,128,1,128,1,float16,float16,1,0.4332266648610433
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,256,1,128,1,128,1,float16,fp8,1,0.3378933270772298
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,256,1,128,1,128,1,float16,fp8,3,0.3477226495742798
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,256,1,128,1,128,1,float16,float16,7,0.4486453135808309
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,256,1,128,1,128,1,float16,fp8,15,0.468938668568929
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,256,1,128,1,128,1,float16,float16,15,0.5292693376541138
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,256,1,128,1,128,1,float16,fp8,7,0.36177066961924237
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,256,1,128,1,128,1,float16,float16,31,0.5319519837697347
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,256,1,128,1,128,1,float16,fp8,63,0.4699466625849406
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,256,1,128,1,128,1,float16,float16,63,0.5351680119832357
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,256,1,128,1,128,1,float16,fp8,31,0.46806931495666504
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,256,1,128,1,128,1,float16,fp8,127,0.562175989151001
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,256,1,128,1,128,1,float16,float16,255,0.9686400095621744
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,256,1,128,1,128,1,float16,float16,127,0.6393226782480875
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,256,1,128,2,128,1,float16,float16,1,0.4329440196355184
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,256,1,128,2,128,1,float16,fp8,1,0.338373343149821
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,256,1,128,1,128,1,float16,fp8,255,0.867743968963623
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,256,1,128,2,128,1,float16,float16,3,0.4436266819636027
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,256,1,128,2,128,1,float16,float16,7,0.44781335194905597
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,256,1,128,2,128,1,float16,fp8,3,0.3472586472829183
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,256,1,128,2,128,1,float16,fp8,7,0.3664533297220866
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,256,1,128,2,128,1,float16,fp8,15,0.4707893530527751
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,256,1,128,2,128,1,float16,float16,15,0.5321760177612305
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,256,1,128,2,128,1,float16,float16,31,0.5383573373158773
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,256,1,128,2,128,1,float16,fp8,31,0.4700053135553996
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,256,1,128,2,128,1,float16,fp8,63,0.4721279939015706
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,256,1,128,2,128,1,float16,fp8,127,0.5627839962641398
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,256,1,128,2,128,1,float16,float16,63,0.5425120194753011
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,256,1,128,2,128,1,float16,float16,127,0.6554133494695028
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,256,1,128,2,128,1,float16,float16,255,1.0003413359324138
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,256,1,128,4,128,1,float16,float16,1,0.06332799792289734
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,256,1,128,2,128,1,float16,fp8,255,0.8694506486256918
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,256,1,128,4,128,1,float16,float16,3,0.06333866715431213
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,256,1,128,4,128,1,float16,fp8,3,0.05693333347638448
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,256,1,128,4,128,1,float16,fp8,1,0.05513600011666616
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,256,1,128,4,128,1,float16,float16,7,0.06380799909432729
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,256,1,128,4,128,1,float16,float16,15,0.06303999821345012
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,256,1,128,4,128,1,float16,fp8,7,0.055215999484062195
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,256,1,128,4,128,1,float16,fp8,15,0.055029332637786865
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,256,1,128,4,128,1,float16,float16,31,0.06358933448791504
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,256,1,128,4,128,1,float16,fp8,31,0.05539200206597646
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,256,1,128,4,128,1,float16,float16,63,0.06394133468468984
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,256,1,128,4,128,1,float16,fp8,127,0.06597333153088887
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,256,1,128,4,128,1,float16,fp8,63,0.055258666475613914
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,256,1,128,4,128,1,float16,float16,127,0.06468266745408376
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,256,1,128,4,128,1,float16,float16,255,0.07341333230336507
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,256,1,128,8,128,1,float16,fp8,1,0.04340266684691111
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,256,1,128,8,128,1,float16,float16,3,0.08826133608818054
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,256,1,128,8,128,1,float16,float16,1,0.08801600337028503
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,256,1,128,4,128,1,float16,fp8,255,0.10100799798965454
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,256,1,128,8,128,1,float16,fp8,3,0.04348266621430715
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,256,1,128,8,128,1,float16,float16,7,0.08813866972923279
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,256,1,128,8,128,1,float16,fp8,7,0.04348266621430715
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,256,1,128,8,128,1,float16,fp8,15,0.04349866509437561
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,256,1,128,8,128,1,float16,float16,31,0.08918399612108867
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,256,1,128,8,128,1,float16,float16,15,0.08802666266759236
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,256,1,128,8,128,1,float16,fp8,63,0.04355733096599579
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,256,1,128,8,128,1,float16,float16,63,0.08887466788291931
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,256,1,128,8,128,1,float16,fp8,31,0.04561600089073181
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,256,1,128,8,128,1,float16,float16,255,0.10234133402506511
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,256,1,128,8,128,1,float16,fp8,255,0.07763733466466267
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,256,1,128,8,128,1,float16,fp8,127,0.06300800045331319
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,256,1,128,8,128,1,float16,float16,127,0.08989866574605306
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,512,1,128,1,128,1,float16,fp8,1,0.6722026666005453
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,512,1,128,1,128,1,float16,float16,3,0.8823200066884359
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,512,1,128,1,128,1,float16,float16,1,0.8613599936167399
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,512,1,128,1,128,1,float16,fp8,3,0.6913599967956543
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,512,1,128,1,128,1,float16,float16,7,0.8926400343577067
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,512,1,128,1,128,1,float16,fp8,7,0.7199733257293701
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,512,1,128,1,128,1,float16,float16,15,1.056437333424886
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,512,1,128,1,128,1,float16,float16,31,1.0636106332143147
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,512,1,128,1,128,1,float16,fp8,31,0.931717316309611
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,512,1,128,1,128,1,float16,fp8,15,0.933141311009725
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,512,1,128,1,128,1,float16,float16,63,1.080410639444987
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,512,1,128,1,128,1,float16,float16,127,1.2955786387125652
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,512,1,128,1,128,1,float16,fp8,63,0.9376800060272217
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,512,1,128,1,128,1,float16,fp8,127,1.1254453659057617
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,512,1,128,2,128,1,float16,float16,1,0.8622506459554037
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,512,1,128,2,128,1,float16,fp8,1,0.6732266743977865
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,512,1,128,2,128,1,float16,float16,3,0.8850186665852865
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,512,1,128,2,128,1,float16,fp8,3,0.6910133361816406
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,512,1,128,2,128,1,float16,fp8,7,0.7281386852264404
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,512,1,128,2,128,1,float16,float16,7,0.8923947016398112
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,512,1,128,2,128,1,float16,fp8,15,0.9376800060272217
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,512,1,128,2,128,1,float16,float16,31,1.0892106691996257
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,512,1,128,2,128,1,float16,float16,15,1.0637280146280925
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,512,1,128,2,128,1,float16,fp8,31,0.9366880257924398
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,512,1,128,2,128,1,float16,float16,63,1.1074613730112712
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,512,1,128,2,128,1,float16,float16,127,1.324677308400472
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,512,1,128,4,128,1,float16,float16,1,0.11972799897193909
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,512,1,128,2,128,1,float16,fp8,63,0.9449706872304281
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,512,1,128,4,128,1,float16,fp8,1,0.16426666577657065
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,512,1,128,4,128,1,float16,fp8,3,0.16358400384585062
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,512,1,128,2,128,1,float16,fp8,127,1.1307040055592854
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,512,1,128,4,128,1,float16,float16,7,0.12012799580891927
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,512,1,128,4,128,1,float16,float16,3,0.11943466464678447
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,512,1,128,4,128,1,float16,fp8,7,0.1637333333492279
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,512,1,128,4,128,1,float16,float16,15,0.11988799770673116
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,512,1,128,4,128,1,float16,float16,31,0.12091199556986491
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,512,1,128,4,128,1,float16,fp8,31,0.16307199994723
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,512,1,128,4,128,1,float16,fp8,63,0.1614400049050649
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,512,1,128,4,128,1,float16,fp8,15,0.16160533825556436
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,512,1,128,4,128,1,float16,float16,63,0.12120532989501953
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,512,1,128,4,128,1,float16,float16,127,0.12099200487136841
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,512,1,128,4,128,1,float16,fp8,127,0.17863466342290243
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,512,1,128,8,128,1,float16,float16,1,0.16531200210253397
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,512,1,128,8,128,1,float16,fp8,1,0.10876267155011494
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,512,1,128,8,128,1,float16,float16,3,0.16570666432380676
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,512,1,128,8,128,1,float16,fp8,3,0.10831999778747559
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,512,1,128,8,128,1,float16,float16,7,0.1662613352139791
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,512,1,128,8,128,1,float16,fp8,15,0.10849066575368245
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,512,1,128,8,128,1,float16,float16,31,0.16926934321721396
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,512,1,128,8,128,1,float16,fp8,7,0.10867733756701152
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,512,1,128,8,128,1,float16,float16,15,0.16757865746816
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,512,1,128,8,128,1,float16,fp8,31,0.10877333084742229
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,512,1,128,8,128,1,float16,float16,63,0.16826667388280234
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,128,1,128,1,float16,float16,1,0.01882133384545644
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,512,1,128,8,128,1,float16,fp8,63,0.10825600226720174
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,128,1,128,1,float16,fp8,3,0.015226667126019796
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,128,1,128,1,float16,fp8,1,0.014581333845853806
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,512,1,128,8,128,1,float16,float16,127,0.16823466618855795
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,128,1,128,1,float16,float16,3,0.019007999449968338
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,512,1,128,8,128,1,float16,fp8,127,0.12339733044306438
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,128,1,128,1,float16,float16,7,0.019296000401178997
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,128,1,128,1,float16,fp8,7,0.01553600033124288
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,128,1,128,1,float16,float16,15,0.021226666867733
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,128,1,128,1,float16,fp8,15,0.019317333896954853
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,128,1,128,1,float16,float16,31,0.021573332448800404
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,128,1,128,1,float16,fp8,31,0.01929066702723503
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,128,1,128,1,float16,fp8,127,0.02290133386850357
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,128,1,128,1,float16,float16,127,0.025231999655564625
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,128,1,128,1,float16,fp8,63,0.01945066700379054
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,128,1,128,1,float16,float16,63,0.021546666820844013
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,128,1,128,1,float16,fp8,255,0.033215999603271484
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,128,1,128,1,float16,float16,255,0.03628266602754593
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,128,1,128,1,float16,float16,511,0.05820799867312113
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,128,1,128,1,float16,float16,1023,0.1018506685892741
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,128,1,128,1,float16,fp8,511,0.054042667150497437
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,128,1,128,1,float16,fp8,1023,0.09611733754475911
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,128,1,128,1,float16,float16,4095,0.3627893527348836
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,128,1,128,1,float16,fp8,4095,0.34570666154225665
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,128,1,128,1,float16,float16,2047,0.18887466192245483
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,128,1,128,1,float16,fp8,2047,0.17966399590174356
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,128,1,128,1,float16,float16,8191,0.7103359699249268
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,128,1,128,1,float16,fp8,8191,0.678266684214274
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,128,2,128,1,float16,float16,1,0.018751999984184902
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,128,1,128,1,float16,float16,16383,1.6775093078613281
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,128,1,128,1,float16,fp8,16383,1.4133386611938477
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,128,2,128,1,float16,fp8,1,0.014837333311637243
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,128,2,128,1,float16,fp8,3,0.015210667004187902
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,128,2,128,1,float16,float16,3,0.01922133316596349
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,128,2,128,1,float16,float16,7,0.01941866676012675
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,128,2,128,1,float16,float16,15,0.021189334491888683
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,128,2,128,1,float16,fp8,7,0.01570133368174235
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,128,2,128,1,float16,fp8,15,0.01937066639463107
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,128,2,128,1,float16,float16,31,0.021562665700912476
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,128,2,128,1,float16,fp8,31,0.01941866676012675
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,128,2,128,1,float16,float16,63,0.02162133405605952
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,128,2,128,1,float16,float16,127,0.025445332129796345
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,128,2,128,1,float16,fp8,63,0.019434666881958645
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,128,2,128,1,float16,fp8,127,0.02298133323589961
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,128,2,128,1,float16,float16,255,0.03633599976698557
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,128,2,128,1,float16,fp8,255,0.03305600086847941
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,128,2,128,1,float16,float16,511,0.05804799993832906
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,128,2,128,1,float16,fp8,511,0.05429333448410034
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,128,2,128,1,float16,fp8,1023,0.09610133369763692
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,128,2,128,1,float16,fp8,2047,0.17964265743891397
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,128,2,128,1,float16,float16,1023,0.10222933689753215
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,128,2,128,1,float16,float16,4095,0.3662399848302205
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,128,2,128,1,float16,float16,2047,0.18902933597564697
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,128,2,128,1,float16,fp8,4095,0.34515734513600665
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,128,2,128,1,float16,float16,8191,0.7449706395467123
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,128,4,128,1,float16,fp8,1,0.014842666685581207
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,128,2,128,1,float16,float16,16383,1.8509173393249512
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,128,2,128,1,float16,fp8,16383,1.4494986534118652
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,128,2,128,1,float16,fp8,8191,0.6784959634145101
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,128,4,128,1,float16,fp8,3,0.014975999792416891
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,128,4,128,1,float16,float16,7,0.01932266727089882
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,128,4,128,1,float16,float16,1,0.018976000448067982
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,128,4,128,1,float16,float16,3,0.019141333798567455
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,128,4,128,1,float16,fp8,7,0.015781333049138386
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,128,4,128,1,float16,float16,15,0.021349333226680756
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,128,4,128,1,float16,fp8,15,0.01970133309563001
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,128,4,128,1,float16,fp8,63,0.01952533299724261
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,128,4,128,1,float16,fp8,31,0.019402666638294857
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,128,4,128,1,float16,float16,63,0.02161066730817159
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,128,4,128,1,float16,float16,127,0.025311999022960663
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,128,4,128,1,float16,float16,31,0.021477334201335907
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,128,4,128,1,float16,fp8,127,0.022895999252796173
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,128,4,128,1,float16,float16,255,0.015077333897352219
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,128,4,128,1,float16,fp8,255,0.01191466674208641
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,128,4,128,1,float16,float16,511,0.015589332828919092
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,128,4,128,1,float16,float16,1023,0.023552000522613525
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,128,4,128,1,float16,fp8,1023,0.025407999753952026
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,128,4,128,1,float16,fp8,511,0.013647999614477158
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,128,4,128,1,float16,float16,2047,0.034858666360378265
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,128,4,128,1,float16,fp8,2047,0.03367999941110611
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,128,4,128,1,float16,float16,4095,0.05096533397833506
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,128,4,128,1,float16,fp8,4095,0.05354666709899902
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,128,4,128,1,float16,float16,8191,0.07394133508205414
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,128,8,128,1,float16,fp8,1,0.007413333281874657
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,128,8,128,1,float16,float16,1,0.010741333166758219
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,128,4,128,1,float16,fp8,8191,0.07759466767311096
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,128,4,128,1,float16,float16,16383,0.10867200295130412
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,128,8,128,1,float16,float16,3,0.010826667149861654
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,128,4,128,1,float16,fp8,16383,0.13493333260218301
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,128,8,128,1,float16,fp8,3,0.008074666683872541
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,128,8,128,1,float16,float16,7,0.010741333166758219
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,128,8,128,1,float16,fp8,7,0.007365333537260692
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,128,8,128,1,float16,float16,15,0.010938666760921478
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,128,8,128,1,float16,fp8,31,0.0075093333919843035
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,128,8,128,1,float16,float16,127,0.010591999938090643
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,128,8,128,1,float16,float16,31,0.01097600037852923
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,128,8,128,1,float16,fp8,63,0.010266666611035665
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,128,8,128,1,float16,fp8,15,0.010069333637754122
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,128,8,128,1,float16,float16,63,0.011941333611806234
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,128,8,128,1,float16,fp8,127,0.00766933336853981
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,128,8,128,1,float16,fp8,255,0.009418666362762451
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,128,8,128,1,float16,float16,255,0.01370666672786077
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,128,8,128,1,float16,float16,511,0.020128000527620316
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,128,8,128,1,float16,float16,1023,0.028751999139785767
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,128,8,128,1,float16,fp8,511,0.011045332998037338
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,128,8,128,1,float16,fp8,1023,0.019509332875410717
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,128,8,128,1,float16,float16,2047,0.043680002291997276
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,128,8,128,1,float16,float16,4095,0.0675786683956782
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,128,8,128,1,float16,fp8,2047,0.02779199928045273
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,128,8,128,1,float16,fp8,4095,0.041834667325019836
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,128,8,128,1,float16,float16,8191,0.08528000116348267
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,128,8,128,1,float16,fp8,8191,0.06039466460545858
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,128,8,128,1,float16,float16,16383,0.1204213301340739
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,128,8,128,1,float16,fp8,16383,0.08785067001978557
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1024,1,128,1,128,1,float16,fp8,1,1.3785866101582844
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1024,1,128,1,128,1,float16,float16,3,1.8186453183492024
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1024,1,128,1,128,1,float16,fp8,3,1.419482707977295
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1024,1,128,1,128,1,float16,float16,7,1.8360212643941243
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1024,1,128,1,128,1,float16,float16,1,1.776586691538493
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1024,1,128,1,128,1,float16,float16,15,2.1343413988749185
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1024,1,128,1,128,1,float16,fp8,7,1.4721439679463704
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1024,1,128,1,128,1,float16,fp8,15,1.8722826639811199
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1024,1,128,1,128,1,float16,float16,31,2.1483786900838218
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1024,1,128,1,128,1,float16,fp8,31,1.8696959813435872
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1024,1,128,1,128,1,float16,fp8,63,1.8806986808776855
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1024,1,128,1,128,1,float16,float16,63,2.1669012705485025
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1024,1,128,2,128,1,float16,float16,1,1.780197302500407
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1024,1,128,2,128,1,float16,fp8,1,1.3810985883076985
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1024,1,128,2,128,1,float16,float16,7,1.8381333351135254
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1024,1,128,2,128,1,float16,fp8,7,1.485904057820638
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1024,1,128,2,128,1,float16,float16,3,1.8248000144958496
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1024,1,128,2,128,1,float16,float16,15,2.14847469329834
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1024,1,128,2,128,1,float16,fp8,3,1.4203732808430989
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1024,1,128,2,128,1,float16,fp8,15,1.8804853757222493
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1024,1,128,2,128,1,float16,float16,63,2.208127975463867
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1024,1,128,2,128,1,float16,fp8,63,1.8899253209431965
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1024,1,128,4,128,1,float16,fp8,1,0.32465600967407227
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1024,1,128,2,128,1,float16,float16,31,2.188042640686035
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1024,1,128,2,128,1,float16,fp8,31,1.8773867289225261
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1024,1,128,4,128,1,float16,float16,1,0.2321280042330424
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1024,1,128,4,128,1,float16,float16,3,0.23085866371790567
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1024,1,128,4,128,1,float16,fp8,3,0.32444800933202106
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1024,1,128,4,128,1,float16,float16,7,0.23122133811314902
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1024,1,128,4,128,1,float16,fp8,7,0.3257439931233724
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1024,1,128,4,128,1,float16,float16,15,0.23146132628122965
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1024,1,128,4,128,1,float16,fp8,15,0.32547734181086224
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1024,1,128,4,128,1,float16,float16,31,0.23184533913930258
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1024,1,128,4,128,1,float16,fp8,31,0.3253920078277588
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1024,1,128,4,128,1,float16,float16,63,0.22986133893330893
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1024,1,128,8,128,1,float16,float16,1,0.3277920087178548
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1024,1,128,4,128,1,float16,fp8,63,0.324565331141154
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1024,1,128,8,128,1,float16,fp8,1,0.20824533700942993
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1024,1,128,8,128,1,float16,float16,3,0.32738665739695233
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1024,1,128,8,128,1,float16,fp8,7,0.2079733411471049
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1024,1,128,8,128,1,float16,float16,7,0.3281066616376241
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1024,1,128,8,128,1,float16,fp8,15,0.2081973354021708
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1024,1,128,8,128,1,float16,fp8,3,0.20787199338277182
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1024,1,128,8,128,1,float16,float16,15,0.3279573321342468
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1024,1,128,8,128,1,float16,float16,31,0.3281973401705424
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1024,1,128,8,128,1,float16,float16,63,0.3255946636199951
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1024,1,128,8,128,1,float16,fp8,31,0.20806399981180826
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1024,1,128,8,128,1,float16,fp8,63,0.20713067054748535
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2048,1,128,1,128,1,float16,fp8,1,2.751215934753418
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2048,1,128,1,128,1,float16,fp8,3,2.833301226298014
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2048,1,128,1,128,1,float16,fp8,7,2.937093416849772
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2048,1,128,1,128,1,float16,float16,7,3.6654399236043296
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2048,1,128,1,128,1,float16,float16,15,4.263759930928548
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2048,1,128,1,128,1,float16,float16,3,3.6270666122436523
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2048,1,128,1,128,1,float16,float16,1,3.5469652811686196
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2048,1,128,1,128,1,float16,float16,31,4.288890520731608
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2048,1,128,1,128,1,float16,fp8,31,3.7318719228108725
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2048,1,128,2,128,1,float16,fp8,1,2.7569173177083335
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2048,1,128,2,128,1,float16,float16,1,3.5563252766927085
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2048,1,128,2,128,1,float16,fp8,3,2.8345438639322915
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2048,1,128,1,128,1,float16,fp8,15,3.739114761352539
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2048,1,128,2,128,1,float16,float16,3,3.6409918467203775
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2048,1,128,2,128,1,float16,float16,7,3.667130788167318
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2048,1,128,2,128,1,float16,fp8,7,2.964282671610514
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2048,1,128,2,128,1,float16,fp8,15,3.754159927368164
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2048,1,128,2,128,1,float16,float16,15,4.289471944173177
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2048,1,128,2,128,1,float16,float16,31,4.370272000630696
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2048,1,128,2,128,1,float16,fp8,31,3.7484000523885093
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2048,1,128,4,128,1,float16,float16,1,0.45023465156555176
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2048,1,128,4,128,1,float16,fp8,1,0.642959992090861
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2048,1,128,4,128,1,float16,fp8,3,0.6409013271331787
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2048,1,128,4,128,1,float16,float16,7,0.45052266120910645
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2048,1,128,4,128,1,float16,float16,15,0.44895466168721515
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2048,1,128,4,128,1,float16,fp8,7,0.6402080059051514
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2048,1,128,4,128,1,float16,float16,3,0.44941333929697674
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2048,1,128,4,128,1,float16,fp8,15,0.6409386793772379
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2048,1,128,4,128,1,float16,float16,31,0.45211199919382733
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2048,1,128,8,128,1,float16,fp8,1,0.4071679910024007
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2048,1,128,8,128,1,float16,float16,3,0.6447519858678182
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2048,1,128,8,128,1,float16,float16,1,0.6457173426946005
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2048,1,128,8,128,1,float16,float16,7,0.6439040104548136
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2048,1,128,8,128,1,float16,fp8,7,0.40748798847198486
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2048,1,128,4,128,1,float16,fp8,31,0.6432000001271566
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2048,1,128,8,128,1,float16,float16,15,0.6469279925028483
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2048,1,128,8,128,1,float16,fp8,3,0.40742401281992596
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,128,1,128,1,float16,float16,1,0.033802665770053864
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,128,1,128,1,float16,fp8,1,0.02548266698916753
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,128,1,128,1,float16,fp8,3,0.026485333840052288
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,128,1,128,1,float16,float16,3,0.034272000193595886
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2048,1,128,8,128,1,float16,fp8,15,0.4073226849238078
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2048,1,128,8,128,1,float16,float16,31,0.645034670829773
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,128,1,128,1,float16,float16,7,0.03426666557788849
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,128,1,128,1,float16,fp8,7,0.027237333357334137
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,128,1,128,1,float16,float16,15,0.03789866715669632
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,128,1,128,1,float16,fp8,15,0.03448000053564707
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,128,1,128,1,float16,float16,31,0.03860266755024592
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,128,1,128,1,float16,fp8,31,0.034304000437259674
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,128,1,128,1,float16,float16,63,0.03903999924659729
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,128,1,128,1,float16,float16,127,0.04600533346335093
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,128,1,128,1,float16,fp8,63,0.034458667039871216
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2048,1,128,8,128,1,float16,fp8,31,0.40743998686472577
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,128,1,128,1,float16,fp8,127,0.0405173326532046
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,128,1,128,1,float16,float16,255,0.06761066615581512
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,128,1,128,1,float16,fp8,255,0.06083733340104421
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,128,1,128,1,float16,float16,511,0.11011200149854024
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,128,1,128,1,float16,fp8,511,0.10187199711799622
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,128,1,128,1,float16,float16,1023,0.19555733601252237
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,128,1,128,1,float16,float16,4095,0.7073866526285807
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,128,1,128,1,float16,fp8,1023,0.18413333098093668
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,128,1,128,1,float16,float16,2047,0.3664906819661458
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,128,1,128,1,float16,fp8,2047,0.34804801146189374
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,128,1,128,1,float16,fp8,4095,0.6731946468353271
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,128,1,128,1,float16,float16,8191,1.4167787233988445
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,128,2,128,1,float16,float16,1,0.03364799916744232
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,128,1,128,1,float16,fp8,8191,1.3245279788970947
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,128,2,128,1,float16,fp8,1,0.025445332129796345
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,128,2,128,1,float16,float16,3,0.034202667574087776
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,128,2,128,1,float16,fp8,7,0.027450665831565857
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,128,2,128,1,float16,fp8,3,0.026528000831604004
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,128,2,128,1,float16,fp8,15,0.034517332911491394
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,128,2,128,1,float16,float16,15,0.03824000060558319
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,128,2,128,1,float16,float16,7,0.03422400106986364
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,128,2,128,1,float16,float16,31,0.03878933439652125
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,128,2,128,1,float16,fp8,31,0.03451200077931086
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,128,2,128,1,float16,fp8,63,0.03472533325354258
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,128,2,128,1,float16,fp8,127,0.040693332751592
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,128,2,128,1,float16,float16,63,0.03905600061019262
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,128,2,128,1,float16,float16,255,0.06797866523265839
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,128,2,128,1,float16,float16,127,0.046384001771608986
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,128,2,128,1,float16,fp8,255,0.06098666787147522
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,128,2,128,1,float16,float16,511,0.11065600315729777
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,128,2,128,1,float16,fp8,511,0.10197866956392924
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,128,2,128,1,float16,float16,1023,0.19604265689849854
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,128,2,128,1,float16,fp8,1023,0.18440000216166177
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,128,2,128,1,float16,fp8,2047,0.34833598136901855
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,128,2,128,1,float16,fp8,4095,0.6727840105692545
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,128,2,128,1,float16,float16,2047,0.36686933040618896
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,128,2,128,1,float16,float16,8191,1.4611573219299316
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,128,2,128,1,float16,float16,4095,0.73089599609375
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,128,2,128,1,float16,fp8,8191,1.329306681950887
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,128,4,128,1,float16,float16,1,0.012517333030700684
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,128,4,128,1,float16,fp8,1,0.009242666885256767
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,128,4,128,1,float16,float16,3,0.012351999680201212
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,128,4,128,1,float16,fp8,3,0.009189333145817121
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,128,4,128,1,float16,fp8,7,0.00903466654320558
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,128,4,128,1,float16,fp8,15,0.00914666677514712
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,128,4,128,1,float16,float16,7,0.012223999947309494
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,128,4,128,1,float16,float16,15,0.012362666428089142
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,128,4,128,1,float16,float16,31,0.01259200026591619
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,128,4,128,1,float16,fp8,31,0.009125333279371262
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,128,4,128,1,float16,float16,63,0.012266666938861212
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,128,4,128,1,float16,fp8,63,0.00915733352303505
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,128,4,128,1,float16,float16,127,0.012367999802033106
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,128,4,128,1,float16,fp8,127,0.009973333527644476
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,128,4,128,1,float16,float16,511,0.02387733260790507
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,128,4,128,1,float16,float16,255,0.016069332758585613
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,128,4,128,1,float16,fp8,255,0.012128000458081564
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,128,4,128,1,float16,fp8,511,0.022687998910744984
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,128,4,128,1,float16,float16,1023,0.034186666210492454
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,128,4,128,1,float16,fp8,1023,0.04151466737190882
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,128,4,128,1,float16,float16,2047,0.05400000015894572
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,128,4,128,1,float16,fp8,2047,0.05921066800753275
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,128,4,128,1,float16,float16,4095,0.08379733562469482
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,128,4,128,1,float16,fp8,8191,0.13595733046531677
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,128,8,128,1,float16,float16,1,0.011215999722480774
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,128,4,128,1,float16,float16,8191,0.10825066765149434
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,128,4,128,1,float16,fp8,4095,0.10873066385587056
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,128,8,128,1,float16,fp8,1,0.00847999999920527
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,128,8,128,1,float16,fp8,3,0.007242666557431221
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,128,8,128,1,float16,float16,7,0.011125333607196808
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,128,8,128,1,float16,float16,3,0.011120000233252844
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,128,8,128,1,float16,fp8,7,0.007391999786098798
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,128,8,128,1,float16,float16,15,0.011130666981140772
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,128,8,128,1,float16,fp8,31,0.007642666498819987
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,128,8,128,1,float16,fp8,15,0.008309333274761835
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,128,8,128,1,float16,fp8,63,0.007280000175038974
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,128,8,128,1,float16,float16,31,0.010938666760921478
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,128,8,128,1,float16,float16,63,0.011349332829316458
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,128,8,128,1,float16,float16,127,0.01098666712641716
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,128,8,128,1,float16,fp8,127,0.007823999971151352
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,128,8,128,1,float16,float16,255,0.021749332547187805
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,128,8,128,1,float16,fp8,255,0.010389333590865135
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,128,8,128,1,float16,fp8,511,0.018559999763965607
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,128,8,128,1,float16,fp8,1023,0.033200000723203026
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,128,8,128,1,float16,float16,2047,0.06588799754778545
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,128,8,128,1,float16,float16,511,0.02886933336655299
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,128,8,128,1,float16,float16,1023,0.04624533156553904
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,128,8,128,1,float16,fp8,2047,0.04085866610209147
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,128,8,128,1,float16,float16,4095,0.08436266581217448
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,128,8,128,1,float16,fp8,4095,0.0591839998960495
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,128,8,128,1,float16,float16,8191,0.12066133817036946
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,96,1,128,1,float16,float16,1,0.0450186679760615
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,128,8,128,1,float16,fp8,8191,0.08717333277066548
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,96,1,128,1,float16,fp8,1,0.03642133375008901
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,96,1,128,1,float16,float16,3,0.04574933151404063
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,96,1,128,1,float16,fp8,3,0.037733333806196846
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,96,1,128,1,float16,float16,7,0.046762665112813316
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,96,1,128,1,float16,fp8,7,0.03888533264398575
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,96,1,128,1,float16,float16,15,0.054976001381874084
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,96,1,128,1,float16,float16,31,0.05542933444182078
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,96,1,128,1,float16,fp8,15,0.049098665515581764
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,96,1,128,1,float16,fp8,63,0.04952000081539154
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,96,1,128,1,float16,fp8,31,0.049135997891426086
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,96,1,128,1,float16,float16,127,0.06644266843795776
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,96,1,128,1,float16,float16,63,0.05579733351866404
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,96,1,128,1,float16,fp8,127,0.05835199852784475
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,96,1,128,1,float16,float16,255,0.09845333298047383
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,96,1,128,1,float16,fp8,255,0.0886346697807312
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,96,1,128,1,float16,float16,511,0.1616373360157013
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,96,1,128,1,float16,fp8,511,0.1497066617012024
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,96,1,128,1,float16,fp8,1023,0.2725173234939575
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,96,1,128,1,float16,float16,1023,0.28886399666468304
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,96,1,128,1,float16,float16,2047,0.5413333177566528
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,96,1,128,1,float16,float16,4095,1.0704267024993896
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,96,1,128,1,float16,fp8,2047,0.5165226856867472
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,96,2,128,1,float16,float16,1,0.04505600035190582
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,96,1,128,1,float16,fp8,4095,1.0000426769256592
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,96,2,128,1,float16,fp8,1,0.03661333272854487
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,96,2,128,1,float16,float16,3,0.04590400060017904
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,96,2,128,1,float16,fp8,7,0.03933866570393244
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,96,2,128,1,float16,fp8,15,0.049466664592425026
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,96,2,128,1,float16,fp8,3,0.03749333322048187
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,96,2,128,1,float16,float16,31,0.05568000177542368
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,96,2,128,1,float16,float16,7,0.046485334634780884
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,96,2,128,1,float16,float16,15,0.05482666691144308
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,96,2,128,1,float16,fp8,31,0.04934399823347727
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,96,2,128,1,float16,float16,63,0.055973331133524575
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,96,2,128,1,float16,fp8,63,0.049626668294270836
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,96,2,128,1,float16,float16,127,0.06642666459083557
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,96,2,128,1,float16,float16,255,0.0988213320573171
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,96,2,128,1,float16,fp8,127,0.05862933397293091
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,96,2,128,1,float16,fp8,255,0.08861866593360901
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,96,2,128,1,float16,float16,511,0.16170133153597513
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,96,2,128,1,float16,fp8,511,0.14970133701960245
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,96,2,128,1,float16,fp8,1023,0.27269333600997925
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,96,2,128,1,float16,fp8,2047,0.5164639949798584
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,96,2,128,1,float16,float16,1023,0.28897066911061603
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,96,2,128,1,float16,float16,4095,1.0881386597951253
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,96,2,128,1,float16,float16,2047,0.5560373465220133
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,96,2,128,1,float16,fp8,4095,1.0027680397033691
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,96,4,128,1,float16,float16,3,0.012896000097195307
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,96,4,128,1,float16,float16,7,0.013045333325862885
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,96,4,128,1,float16,float16,31,0.013141332815090815
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,96,4,128,1,float16,float16,1,0.012917333592971167
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,96,4,128,1,float16,float16,15,0.012815999488035837
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,96,4,128,1,float16,float16,63,0.012847999731699625
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,96,4,128,1,float16,float16,127,0.012784000486135483
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,96,4,128,1,float16,float16,255,0.025536000728607178
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,96,4,128,1,float16,float16,1023,0.05557866891225179
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,96,4,128,1,float16,float16,511,0.03367999941110611
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,96,4,128,1,float16,float16,2047,0.07950399816036224
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,96,4,128,1,float16,float16,4095,0.10433600346247356
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,96,8,128,1,float16,float16,1,0.016250666230916977
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,96,8,128,1,float16,float16,7,0.01609066625436147
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,96,8,128,1,float16,float16,3,0.01568000018596649
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,96,8,128,1,float16,float16,15,0.015573333948850632
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,96,8,128,1,float16,fp8,1,0.008016000191370646
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,96,8,128,1,float16,fp8,3,0.007850666840871176
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,96,8,128,1,float16,fp8,7,0.008389333263039589
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,96,8,128,1,float16,float16,63,0.015925332903862
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,96,8,128,1,float16,fp8,15,0.008394666636983553
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,96,8,128,1,float16,fp8,31,0.007637333124876022
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,96,8,128,1,float16,fp8,63,0.007717333113153775
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,96,8,128,1,float16,float16,31,0.015765332927306492
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,96,8,128,1,float16,float16,127,0.016042667130629223
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,96,8,128,1,float16,fp8,127,0.00871999996403853
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,96,8,128,1,float16,float16,255,0.030746666093667347
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,96,8,128,1,float16,fp8,255,0.016805333395799
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,96,8,128,1,float16,fp8,511,0.03018666555484136
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,96,8,128,1,float16,fp8,1023,0.03792533278465271
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,96,8,128,1,float16,float16,511,0.04674666623274485
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,96,8,128,1,float16,float16,1023,0.061434666315714516
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,96,8,128,1,float16,fp8,2047,0.055919999877611794
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,96,8,128,1,float16,fp8,4095,0.08359466989835103
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,96,8,128,1,float16,float16,4095,0.11775466799736023
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,96,1,128,1,float16,float16,1,0.010175999874869982
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,96,1,128,1,float16,fp8,1,0.009472000102202097
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,96,1,128,1,float16,float16,3,0.008453333129485449
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,96,1,128,1,float16,fp8,3,0.009610666582981745
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,96,1,128,1,float16,float16,7,0.008330666770537695
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,96,1,128,1,float16,fp8,7,0.00956266683836778
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,96,8,128,1,float16,float16,2047,0.08248533308506012
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,96,1,128,1,float16,fp8,15,0.009519999846816063
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,96,1,128,1,float16,float16,31,0.009359999870260557
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,96,1,128,1,float16,float16,15,0.008565333361426989
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,96,1,128,1,float16,float16,63,0.009317333499590555
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,96,1,128,1,float16,float16,127,0.009642666826645533
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,96,1,128,1,float16,fp8,63,0.012159999459981918
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,96,1,128,1,float16,fp8,31,0.010442666709423065
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,96,1,128,1,float16,fp8,127,0.012096000214417776
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,96,1,128,1,float16,fp8,255,0.012309333930412928
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,96,1,128,1,float16,float16,255,0.010575999816258749
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,96,1,128,1,float16,float16,511,0.02804800122976303
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,96,1,128,1,float16,fp8,511,0.028010666370391846
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,96,1,128,1,float16,float16,1023,0.03263466556866964
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,96,1,128,1,float16,fp8,1023,0.03232000023126602
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,96,1,128,1,float16,float16,2047,0.04386133452256521
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,96,1,128,1,float16,fp8,2047,0.043354665239652
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,96,1,128,1,float16,float16,4095,0.06597333153088887
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,96,1,128,1,float16,fp8,4095,0.06471466521422069
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,96,1,128,1,float16,float16,8191,0.11009599765141805
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,96,1,128,1,float16,float16,16383,0.19779199361801147
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,96,1,128,1,float16,fp8,8191,0.1072266697883606
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,96,1,128,1,float16,fp8,16383,0.1920213301976522
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,96,2,128,1,float16,float16,1,0.00842666688064734
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,96,2,128,1,float16,fp8,1,0.009466666728258133
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,96,2,128,1,float16,float16,3,0.008378666515151659
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,96,2,128,1,float16,fp8,3,0.009589333087205887
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,96,2,128,1,float16,float16,7,0.008389333263039589
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,96,2,128,1,float16,fp8,7,0.009546666716535887
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,96,2,128,1,float16,float16,15,0.008602666358153025
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,96,2,128,1,float16,fp8,15,0.009583999713261923
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,96,2,128,1,float16,float16,31,0.009312000125646591
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,96,2,128,1,float16,fp8,31,0.011962667107582092
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,96,2,128,1,float16,float16,63,0.02089600016673406
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,96,2,128,1,float16,fp8,63,0.017893332988023758
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,96,2,128,1,float16,fp8,127,0.012223999947309494
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,96,2,128,1,float16,float16,127,0.009541333342591921
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,96,2,128,1,float16,fp8,255,0.012128000458081564
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,96,2,128,1,float16,float16,255,0.010490667074918747
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,96,2,128,1,float16,float16,511,0.028064000109831493
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,96,2,128,1,float16,fp8,511,0.027866666515668232
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,96,2,128,1,float16,fp8,1023,0.03236266722281774
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,96,2,128,1,float16,float16,1023,0.032645332316557564
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,96,2,128,1,float16,float16,2047,0.04387199878692627
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,96,2,128,1,float16,fp8,2047,0.04370133578777313
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,96,2,128,1,float16,float16,4095,0.06578133503595988
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,96,2,128,1,float16,fp8,4095,0.06490133206049602
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,96,2,128,1,float16,float16,8191,0.10998400052388509
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,96,2,128,1,float16,fp8,8191,0.1074773371219635
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,96,2,128,1,float16,fp8,16383,0.19223467508951822
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,96,2,128,1,float16,float16,16383,0.1977013349533081
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,96,4,128,1,float16,float16,1,0.008320000022649765
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,96,4,128,1,float16,float16,3,0.008453333129485449
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,96,4,128,1,float16,float16,7,0.008314666648705801
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,96,4,128,1,float16,float16,15,0.008757333581646284
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,96,4,128,1,float16,float16,31,0.01089599976936976
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,96,4,128,1,float16,float16,127,0.009509333098928133
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,96,4,128,1,float16,float16,63,0.0100426667680343
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,96,4,128,1,float16,float16,255,0.010378666842977205
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,96,4,128,1,float16,float16,511,0.027845333019892376
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,96,4,128,1,float16,float16,1023,0.032458665470282234
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,96,4,128,1,float16,float16,2047,0.016943999876578648
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,96,4,128,1,float16,float16,4095,0.02059200033545494
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,96,4,128,1,float16,float16,8191,0.02250666668017705
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,96,4,128,1,float16,float16,16383,0.02516266703605652
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,96,8,128,1,float16,fp8,1,0.009413333609700203
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,96,8,128,1,float16,float16,1,0.009056000038981438
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,96,8,128,1,float16,float16,3,0.008266666904091835
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,96,8,128,1,float16,fp8,3,0.014602666099866232
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,96,8,128,1,float16,float16,7,0.008453333129485449
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,96,8,128,1,float16,fp8,7,0.010015999898314476
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,96,8,128,1,float16,float16,15,0.008538666491707167
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,96,8,128,1,float16,fp8,15,0.009877333417534828
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,96,8,128,1,float16,float16,31,0.00921066664159298
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,96,8,128,1,float16,fp8,31,0.010426666587591171
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,96,8,128,1,float16,float16,63,0.009509333098928133
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,96,8,128,1,float16,fp8,63,0.012442667037248611
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,96,8,128,1,float16,float16,127,0.00938666673998038
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,96,8,128,1,float16,fp8,127,0.01231466606259346
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,96,8,128,1,float16,float16,255,0.010634666929642359
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,96,8,128,1,float16,fp8,255,0.01240533341964086
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,96,8,128,1,float16,float16,511,0.027893332143624622
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,96,8,128,1,float16,fp8,511,0.027888000011444092
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,96,8,128,1,float16,float16,1023,0.013616000612576803
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,96,8,128,1,float16,fp8,1023,0.009839999799927076
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,96,8,128,1,float16,float16,2047,0.015077333897352219
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,96,8,128,1,float16,fp8,2047,0.011253333340088526
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,96,8,128,1,float16,float16,8191,0.025008000433444977
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,96,8,128,1,float16,fp8,8191,0.017664000391960144
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,96,8,128,1,float16,float16,16383,0.030016000072161358
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,96,8,128,1,float16,fp8,16383,0.021141332884629566
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,96,1,128,1,float16,float16,1,0.01357866699496905
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,96,1,128,1,float16,fp8,1,0.00961599995692571
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,96,1,128,1,float16,float16,3,0.013541333377361298
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,96,1,128,1,float16,fp8,3,0.010197333370645842
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,96,8,128,1,float16,fp8,4095,0.012896000097195307
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,96,8,128,1,float16,float16,4095,0.015781333049138386
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,96,1,128,1,float16,float16,7,0.013584000368913015
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,96,1,128,1,float16,fp8,7,0.008933333059151968
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,96,1,128,1,float16,float16,15,0.013925333817799887
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,96,1,128,1,float16,float16,63,0.01544533297419548
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,96,1,128,1,float16,fp8,15,0.009541333342591921
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,96,1,128,1,float16,float16,31,0.015450666348139444
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,96,1,128,1,float16,fp8,31,0.011658667276302973
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,96,1,128,1,float16,fp8,63,0.011488000551859537
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,96,1,128,1,float16,float16,127,0.015696000307798386
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,96,1,128,1,float16,fp8,255,0.01328533391157786
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,96,1,128,1,float16,fp8,127,0.011600000162919363
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,96,1,128,1,float16,float16,255,0.018031999468803406
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,96,1,128,1,float16,fp8,511,0.018677332748969395
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,96,1,128,1,float16,float16,511,0.023941333095232647
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,96,1,128,1,float16,float16,1023,0.03622400015592575
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,96,1,128,1,float16,fp8,2047,0.05005866785844167
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,96,1,128,1,float16,float16,2047,0.06083733340104421
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,96,1,128,1,float16,fp8,4095,0.09149332841237386
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,96,1,128,1,float16,float16,4095,0.10912000139554341
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,96,1,128,1,float16,float16,8191,0.20551466941833496
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,96,1,128,1,float16,fp8,8191,0.1742453376452128
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,96,2,128,1,float16,fp8,1,0.009205333267649015
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,96,1,128,1,float16,float16,16383,0.39927999178568524
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,96,1,128,1,float16,fp8,16383,0.339136004447937
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,96,2,128,1,float16,fp8,3,0.008842666943868002
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,96,2,128,1,float16,float16,3,0.01358933374285698
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,96,2,128,1,float16,float16,7,0.013493333011865616
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,96,1,128,1,float16,fp8,1023,0.029018667836983997
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,96,2,128,1,float16,fp8,7,0.00902399979531765
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,96,2,128,1,float16,float16,15,0.013877333452304205
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,96,2,128,1,float16,fp8,15,0.017727999637524288
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,96,2,128,1,float16,fp8,63,0.012863999853531519
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,96,2,128,1,float16,float16,63,0.015520000209410986
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,96,2,128,1,float16,float16,31,0.015226667126019796
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,96,2,128,1,float16,fp8,31,0.011391999820868174
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,96,2,128,1,float16,fp8,127,0.011503999431928
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,96,2,128,1,float16,float16,127,0.015930666277805965
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,96,2,128,1,float16,float16,255,0.017909333109855652
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,96,2,128,1,float16,fp8,255,0.013461332768201828
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,96,2,128,1,float16,fp8,511,0.01858666663368543
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,96,2,128,1,float16,float16,511,0.023946667710940044
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,96,2,128,1,float16,float16,1,0.013397333522637686
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,96,2,128,1,float16,fp8,2047,0.05007466673851013
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,96,2,128,1,float16,float16,1023,0.0364479993780454
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,96,2,128,1,float16,fp8,1023,0.02938666691382726
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,96,2,128,1,float16,float16,2047,0.060720001657803856
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,96,2,128,1,float16,float16,4095,0.1088800032933553
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,96,2,128,1,float16,fp8,4095,0.09159466624259949
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,96,2,128,1,float16,float16,8191,0.2054133415222168
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,96,2,128,1,float16,fp8,8191,0.17405333121617636
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,96,2,128,1,float16,float16,16383,0.39930665493011475
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,96,2,128,1,float16,fp8,16383,0.3392426570256551
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,96,4,128,1,float16,float16,1,0.013514666507641474
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,96,4,128,1,float16,float16,3,0.013594667116800943
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,96,4,128,1,float16,float16,7,0.013477332890033722
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,96,4,128,1,float16,float16,15,0.014111999422311783
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,96,4,128,1,float16,float16,31,0.015376000354687372
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,96,4,128,1,float16,float16,63,0.015573333948850632
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,96,4,128,1,float16,float16,127,0.01563199982047081
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,96,4,128,1,float16,float16,255,0.017680000513792038
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,96,4,128,1,float16,float16,511,0.02402666707833608
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,96,4,128,1,float16,float16,1023,0.015439999600251516
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,96,4,128,1,float16,float16,4095,0.019386666516462963
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,96,4,128,1,float16,float16,2047,0.017978666971127193
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,96,4,128,1,float16,float16,8191,0.030410667260487873
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,96,4,128,1,float16,float16,16383,0.0369759996732076
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,96,8,128,1,float16,float16,1,0.013471999516089758
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,96,8,128,1,float16,fp8,1,0.008789333204428354
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,96,8,128,1,float16,fp8,3,0.008661333471536636
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,96,8,128,1,float16,float16,3,0.013429333766301474
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,96,8,128,1,float16,float16,7,0.013471999516089758
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,96,8,128,1,float16,fp8,7,0.008842666943868002
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,96,8,128,1,float16,float16,15,0.013983999689420065
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,96,8,128,1,float16,fp8,15,0.009578666960199675
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,96,8,128,1,float16,float16,31,0.015295999745527903
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,96,8,128,1,float16,fp8,31,0.017621333400408428
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,96,8,128,1,float16,float16,63,0.015696000307798386
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,96,8,128,1,float16,float16,127,0.01590399940808614
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,96,8,128,1,float16,fp8,63,0.013023999830087027
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,96,8,128,1,float16,fp8,127,0.01301866645614306
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,96,8,128,1,float16,fp8,255,0.013536000003417334
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,96,8,128,1,float16,float16,255,0.017903999735911686
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,96,8,128,1,float16,float16,511,0.013013333082199097
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,96,8,128,1,float16,fp8,511,0.008943999807039896
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,96,8,128,1,float16,float16,1023,0.013845333208640417
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,96,8,128,1,float16,float16,2047,0.02093333254257838
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,96,8,128,1,float16,fp8,1023,0.010005333150426546
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,96,8,128,1,float16,fp8,2047,0.012784000486135483
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,96,8,128,1,float16,float16,4095,0.024005333582560223
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,96,8,128,1,float16,fp8,4095,0.015141333142916361
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,96,8,128,1,float16,fp8,8191,0.025807999074459076
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,96,8,128,1,float16,float16,8191,0.03481066723664602
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,96,8,128,1,float16,float16,16383,0.05038933455944061
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,96,8,128,1,float16,fp8,16383,0.03266133368015289
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,96,1,128,1,float16,float16,1,0.08603200316429138
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,96,1,128,1,float16,fp8,1,0.06740266581376393
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,96,1,128,1,float16,float16,3,0.08755733569463094
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,96,1,128,1,float16,fp8,3,0.06958400209744771
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,96,1,128,1,float16,float16,7,0.0888320008913676
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,96,1,128,1,float16,fp8,7,0.0726506660381953
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,96,1,128,1,float16,float16,15,0.10345066587130229
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,96,1,128,1,float16,fp8,15,0.09246933460235596
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,96,1,128,1,float16,float16,31,0.10478400190671285
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,96,1,128,1,float16,fp8,31,0.09233066439628601
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,96,1,128,1,float16,float16,63,0.1056106686592102
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,96,1,128,1,float16,fp8,63,0.09291199843088786
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,96,1,128,1,float16,float16,127,0.12584533294041952
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,96,1,128,1,float16,float16,255,0.1881600022315979
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,96,1,128,1,float16,fp8,127,0.11003200213114421
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,96,1,128,1,float16,fp8,255,0.16859734058380127
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,96,1,128,1,float16,float16,511,0.31043734153111774
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,96,1,128,1,float16,fp8,511,0.28723732630411786
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,96,1,128,1,float16,float16,1023,0.5582773288091024
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,96,1,128,1,float16,fp8,1023,0.5264533360799154
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,96,1,128,1,float16,float16,2047,1.0708159605662029
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,96,1,128,1,float16,fp8,2047,1.0017706553141277
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,96,2,128,1,float16,fp8,1,0.06770133475462596
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,96,2,128,1,float16,float16,1,0.08575466275215149
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,96,2,128,1,float16,fp8,3,0.06956799825032552
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,96,2,128,1,float16,fp8,7,0.07292266686757405
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,96,2,128,1,float16,float16,7,0.08852266271909077
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,96,2,128,1,float16,float16,15,0.10356266299883525
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,96,2,128,1,float16,float16,31,0.10498666763305664
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,96,2,128,1,float16,fp8,15,0.09256533781687419
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,96,2,128,1,float16,float16,63,0.10596799850463867
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,96,2,128,1,float16,float16,3,0.08752533793449402
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,96,2,128,1,float16,fp8,63,0.0930560032526652
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,96,2,128,1,float16,float16,127,0.12618133425712585
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,96,2,128,1,float16,fp8,255,0.16851733128229776
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,96,2,128,1,float16,float16,255,0.18866666158040366
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,96,2,128,1,float16,fp8,127,0.11007466912269592
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,96,2,128,1,float16,float16,511,0.31099732716878253
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,96,2,128,1,float16,fp8,31,0.09231999516487122
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,96,2,128,1,float16,fp8,511,0.2874346574147542
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,96,2,128,1,float16,fp8,1023,0.5266773303349813
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,96,2,128,1,float16,fp8,2047,1.0041440327962239
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,96,2,128,1,float16,float16,2047,1.0843946933746338
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,96,2,128,1,float16,float16,1023,0.5773173173268636
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,96,4,128,1,float16,float16,1,0.019861333072185516
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,96,4,128,1,float16,float16,3,0.019626667102177937
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,96,4,128,1,float16,float16,7,0.019717333217461903
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,96,4,128,1,float16,float16,15,0.01966399947802226
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,96,4,128,1,float16,float16,31,0.019706666469573975
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,96,4,128,1,float16,float16,127,0.019354666272799175
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,96,4,128,1,float16,float16,63,0.019424000134070713
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,96,4,128,1,float16,float16,255,0.037104000647862755
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,96,4,128,1,float16,float16,511,0.057562669118245445
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,96,4,128,1,float16,float16,1023,0.07566399872303009
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,96,4,128,1,float16,float16,2047,0.10502400000890096
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,96,8,128,1,float16,float16,1,0.02568000058333079
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,96,8,128,1,float16,fp8,1,0.012149333953857422
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,96,8,128,1,float16,float16,3,0.02621866762638092
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,96,8,128,1,float16,float16,7,0.0262719988822937
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,96,8,128,1,float16,fp8,7,0.012373333175977072
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,96,8,128,1,float16,float16,15,0.026176000634829204
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,96,8,128,1,float16,float16,63,0.02625600000222524
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,96,8,128,1,float16,fp8,63,0.012266666938861212
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,96,8,128,1,float16,fp8,3,0.012186666329701742
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,96,8,128,1,float16,float16,127,0.026389333109060924
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,96,8,128,1,float16,fp8,127,0.013994666437307993
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,96,8,128,1,float16,float16,255,0.0510453333457311
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,96,8,128,1,float16,fp8,255,0.029834667841593426
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,96,8,128,1,float16,float16,511,0.0653706689675649
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,96,8,128,1,float16,fp8,15,0.01221866657336553
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,96,8,128,1,float16,fp8,31,0.011850666254758835
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,96,8,128,1,float16,fp8,511,0.04081066697835922
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,96,8,128,1,float16,float16,31,0.02611200014750163
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,96,8,128,1,float16,float16,1023,0.08125866452852885
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,96,8,128,1,float16,fp8,1023,0.05606933434804281
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,96,8,128,1,float16,fp8,2047,0.08427733182907104
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,96,1,128,1,float16,float16,1,0.019029332945744198
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,96,1,128,1,float16,fp8,1,0.014671999961137772
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,96,1,128,1,float16,float16,3,0.019167999426523846
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,96,1,128,1,float16,fp8,3,0.014815999815861383
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,96,8,128,1,float16,float16,2047,0.11900800466537476
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,96,1,128,1,float16,float16,7,0.01899733394384384
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,96,1,128,1,float16,fp8,7,0.01482133318980535
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,96,1,128,1,float16,float16,15,0.019760000209013622
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,96,1,128,1,float16,fp8,15,0.014741333822409311
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,96,1,128,1,float16,float16,31,0.02164799968401591
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,96,1,128,1,float16,float16,63,0.0223786657055219
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,96,1,128,1,float16,fp8,63,0.01674666628241539
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,96,1,128,1,float16,float16,127,0.02250133454799652
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,96,1,128,1,float16,fp8,31,0.016634666671355564
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,96,1,128,1,float16,fp8,127,0.016832000265518825
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,96,1,128,1,float16,fp8,255,0.01950399950146675
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,96,1,128,1,float16,float16,255,0.025536000728607178
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,96,1,128,1,float16,float16,511,0.035258665680885315
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,96,1,128,1,float16,fp8,511,0.026767998933792114
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,96,1,128,1,float16,float16,1023,0.053717335065205894
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,96,1,128,1,float16,float16,2047,0.09037333726882935
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,96,1,128,1,float16,fp8,1023,0.041984001795450844
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,96,1,128,1,float16,fp8,2047,0.07263466715812683
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,96,1,128,1,float16,float16,4095,0.1637493371963501
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,96,1,128,1,float16,fp8,4095,0.13384000460306802
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,96,1,128,1,float16,float16,8191,0.309663991133372
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,96,1,128,1,float16,float16,16383,0.6022400061289469
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,96,1,128,1,float16,fp8,8191,0.256442666053772
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,96,1,128,1,float16,fp8,16383,0.5013386805852255
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,96,2,128,1,float16,float16,1,0.019173332800467808
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,96,2,128,1,float16,fp8,1,0.014597332725922266
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,96,2,128,1,float16,float16,3,0.019178666174411774
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,96,2,128,1,float16,fp8,3,0.014864000181357065
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,96,2,128,1,float16,float16,7,0.01911466692884763
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,96,2,128,1,float16,fp8,7,0.014805333067973455
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,96,2,128,1,float16,float16,15,0.019706666469573975
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,96,2,128,1,float16,fp8,15,0.01463466634353002
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,96,2,128,1,float16,float16,31,0.021701333423455555
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,96,2,128,1,float16,fp8,31,0.016762666404247284
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,96,2,128,1,float16,float16,63,0.02233600119749705
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,96,2,128,1,float16,fp8,63,0.016800000021855038
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,96,2,128,1,float16,float16,127,0.022495999932289124
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,96,2,128,1,float16,fp8,127,0.016906666258970898
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,96,2,128,1,float16,float16,255,0.0258240004380544
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,96,2,128,1,float16,fp8,255,0.019551999866962433
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,96,2,128,1,float16,float16,511,0.034976000587145485
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,96,2,128,1,float16,float16,1023,0.053685332338015236
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,96,2,128,1,float16,float16,2047,0.09046399593353271
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,96,2,128,1,float16,fp8,1023,0.041877334316571556
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,96,2,128,1,float16,fp8,511,0.027098665634791057
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,96,2,128,1,float16,fp8,2047,0.07250666618347168
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,96,2,128,1,float16,float16,4095,0.16339199741681418
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,96,2,128,1,float16,fp8,4095,0.13378666838010153
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,96,2,128,1,float16,float16,8191,0.30982400973637897
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,96,2,128,1,float16,fp8,8191,0.25642667214075726
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,96,2,128,1,float16,float16,16383,0.6690560181935629
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,96,2,128,1,float16,fp8,16383,0.5012426773707072
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,96,4,128,1,float16,float16,1,0.019152000546455383
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,96,4,128,1,float16,float16,3,0.019296000401178997
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,96,4,128,1,float16,float16,7,0.01929066702723503
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,96,4,128,1,float16,float16,15,0.01978133370478948
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,96,4,128,1,float16,float16,31,0.021925332645575207
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,96,4,128,1,float16,float16,63,0.02199466774861018
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,96,4,128,1,float16,float16,127,0.0222080002228419
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,96,4,128,1,float16,float16,255,0.025600001215934753
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,96,4,128,1,float16,float16,511,0.01469333345691363
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,96,4,128,1,float16,float16,1023,0.015594666202863058
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,96,4,128,1,float16,float16,4095,0.02956266701221466
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,96,4,128,1,float16,float16,2047,0.025114665428797405
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,96,4,128,1,float16,float16,8191,0.04330133398373922
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,96,4,128,1,float16,float16,16383,0.0639466643333435
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,96,8,128,1,float16,fp8,1,0.014511999984582266
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,96,8,128,1,float16,float16,1,0.018895999838908512
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,96,8,128,1,float16,float16,3,0.018960000326236088
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,96,8,128,1,float16,float16,7,0.01907733331123988
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,96,8,128,1,float16,fp8,3,0.014581333845853806
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,96,8,128,1,float16,fp8,7,0.014815999815861383
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,96,8,128,1,float16,fp8,15,0.014533333480358124
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,96,8,128,1,float16,float16,15,0.019695999721686046
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,96,8,128,1,float16,float16,31,0.021749332547187805
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,96,8,128,1,float16,float16,63,0.02219199885924657
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,96,8,128,1,float16,fp8,31,0.016704000532627106
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,96,8,128,1,float16,fp8,63,0.016783999900023144
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,96,8,128,1,float16,float16,127,0.022463999688625336
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,96,8,128,1,float16,fp8,127,0.016869333883126576
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,96,8,128,1,float16,fp8,255,0.008506666868925095
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,96,8,128,1,float16,float16,255,0.012805332740147909
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,96,8,128,1,float16,float16,511,0.013429333766301474
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,96,8,128,1,float16,fp8,511,0.00938666673998038
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,96,8,128,1,float16,float16,1023,0.019306667149066925
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,96,8,128,1,float16,float16,2047,0.028688001135985058
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,96,8,128,1,float16,fp8,1023,0.011312000453472137
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,96,8,128,1,float16,fp8,2047,0.019061333189407986
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,96,8,128,1,float16,float16,4095,0.0406986673672994
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,96,8,128,1,float16,fp8,4095,0.022858666876951855
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,96,8,128,1,float16,float16,16383,0.08566932876904805
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,96,8,128,1,float16,fp8,8191,0.03681066632270813
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,96,8,128,1,float16,fp8,16383,0.059605335195859276
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,96,1,128,1,float16,float16,1,0.1668000022570292
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,96,1,128,1,float16,fp8,1,0.13012799620628357
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,96,1,128,1,float16,fp8,3,0.1334879994392395
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,96,1,128,1,float16,float16,3,0.17005866765975952
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,96,1,128,1,float16,float16,7,0.17170133193333945
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,96,1,128,1,float16,fp8,7,0.14037332932154337
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,96,1,128,1,float16,float16,15,0.20193066199620566
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,96,8,128,1,float16,float16,8191,0.057274664441744484
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,96,1,128,1,float16,float16,31,0.20455465714136759
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,96,1,128,1,float16,fp8,15,0.179802676041921
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,96,1,128,1,float16,float16,63,0.20595200856526694
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,96,1,128,1,float16,fp8,63,0.18052800496419272
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,96,1,128,1,float16,fp8,31,0.17939732472101846
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,96,1,128,1,float16,float16,127,0.24599466721216837
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,96,1,128,1,float16,fp8,255,0.3305600086847941
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,96,1,128,1,float16,fp8,127,0.21448532740275064
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,96,1,128,1,float16,float16,255,0.3696426550547282
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,96,1,128,1,float16,float16,511,0.6123679876327515
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,96,1,128,1,float16,fp8,511,0.5675839980443319
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,96,1,128,1,float16,fp8,1023,1.044159968694051
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,96,1,128,1,float16,float16,1023,1.12227201461792
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,96,2,128,1,float16,fp8,1,0.13014933466911316
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,96,2,128,1,float16,float16,1,0.16597333550453186
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,96,2,128,1,float16,float16,3,0.16988267501195273
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,96,2,128,1,float16,fp8,3,0.13352533181508383
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,96,2,128,1,float16,float16,7,0.17143466075261435
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,96,2,128,1,float16,float16,15,0.20211732387542725
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,96,2,128,1,float16,fp8,7,0.14063466588656107
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,96,2,128,1,float16,float16,31,0.20510399341583252
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,96,2,128,1,float16,fp8,15,0.18013866742451987
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,96,2,128,1,float16,fp8,63,0.18060266971588135
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,96,2,128,1,float16,fp8,31,0.17964800198872885
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,96,2,128,1,float16,float16,127,0.2466826637585958
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,96,2,128,1,float16,fp8,127,0.21422400077184042
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,96,2,128,1,float16,float16,255,0.37033601601918537
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,96,2,128,1,float16,float16,511,0.6325279871622721
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,96,2,128,1,float16,fp8,255,0.3306506673494975
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,96,2,128,1,float16,fp8,511,0.5677760044733683
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,96,2,128,1,float16,float16,1023,1.1321173508961995
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,96,2,128,1,float16,float16,63,0.20670932531356812
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,96,4,128,1,float16,float16,3,0.034490667283535004
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,96,2,128,1,float16,fp8,1023,1.048192024230957
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,96,4,128,1,float16,float16,7,0.034714666505654655
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,96,4,128,1,float16,float16,15,0.03465600063403448
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,96,4,128,1,float16,float16,1,0.03436266630887985
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,96,4,128,1,float16,float16,63,0.03421333432197571
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,96,4,128,1,float16,float16,31,0.034202667574087776
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,96,4,128,1,float16,float16,127,0.0337119996547699
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,96,4,128,1,float16,float16,255,0.06407466530799866
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,96,4,128,1,float16,float16,511,0.08083733419577281
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,96,4,128,1,float16,float16,1023,0.10502933462460835
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,96,8,128,1,float16,float16,1,0.046997333566347756
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,96,8,128,1,float16,fp8,1,0.021040000021457672
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,96,8,128,1,float16,fp8,3,0.021045332153638203
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,96,8,128,1,float16,fp8,7,0.020981334149837494
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,96,8,128,1,float16,float16,15,0.04701866706212362
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,96,8,128,1,float16,fp8,15,0.021146667500336964
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,96,8,128,1,float16,float16,7,0.04683733483155569
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,96,8,128,1,float16,float16,3,0.047007997830708824
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,96,8,128,1,float16,float16,31,0.0472320020198822
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,96,8,128,1,float16,fp8,63,0.02102400114138921
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,96,8,128,1,float16,fp8,31,0.0210506667693456
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,96,8,128,1,float16,float16,63,0.046623999873797096
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,96,8,128,1,float16,float16,127,0.047770669062932335
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,96,8,128,1,float16,fp8,127,0.02478933334350586
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,96,8,128,1,float16,float16,511,0.07167466481526692
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,96,8,128,1,float16,fp8,1023,0.07946133116881053
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,96,8,128,1,float16,float16,1023,0.10600533088048299
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,256,1,96,1,128,1,float16,float16,1,0.32742400964101154
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,256,1,96,1,128,1,float16,fp8,1,0.2559093236923218
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,96,8,128,1,float16,fp8,255,0.03367999941110611
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,256,1,96,1,128,1,float16,float16,3,0.3341279824574788
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,256,1,96,1,128,1,float16,fp8,3,0.2622986634572347
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,256,1,96,1,128,1,float16,fp8,7,0.2747199932734172
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,256,1,96,1,128,1,float16,float16,7,0.33852267265319824
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,96,8,128,1,float16,fp8,511,0.05154666801293691
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,256,1,96,1,128,1,float16,float16,15,0.3991893529891968
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,256,1,96,1,128,1,float16,fp8,15,0.3540906508763631
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,96,8,128,1,float16,float16,255,0.05481599768002828
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,256,1,96,1,128,1,float16,fp8,63,0.3556640148162842
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,256,1,96,1,128,1,float16,float16,63,0.40674134095509845
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,256,1,96,1,128,1,float16,float16,31,0.40451733271280926
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,256,1,96,1,128,1,float16,fp8,31,0.35358933607737225
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,256,1,96,1,128,1,float16,fp8,127,0.42244799931844074
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,256,1,96,1,128,1,float16,float16,255,0.7344319820404053
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,256,1,96,1,128,1,float16,float16,127,0.486410657564799
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,256,1,96,1,128,1,float16,fp8,255,0.6530880133310953
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,256,1,96,2,128,1,float16,float16,1,0.3267199993133545
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,256,1,96,2,128,1,float16,fp8,1,0.25572266181310016
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,256,1,96,2,128,1,float16,float16,3,0.3344693183898926
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,256,1,96,2,128,1,float16,fp8,7,0.2760746677716573
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,256,1,96,2,128,1,float16,float16,15,0.3993599812189738
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,256,1,96,2,128,1,float16,fp8,3,0.26145599285761517
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,256,1,96,2,128,1,float16,float16,7,0.33770668506622314
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,256,1,96,2,128,1,float16,fp8,15,0.3545653422673543
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,256,1,96,2,128,1,float16,float16,31,0.4052746693293254
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,256,1,96,2,128,1,float16,fp8,31,0.3540000120798747
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,256,1,96,2,128,1,float16,fp8,63,0.3561813433965047
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,256,1,96,2,128,1,float16,float16,63,0.40855999787648517
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,256,1,96,2,128,1,float16,float16,127,0.4897173245747884
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,256,1,96,2,128,1,float16,float16,255,0.7573280334472656
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,256,1,96,2,128,1,float16,fp8,127,0.42262399196624756
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,256,1,96,2,128,1,float16,fp8,255,0.6536213159561157
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,256,1,96,4,128,1,float16,float16,1,0.0621973325808843
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,256,1,96,4,128,1,float16,float16,3,0.06214933097362518
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,256,1,96,4,128,1,float16,float16,7,0.0621919979651769
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,256,1,96,4,128,1,float16,float16,15,0.06201066573460897
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,256,1,96,4,128,1,float16,float16,31,0.061808000008265175
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,256,1,96,4,128,1,float16,float16,63,0.0613919993241628
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,256,1,96,4,128,1,float16,float16,127,0.06285866598288219
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,256,1,96,4,128,1,float16,float16,255,0.07056533296902974
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,256,1,96,8,128,1,float16,float16,1,0.0860053300857544
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,256,1,96,8,128,1,float16,fp8,1,0.039333333571751915
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,256,1,96,8,128,1,float16,float16,7,0.0860746701558431
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,256,1,96,8,128,1,float16,float16,15,0.08611733714739482
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,256,1,96,8,128,1,float16,fp8,7,0.0394400010506312
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,256,1,96,8,128,1,float16,float16,3,0.0863146682580312
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,256,1,96,8,128,1,float16,fp8,3,0.039247999588648476
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,256,1,96,8,128,1,float16,float16,31,0.08668800195058186
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,256,1,96,8,128,1,float16,fp8,31,0.03955733279387156
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,256,1,96,8,128,1,float16,float16,63,0.08672533432642619
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,256,1,96,8,128,1,float16,fp8,63,0.03907199949026108
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,256,1,96,8,128,1,float16,float16,127,0.08809600273768108
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,256,1,96,8,128,1,float16,float16,255,0.09841600060462952
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,256,1,96,8,128,1,float16,fp8,127,0.053717335065205894
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,256,1,96,8,128,1,float16,fp8,255,0.0705866664648056
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,512,1,96,1,128,1,float16,float16,1,0.6505279938379923
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,512,1,96,1,128,1,float16,fp8,1,0.5076800187428793
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,256,1,96,8,128,1,float16,fp8,15,0.039408000806967415
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,512,1,96,1,128,1,float16,fp8,3,0.5206026633580526
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,512,1,96,1,128,1,float16,float16,3,0.6642453273137411
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,512,1,96,1,128,1,float16,float16,7,0.6719466845194498
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,512,1,96,1,128,1,float16,fp8,7,0.543008009592692
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,512,1,96,1,128,1,float16,fp8,15,0.7034719785054525
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,512,1,96,1,128,1,float16,float16,31,0.8041760126749674
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,512,1,96,1,128,1,float16,float16,15,0.7934933503468832
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,512,1,96,1,128,1,float16,fp8,31,0.7024906476338705
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,512,1,96,1,128,1,float16,fp8,63,0.7063626448313395
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,512,1,96,1,128,1,float16,float16,63,0.8101973533630371
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,512,1,96,1,128,1,float16,float16,127,0.9881280263264974
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,512,1,96,1,128,1,float16,fp8,127,0.8432479699452718
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,512,1,96,2,128,1,float16,float16,1,0.6492533286412557
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,512,1,96,2,128,1,float16,fp8,1,0.5079040129979452
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,512,1,96,2,128,1,float16,float16,3,0.6640479962031046
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,512,1,96,2,128,1,float16,fp8,3,0.5186773141225179
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,512,1,96,2,128,1,float16,float16,7,0.6689759890238444
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,512,1,96,2,128,1,float16,float16,15,0.7941706975301107
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,512,1,96,2,128,1,float16,fp8,7,0.5457173188527426
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,512,1,96,2,128,1,float16,fp8,15,0.7038933436075846
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,512,1,96,2,128,1,float16,float16,31,0.8087200323740641
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,512,1,96,2,128,1,float16,fp8,31,0.7027786572774252
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,512,1,96,2,128,1,float16,fp8,63,0.7080533504486084
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,512,1,96,2,128,1,float16,float16,63,0.8328426678975424
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,512,1,96,2,128,1,float16,float16,127,1.0048426787058513
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,512,1,96,2,128,1,float16,fp8,127,0.85099196434021
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,512,1,96,4,128,1,float16,float16,1,0.11342933773994446
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,512,1,96,4,128,1,float16,float16,7,0.11378666758537292
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,512,1,96,4,128,1,float16,float16,15,0.11424533526102702
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,512,1,96,4,128,1,float16,float16,3,0.11373866597811381
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,512,1,96,4,128,1,float16,float16,31,0.11455466349919637
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,512,1,96,4,128,1,float16,float16,63,0.11466667056083679
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,512,1,96,4,128,1,float16,float16,127,0.11421333750089009
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,512,1,96,8,128,1,float16,float16,1,0.15894400080045065
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,512,1,96,8,128,1,float16,fp8,1,0.09556266665458679
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,512,1,96,8,128,1,float16,float16,3,0.15892799695332846
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,512,1,96,8,128,1,float16,fp8,3,0.09589866797129314
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,512,1,96,8,128,1,float16,float16,7,0.15989333391189575
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,512,1,96,8,128,1,float16,fp8,7,0.09590933720270793
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,512,1,96,8,128,1,float16,float16,15,0.16080000003178915
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,512,1,96,8,128,1,float16,fp8,15,0.09586133559544881
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,512,1,96,8,128,1,float16,float16,31,0.16200000047683716
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,512,1,96,8,128,1,float16,fp8,31,0.09558399518330891
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,512,1,96,8,128,1,float16,float16,63,0.16210132837295532
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,512,1,96,8,128,1,float16,fp8,63,0.09543466567993164
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,512,1,96,8,128,1,float16,float16,127,0.1618773341178894
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,512,1,96,8,128,1,float16,fp8,127,0.10958932836850484
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,96,1,128,1,float16,float16,1,0.01403733342885971
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,96,1,128,1,float16,fp8,1,0.013701333353916803
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,96,1,128,1,float16,float16,3,0.014186666657527288
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,96,1,128,1,float16,fp8,3,0.014240000396966934
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,96,1,128,1,float16,fp8,7,0.014021333307027817
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,96,1,128,1,float16,float16,15,0.016645333419243496
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,96,1,128,1,float16,fp8,15,0.01578666642308235
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,96,1,128,1,float16,float16,7,0.014368000129858652
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,96,1,128,1,float16,float16,31,0.016879999389251072
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,96,1,128,1,float16,float16,63,0.016927999754746754
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,96,1,128,1,float16,fp8,31,0.016879999389251072
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,96,1,128,1,float16,float16,255,0.02815466622511546
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,96,1,128,1,float16,fp8,255,0.02606933315594991
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,96,1,128,1,float16,fp8,511,0.042117332418759666
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,96,1,128,1,float16,float16,1023,0.07818666597207387
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,96,1,128,1,float16,float16,2047,0.1440000037352244
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,96,1,128,1,float16,fp8,2047,0.137472003698349
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,96,1,128,1,float16,fp8,63,0.015834666788578033
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,96,1,128,1,float16,fp8,127,0.0180479995906353
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,96,1,128,1,float16,fp8,4095,0.26344533761342365
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,96,1,128,1,float16,float16,511,0.04478933413823446
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,96,1,128,1,float16,float16,4095,0.2757440010706584
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,96,1,128,1,float16,float16,127,0.019546666493018467
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,96,1,128,1,float16,float16,8191,0.5395466486612955
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,96,1,128,1,float16,fp8,8191,0.5165866613388062
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,96,2,128,1,float16,float16,1,0.01403733342885971
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,96,1,128,1,float16,float16,16383,1.3211999734242756
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,96,1,128,1,float16,fp8,16383,1.0665919780731201
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,96,2,128,1,float16,fp8,1,0.01360000049074491
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,96,1,128,1,float16,fp8,1023,0.07357866565386455
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,96,2,128,1,float16,float16,3,0.013999999811251959
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,96,2,128,1,float16,fp8,3,0.01381333296497663
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,96,2,128,1,float16,fp8,15,0.015568000574906668
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,96,2,128,1,float16,float16,7,0.014645333091417948
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,96,2,128,1,float16,fp8,7,0.014015999933083853
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,96,2,128,1,float16,float16,15,0.01637866720557213
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,96,2,128,1,float16,float16,31,0.01653333380818367
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,96,2,128,1,float16,fp8,63,0.015754666179418564
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,96,2,128,1,float16,fp8,31,0.01570133368174235
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,96,2,128,1,float16,float16,63,0.016858667135238647
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,96,2,128,1,float16,float16,127,0.019695999721686046
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,96,2,128,1,float16,fp8,127,0.018298666924238205
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,96,2,128,1,float16,float16,255,0.027866666515668232
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,96,2,128,1,float16,float16,511,0.04471466441949209
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,96,2,128,1,float16,fp8,255,0.02603733291228612
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,96,2,128,1,float16,fp8,511,0.04200533529122671
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,96,2,128,1,float16,float16,1023,0.07808533310890198
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,96,2,128,1,float16,fp8,1023,0.07394666473070781
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,96,2,128,1,float16,float16,4095,0.27875200907389325
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,96,2,128,1,float16,float16,2047,0.1439893345038096
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,96,2,128,1,float16,fp8,2047,0.13715733091036478
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,96,2,128,1,float16,fp8,4095,0.26340266068776447
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,96,2,128,1,float16,float16,8191,0.5507200161616007
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,96,2,128,1,float16,fp8,16383,1.0944586594899495
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,96,2,128,1,float16,fp8,8191,0.5165653228759766
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,96,2,128,1,float16,float16,16383,1.4287734031677246
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,96,4,128,1,float16,float16,1,0.0174346665541331
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,96,4,128,1,float16,float16,7,0.014346666634082794
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,96,4,128,1,float16,float16,3,0.014122666170199713
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,96,4,128,1,float16,float16,15,0.01913600042462349
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,96,4,128,1,float16,float16,31,0.016741332908471424
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,96,4,128,1,float16,float16,63,0.016864000509182613
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,96,4,128,1,float16,float16,127,0.019765333582957584
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,96,4,128,1,float16,float16,255,0.014474666366974512
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,96,4,128,1,float16,float16,511,0.015354666858911514
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,96,4,128,1,float16,float16,1023,0.02276266614596049
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,96,4,128,1,float16,float16,2047,0.033615998923778534
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,96,4,128,1,float16,float16,8191,0.07228266696135204
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,96,4,128,1,float16,float16,4095,0.04894933104515076
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,96,4,128,1,float16,float16,16383,0.10691733161608379
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,96,8,128,1,float16,float16,1,0.010725333044926325
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,96,8,128,1,float16,fp8,1,0.007146666447321574
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,96,8,128,1,float16,float16,3,0.010645333677530289
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,96,8,128,1,float16,fp8,3,0.007082666580875714
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,96,8,128,1,float16,float16,7,0.010751999914646149
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,96,8,128,1,float16,fp8,7,0.00726400005320708
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,96,8,128,1,float16,float16,15,0.010751999914646149
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,96,8,128,1,float16,fp8,15,0.007167999943097432
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,96,8,128,1,float16,fp8,31,0.007087999954819679
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,96,8,128,1,float16,float16,63,0.010458666831254959
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,96,8,128,1,float16,fp8,63,0.007365333537260692
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,96,8,128,1,float16,float16,127,0.010591999938090643
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,96,8,128,1,float16,float16,31,0.01089599976936976
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,96,8,128,1,float16,float16,255,0.013157332936922709
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,96,8,128,1,float16,fp8,127,0.007941333577036858
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,96,8,128,1,float16,float16,511,0.019434666881958645
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,96,8,128,1,float16,fp8,511,0.011663999408483505
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,96,8,128,1,float16,fp8,255,0.008879999940594038
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,96,8,128,1,float16,fp8,1023,0.01874133323629697
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,96,8,128,1,float16,float16,1023,0.028058665494124096
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,96,8,128,1,float16,float16,2047,0.04265599946180979
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,96,8,128,1,float16,fp8,2047,0.02630399912595749
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,96,8,128,1,float16,float16,4095,0.06634666522343953
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,96,8,128,1,float16,fp8,4095,0.04058666775623957
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,96,8,128,1,float16,float16,8191,0.0839413305123647
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,96,8,128,1,float16,fp8,8191,0.05729066828886668
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,96,8,128,1,float16,float16,16383,0.11800000071525574
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,96,8,128,1,float16,fp8,16383,0.08513066172599792
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1024,1,96,1,128,1,float16,float16,1,1.3362719217936199
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1024,1,96,1,128,1,float16,fp8,1,1.0392639636993408
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1024,1,96,1,128,1,float16,float16,3,1.3661279678344727
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1024,1,96,1,128,1,float16,fp8,3,1.0693066914876301
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1024,1,96,1,128,1,float16,float16,7,1.3789812723795574
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1024,1,96,1,128,1,float16,fp8,7,1.1109973589579265
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1024,1,96,1,128,1,float16,float16,15,1.6049493153889973
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1024,1,96,1,128,1,float16,fp8,15,1.4115467071533203
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1024,1,96,1,128,1,float16,float16,31,1.632757345835368
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1024,1,96,1,128,1,float16,fp8,31,1.4096906979878743
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1024,1,96,1,128,1,float16,float16,63,1.6472266515096028
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1024,1,96,1,128,1,float16,fp8,63,1.4191093444824219
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1024,1,96,2,128,1,float16,float16,1,1.3419307072957356
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1024,1,96,2,128,1,float16,fp8,1,1.0441439946492512
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1024,1,96,2,128,1,float16,float16,3,1.3726026217142742
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1024,1,96,2,128,1,float16,fp8,3,1.0709760189056396
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1024,1,96,2,128,1,float16,float16,7,1.38481601079305
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1024,1,96,2,128,1,float16,fp8,7,1.1203680038452148
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1024,1,96,2,128,1,float16,float16,15,1.613375981648763
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1024,1,96,2,128,1,float16,fp8,15,1.414570649464925
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1024,1,96,2,128,1,float16,float16,31,1.6459520657857258
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1024,1,96,2,128,1,float16,fp8,31,1.4127039909362793
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1024,1,96,2,128,1,float16,float16,63,1.663690725962321
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1024,1,96,2,128,1,float16,fp8,63,1.4218719800313313
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1024,1,96,4,128,1,float16,float16,1,0.2184106707572937
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1024,1,96,4,128,1,float16,float16,3,0.2181653380393982
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1024,1,96,4,128,1,float16,float16,7,0.21735467513402304
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1024,1,96,4,128,1,float16,float16,15,0.2181333303451538
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1024,1,96,4,128,1,float16,float16,31,0.21829867362976074
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1024,1,96,4,128,1,float16,float16,63,0.21809067328770956
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1024,1,96,8,128,1,float16,float16,1,0.31191466252009076
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1024,1,96,8,128,1,float16,float16,3,0.3117226759592692
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1024,1,96,8,128,1,float16,fp8,1,0.18242132663726807
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1024,1,96,8,128,1,float16,fp8,3,0.1825760006904602
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1024,1,96,8,128,1,float16,float16,7,0.31243733565012616
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1024,1,96,8,128,1,float16,fp8,7,0.18316799402236938
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1024,1,96,8,128,1,float16,fp8,15,0.18266133467356363
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1024,1,96,8,128,1,float16,float16,15,0.3129066626230876
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1024,1,96,8,128,1,float16,fp8,31,0.1821333368619283
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1024,1,96,8,128,1,float16,float16,31,0.314191997051239
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1024,1,96,8,128,1,float16,float16,63,0.311514675617218
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1024,1,96,8,128,1,float16,fp8,63,0.1823093295097351
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2048,1,96,1,128,1,float16,float16,1,2.675050735473633
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2048,1,96,1,128,1,float16,fp8,1,2.0775787035624185
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2048,1,96,1,128,1,float16,fp8,3,2.1361172993977866
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2048,1,96,1,128,1,float16,float16,3,2.7330878575642905
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2048,1,96,1,128,1,float16,fp8,7,2.217151959737142
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2048,1,96,1,128,1,float16,float16,7,2.7568372090657554
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2048,1,96,1,128,1,float16,fp8,15,2.816944122314453
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2048,1,96,1,128,1,float16,float16,15,3.2040319442749023
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2048,1,96,1,128,1,float16,fp8,31,2.8132638931274414
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2048,1,96,1,128,1,float16,float16,31,3.2586186726888022
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2048,1,96,2,128,1,float16,fp8,1,2.084005355834961
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2048,1,96,2,128,1,float16,float16,1,2.6819947560628257
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2048,1,96,2,128,1,float16,float16,3,2.7417065302530923
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2048,1,96,2,128,1,float16,fp8,3,2.1359786987304688
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2048,1,96,2,128,1,float16,float16,7,2.7637707392374673
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2048,1,96,2,128,1,float16,float16,15,3.220282554626465
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2048,1,96,2,128,1,float16,fp8,7,2.2347893714904785
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2048,1,96,2,128,1,float16,fp8,15,2.8230133056640625
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2048,1,96,2,128,1,float16,float16,31,3.2839787801106772
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2048,1,96,2,128,1,float16,fp8,31,2.8190558751424155
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2048,1,96,4,128,1,float16,float16,1,0.42530667781829834
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2048,1,96,4,128,1,float16,float16,7,0.4267626603444417
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2048,1,96,4,128,1,float16,float16,3,0.42485864957173664
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2048,1,96,4,128,1,float16,float16,15,0.42446398735046387
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2048,1,96,4,128,1,float16,float16,31,0.4265386660893758
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2048,1,96,8,128,1,float16,float16,1,0.6165653467178345
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2048,1,96,8,128,1,float16,fp8,1,0.3553973436355591
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2048,1,96,8,128,1,float16,float16,3,0.6170933246612549
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2048,1,96,8,128,1,float16,fp8,3,0.35471999645233154
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2048,1,96,8,128,1,float16,float16,7,0.6173333326975504
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2048,1,96,8,128,1,float16,fp8,7,0.35625600814819336
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2048,1,96,8,128,1,float16,float16,15,0.6179680029551188
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2048,1,96,8,128,1,float16,float16,31,0.6199306646982828
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2048,1,96,8,128,1,float16,fp8,31,0.35523200035095215
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,96,1,128,1,float16,float16,1,0.024447999894618988
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,96,1,128,1,float16,float16,3,0.02478933334350586
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,96,1,128,1,float16,fp8,1,0.020186666399240494
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,96,1,128,1,float16,fp8,3,0.02096533278624217
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,96,1,128,1,float16,float16,7,0.025301332275072735
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2048,1,96,8,128,1,float16,fp8,15,0.3553226788838704
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,96,1,128,1,float16,fp8,7,0.021344001094500225
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,96,1,128,1,float16,float16,15,0.029520000020662945
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,96,1,128,1,float16,fp8,15,0.026895999908447266
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,96,1,128,1,float16,float16,31,0.029898665845394135
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,96,1,128,1,float16,fp8,31,0.02685333291689555
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,96,1,128,1,float16,float16,63,0.029872000217437744
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,96,1,128,1,float16,fp8,63,0.027269333600997925
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,96,1,128,1,float16,float16,127,0.03547733277082443
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,96,1,128,1,float16,fp8,127,0.03183466692765554
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,96,1,128,1,float16,float16,255,0.051962668697039284
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,96,1,128,1,float16,fp8,255,0.047093331813812256
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,96,1,128,1,float16,fp8,511,0.07815999786059062
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,96,1,128,1,float16,float16,511,0.08469333251317342
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,96,1,128,1,float16,float16,1023,0.14909332990646362
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,96,1,128,1,float16,float16,2047,0.27806933720906574
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,96,1,128,1,float16,fp8,1023,0.14032000303268433
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,96,1,128,1,float16,fp8,2047,0.26412800947825116
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,96,1,128,1,float16,float16,4095,0.5360159873962402
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,96,1,128,1,float16,fp8,4095,0.5097226699193319
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,96,1,128,1,float16,fp8,8191,1.0038026968638103
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,96,1,128,1,float16,float16,8191,1.0755146344502766
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,96,2,128,1,float16,fp8,1,0.020400000115235645
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,96,2,128,1,float16,float16,1,0.024458666642506916
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,96,2,128,1,float16,float16,3,0.024826665719350178
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,96,2,128,1,float16,fp8,3,0.020997333029905956
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,96,2,128,1,float16,float16,7,0.025557334224383037
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,96,2,128,1,float16,float16,15,0.02958400050799052
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,96,2,128,1,float16,fp8,7,0.021344001094500225
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,96,2,128,1,float16,fp8,15,0.02683199942111969
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,96,2,128,1,float16,float16,31,0.029824001093705494
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,96,2,128,1,float16,fp8,31,0.02696000039577484
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,96,2,128,1,float16,fp8,63,0.027130665878454845
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,96,2,128,1,float16,float16,63,0.029866665601730347
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,96,2,128,1,float16,float16,127,0.035504000882307686
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,96,2,128,1,float16,fp8,127,0.03180799881617228
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,96,2,128,1,float16,float16,511,0.08481066425641377
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,96,2,128,1,float16,fp8,255,0.047226667404174805
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,96,2,128,1,float16,float16,255,0.05201066533724467
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,96,2,128,1,float16,fp8,511,0.07820799946784973
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,96,2,128,1,float16,fp8,1023,0.14038399855295816
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,96,2,128,1,float16,float16,1023,0.14909866452217102
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,96,2,128,1,float16,float16,2047,0.27819732824961346
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,96,2,128,1,float16,fp8,4095,0.5095093250274658
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,96,2,128,1,float16,fp8,2047,0.26426132520039874
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,96,2,128,1,float16,float16,4095,0.5534453392028809
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,96,2,128,1,float16,float16,8191,1.1183306376139324
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,96,2,128,1,float16,fp8,8191,1.0058186848958333
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,96,4,128,1,float16,float16,1,0.012074666718641916
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,96,4,128,1,float16,float16,3,0.012362666428089142
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,96,4,128,1,float16,float16,7,0.01209066684047381
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,96,4,128,1,float16,float16,15,0.012373333175977072
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,96,4,128,1,float16,float16,31,0.011952000359694162
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,96,4,128,1,float16,float16,63,0.01184533288081487
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,96,4,128,1,float16,float16,127,0.011722666521867117
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,96,4,128,1,float16,float16,255,0.015504000087579092
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,96,4,128,1,float16,float16,511,0.022863999009132385
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,96,4,128,1,float16,float16,1023,0.03312533348798752
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,96,4,128,1,float16,float16,2047,0.05220800141493479
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,96,4,128,1,float16,float16,4095,0.08076799909273784
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,96,4,128,1,float16,float16,8191,0.10626133282979329
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,96,8,128,1,float16,fp8,1,0.00707733320693175
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,96,8,128,1,float16,float16,1,0.010741333166758219
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,96,8,128,1,float16,fp8,3,0.007141333073377609
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,96,8,128,1,float16,float16,7,0.010645333677530289
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,96,8,128,1,float16,fp8,7,0.007082666580875714
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,96,8,128,1,float16,float16,15,0.010682666053374609
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,96,8,128,1,float16,fp8,15,0.0074879998962084455
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,96,8,128,1,float16,float16,31,0.011002667248249054
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,96,8,128,1,float16,fp8,31,0.007189333438873291
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,96,8,128,1,float16,float16,63,0.011018666128317514
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,96,8,128,1,float16,float16,3,0.010586666564146677
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,96,8,128,1,float16,fp8,63,0.007237333183487256
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,96,8,128,1,float16,float16,127,0.010757333288590113
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,96,8,128,1,float16,fp8,127,0.00821333316465219
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,96,8,128,1,float16,float16,511,0.028330666323502857
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,96,8,128,1,float16,fp8,255,0.009962666779756546
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,96,8,128,1,float16,float16,255,0.02144533395767212
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,96,8,128,1,float16,fp8,511,0.017765333255132038
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,96,8,128,1,float16,float16,1023,0.04603200157483419
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,96,8,128,1,float16,fp8,1023,0.030970667799313862
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,96,8,128,1,float16,float16,2047,0.06474666794141133
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,96,8,128,1,float16,fp8,2047,0.03856533269087473
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,96,8,128,1,float16,float16,4095,0.08212266862392426
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,96,8,128,1,float16,fp8,4095,0.05710933109124502
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,96,8,128,1,float16,float16,8191,0.11728533109029134
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,96,8,128,1,float16,fp8,8191,0.0839573343594869
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,64,1,128,1,float16,float16,1,0.033930666744709015
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,64,1,128,1,float16,fp8,1,0.02569066733121872
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,64,1,128,1,float16,float16,3,0.03431999931732813
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,64,1,128,1,float16,fp8,3,0.026687999566396076
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,64,1,128,1,float16,float16,7,0.034490667283535004
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,64,1,128,1,float16,fp8,7,0.0271519993742307
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,64,1,128,1,float16,float16,15,0.0383840004603068
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,64,1,128,1,float16,fp8,15,0.03445333242416382
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,64,1,128,1,float16,float16,31,0.038704000413417816
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,64,1,128,1,float16,fp8,31,0.034602666894594826
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,64,1,128,1,float16,fp8,63,0.03455466777086258
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,64,1,128,1,float16,float16,63,0.039135999977588654
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,64,1,128,1,float16,float16,127,0.04634666442871094
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,64,1,128,1,float16,fp8,127,0.040565334260463715
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,64,1,128,1,float16,float16,255,0.06799999872843425
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,64,1,128,1,float16,fp8,255,0.06118399898211161
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,64,1,128,1,float16,fp8,511,0.10191466410954793
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,64,1,128,1,float16,float16,511,0.11066133777300517
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,64,1,128,1,float16,float16,1023,0.19602133830388388
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,64,1,128,1,float16,fp8,1023,0.18450667460759482
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,64,1,128,1,float16,float16,2047,0.36627201239267987
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,64,1,128,1,float16,fp8,2047,0.34838934739430744
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,64,1,128,1,float16,float16,4095,0.725711981455485
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,64,1,128,1,float16,fp8,4095,0.6732853253682455
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,64,1,128,1,float16,float16,8191,1.4558293024698894
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,64,1,128,1,float16,fp8,8191,1.3305493195851643
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,64,2,128,1,float16,float16,1,0.012245333443085352
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,64,2,128,1,float16,float16,3,0.012522666404644648
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,64,2,128,1,float16,fp8,1,0.009290666629870733
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,64,2,128,1,float16,float16,7,0.012261333564917246
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,64,2,128,1,float16,fp8,3,0.009114666531483332
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,64,2,128,1,float16,fp8,7,0.009653333574533463
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,64,2,128,1,float16,float16,15,0.012304000556468964
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,64,2,128,1,float16,fp8,15,0.009594666461149851
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,64,2,128,1,float16,float16,63,0.0122079998254776
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,64,2,128,1,float16,float16,31,0.012335999558369318
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,64,2,128,1,float16,fp8,63,0.009541333342591921
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,64,2,128,1,float16,float16,127,0.012256000190973282
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,64,2,128,1,float16,fp8,127,0.010890666395425797
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,64,2,128,1,float16,fp8,31,0.009429333110650381
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,64,2,128,1,float16,fp8,255,0.011920000116030375
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,64,2,128,1,float16,float16,255,0.01626666635274887
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,64,2,128,1,float16,float16,511,0.023408000667889912
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,64,2,128,1,float16,fp8,511,0.022463999688625336
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,64,2,128,1,float16,fp8,1023,0.041375999649365745
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,64,2,128,1,float16,float16,1023,0.033904001116752625
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,64,2,128,1,float16,fp8,2047,0.05894933144251505
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,64,2,128,1,float16,float16,2047,0.05342933535575867
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,64,2,128,1,float16,float16,4095,0.0824533353249232
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,64,2,128,1,float16,fp8,4095,0.11008000373840332
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,64,2,128,1,float16,float16,8191,0.10729599992434184
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,64,2,128,1,float16,fp8,8191,0.1364479959011078
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,64,4,128,1,float16,float16,1,0.011157333850860596
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,64,4,128,1,float16,float16,3,0.011114666859308878
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,64,4,128,1,float16,fp8,1,0.00702400008837382
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,64,4,128,1,float16,float16,7,0.01129066695769628
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,64,4,128,1,float16,fp8,7,0.007029333462317784
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,64,4,128,1,float16,float16,15,0.011306667079528173
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,64,4,128,1,float16,fp8,15,0.007007999966541926
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,64,4,128,1,float16,fp8,3,0.007327999919652939
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,64,4,128,1,float16,float16,31,0.011205332974592844
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,64,4,128,1,float16,fp8,31,0.007002666592597961
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,64,4,128,1,float16,float16,63,0.010933333386977514
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,64,4,128,1,float16,fp8,127,0.007850666840871176
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,64,4,128,1,float16,float16,127,0.01090666651725769
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,64,4,128,1,float16,float16,511,0.02888533224662145
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,64,4,128,1,float16,fp8,63,0.008090666805704435
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,64,4,128,1,float16,float16,1023,0.04654933512210846
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,64,4,128,1,float16,fp8,511,0.01852799952030182
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,64,4,128,1,float16,fp8,1023,0.03294933338960012
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,64,4,128,1,float16,fp8,2047,0.04355733096599579
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,64,4,128,1,float16,float16,4095,0.08444799979527791
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,64,4,128,1,float16,float16,8191,0.1204853355884552
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,64,4,128,1,float16,fp8,8191,0.0872320036093394
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,64,4,128,1,float16,float16,255,0.021183999876181286
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,64,4,128,1,float16,fp8,255,0.010458666831254959
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,64,4,128,1,float16,float16,2047,0.06580266853173573
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,64,4,128,1,float16,fp8,4095,0.0590826670328776
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,64,8,128,1,float16,fp8,1,0.007184000064929326
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,64,8,128,1,float16,fp8,3,0.006810666372378667
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,64,8,128,1,float16,fp8,15,0.010122666756312052
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,64,8,128,1,float16,fp8,7,0.00697066696981589
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,64,8,128,1,float16,fp8,31,0.007391999786098798
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,64,8,128,1,float16,float16,1,0.015824000040690105
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,64,8,128,1,float16,fp8,63,0.007125333572427432
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,64,8,128,1,float16,float16,127,0.015813333292802174
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,64,8,128,1,float16,float16,15,0.015781333049138386
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,64,8,128,1,float16,float16,63,0.015413332730531693
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,64,8,128,1,float16,float16,31,0.015610666324694952
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,64,8,128,1,float16,float16,7,0.015861333658297855
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,64,8,128,1,float16,float16,3,0.015967999895413715
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,64,8,128,1,float16,fp8,127,0.007482666522264481
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,64,8,128,1,float16,float16,255,0.030207999050617218
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,64,8,128,1,float16,float16,511,0.046426668763160706
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,64,8,128,1,float16,fp8,511,0.02146666745344798
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,64,8,128,1,float16,float16,2047,0.08312533299128215
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,64,8,128,1,float16,float16,1023,0.061008001367251076
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,64,8,128,1,float16,float16,4095,0.11943999926249187
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,64,8,128,1,float16,fp8,4095,0.06002133091290792
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,64,8,128,1,float16,fp8,1023,0.028597332537174225
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,64,8,128,1,float16,fp8,255,0.014933332800865173
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,64,8,128,1,float16,float16,8191,0.19051200151443481
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,64,1,128,1,float16,float16,1,0.008314666648705801
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,64,1,128,1,float16,fp8,1,0.009296000003814697
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,64,8,128,1,float16,fp8,2047,0.04074133435885111
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,64,1,128,1,float16,float16,3,0.008341333518425623
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,64,1,128,1,float16,fp8,3,0.009338666374484697
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,64,1,128,1,float16,float16,63,0.0099093330403169
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,64,1,128,1,float16,fp8,7,0.009455999980370203
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,64,1,128,1,float16,float16,15,0.008394666636983553
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,64,1,128,1,float16,fp8,15,0.009519999846816063
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,64,1,128,1,float16,float16,31,0.009216000015536943
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,64,8,128,1,float16,fp8,8191,0.09918399651845296
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,64,1,128,1,float16,fp8,31,0.010351999973257383
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,64,1,128,1,float16,fp8,63,0.011946666985750198
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,64,1,128,1,float16,float16,127,0.009381333366036415
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,64,1,128,1,float16,float16,255,0.010373333469033241
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,64,1,128,1,float16,fp8,127,0.012069333344697952
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,64,1,128,1,float16,fp8,255,0.012042666474978128
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,64,1,128,1,float16,float16,1023,0.029792000850041706
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,64,1,128,1,float16,fp8,511,0.016805333395799
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,64,1,128,1,float16,float16,511,0.01695466662446658
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,64,1,128,1,float16,fp8,4095,0.05117866893609365
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,64,1,128,1,float16,float16,7,0.008901333436369896
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,64,1,128,1,float16,fp8,1023,0.029525332152843475
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,64,1,128,1,float16,float16,2047,0.03757333258787791
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,64,1,128,1,float16,fp8,2047,0.037077332536379494
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,64,1,128,1,float16,float16,4095,0.05213333169619242
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,64,1,128,1,float16,float16,16383,0.1402506629625956
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,64,1,128,1,float16,float16,8191,0.08193066716194153
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,64,1,128,1,float16,fp8,8191,0.07972800234953563
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,64,1,128,1,float16,fp8,16383,0.13615467151006064
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,64,1,128,1,float16,fp8,32767,0.24954134225845337
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,64,1,128,1,float16,float16,32767,0.25760000944137573
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,64,2,128,1,float16,fp8,1,0.00949866697192192
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,64,2,128,1,float16,fp8,3,0.009509333098928133
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,64,2,128,1,float16,float16,3,0.008383999889095625
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,64,2,128,1,float16,float16,1,0.008303999900817871
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,64,2,128,1,float16,float16,31,0.009322666873534521
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,64,2,128,1,float16,fp8,15,0.009423999736706415
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,64,2,128,1,float16,float16,7,0.008293333152929941
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,64,2,128,1,float16,fp8,31,0.010314666976531347
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,64,2,128,1,float16,float16,63,0.009322666873534521
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,64,2,128,1,float16,fp8,7,0.009450666606426239
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,64,2,128,1,float16,float16,511,0.01672533278663953
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,64,2,128,1,float16,fp8,127,0.012159999459981918
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,64,2,128,1,float16,float16,127,0.009328000247478485
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,64,2,128,1,float16,float16,15,0.008447999755541483
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,64,2,128,1,float16,float16,255,0.010309333602587381
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,64,2,128,1,float16,fp8,255,0.012138667205969492
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,64,2,128,1,float16,float16,1023,0.029738667110602062
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,64,2,128,1,float16,fp8,511,0.016613333175579708
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,64,2,128,1,float16,fp8,1023,0.029605334003766377
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,64,2,128,1,float16,float16,2047,0.03756266583998998
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,64,2,128,1,float16,fp8,63,0.012063999970753988
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,64,2,128,1,float16,float16,4095,0.01815466706951459
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,64,2,128,1,float16,float16,8191,0.0223786657055219
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,64,2,128,1,float16,fp8,4095,0.01841066653529803
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,64,2,128,1,float16,fp8,8191,0.022090665996074677
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,64,2,128,1,float16,fp8,16383,0.02666666607062022
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,64,2,128,1,float16,float16,16383,0.024336000283559162
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,64,2,128,1,float16,float16,32767,0.03070399910211563
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,64,4,128,1,float16,fp8,1,0.009413333609700203
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,64,2,128,1,float16,fp8,2047,0.037045332292715706
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,64,4,128,1,float16,fp8,3,0.009445333232482275
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,64,4,128,1,float16,float16,7,0.009285333255926767
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,64,4,128,1,float16,float16,1,0.008240000034372011
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,64,4,128,1,float16,float16,15,0.008703999842206636
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,64,2,128,1,float16,fp8,32767,0.03421866645415624
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,64,4,128,1,float16,float16,3,0.008527999743819237
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,64,4,128,1,float16,float16,63,0.009365333244204521
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,64,4,128,1,float16,fp8,31,0.01020800011853377
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,64,4,128,1,float16,float16,31,0.009237333511312803
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,64,4,128,1,float16,fp8,63,0.012186666329701742
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,64,4,128,1,float16,fp8,7,0.009482666850090027
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,64,4,128,1,float16,float16,255,0.010650667051474253
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,64,4,128,1,float16,fp8,15,0.009541333342591921
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,64,4,128,1,float16,float16,511,0.016607999801635742
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,64,4,128,1,float16,fp8,1023,0.029578665892283123
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,64,4,128,1,float16,fp8,255,0.012117333710193634
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,64,4,128,1,float16,fp8,511,0.016810666769742966
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,64,4,128,1,float16,float16,127,0.009306666751702627
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,64,4,128,1,float16,float16,2047,0.015103999525308609
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,64,4,128,1,float16,float16,1023,0.02972800036271413
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,64,4,128,1,float16,fp8,127,0.012266666938861212
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,64,4,128,1,float16,float16,4095,0.015882667154073715
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,64,4,128,1,float16,fp8,2047,0.011391999820868174
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,64,4,128,1,float16,float16,8191,0.01951466624935468
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,64,4,128,1,float16,fp8,4095,0.013077333569526672
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,64,4,128,1,float16,fp8,8191,0.01590399940808614
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,64,4,128,1,float16,float16,16383,0.021104000508785248
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,64,4,128,1,float16,fp8,16383,0.0191040001809597
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,64,4,128,1,float16,fp8,32767,0.027669332921504974
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,64,4,128,1,float16,float16,32767,0.03443733354409536
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,64,8,128,1,float16,float16,15,0.00961599995692571
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,64,8,128,1,float16,float16,1,0.008181333541870117
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,64,8,128,1,float16,fp8,3,0.009455999980370203
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,64,8,128,1,float16,fp8,1,0.009472000102202097
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,64,8,128,1,float16,float16,3,0.008245333408315977
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,64,8,128,1,float16,float16,7,0.008298666526873907
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,64,8,128,1,float16,fp8,7,0.00943999985853831
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,64,8,128,1,float16,float16,31,0.009232000137368837
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,64,8,128,1,float16,fp8,63,0.012304000556468964
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,64,8,128,1,float16,float16,63,0.009285333255926767
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,64,8,128,1,float16,fp8,127,0.012133333832025528
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,64,8,128,1,float16,float16,127,0.009375999992092451
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,64,8,128,1,float16,fp8,255,0.011999999483426413
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,64,8,128,1,float16,fp8,31,0.010224000240365664
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,64,8,128,1,float16,float16,255,0.010591999938090643
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,64,8,128,1,float16,fp8,15,0.009754666437705358
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,64,8,128,1,float16,float16,2047,0.01533866673707962
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,64,8,128,1,float16,float16,1023,0.013663999736309052
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,64,8,128,1,float16,fp8,511,0.016586666305859882
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,64,8,128,1,float16,fp8,2047,0.009519999846816063
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,64,8,128,1,float16,float16,4095,0.016074666132529575
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,64,8,128,1,float16,float16,8191,0.025066666305065155
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,64,8,128,1,float16,fp8,4095,0.011007999380429586
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,64,8,128,1,float16,fp8,8191,0.014576000471909841
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,64,8,128,1,float16,fp8,32767,0.02769600103298823
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,64,8,128,1,float16,float16,511,0.016879999389251072
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,64,8,128,1,float16,float16,16383,0.029829333225886028
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,64,8,128,1,float16,fp8,1023,0.008512000242869059
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,64,8,128,1,float16,fp8,16383,0.017386666188637417
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,64,8,128,1,float16,float16,32767,0.047370667258898415
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,64,1,128,1,float16,float16,3,0.00874133345981439
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,64,1,128,1,float16,float16,1,0.008522666369875273
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,64,1,128,1,float16,fp8,1,0.009525333220760027
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,64,1,128,1,float16,float16,31,0.009306666751702627
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,64,1,128,1,float16,fp8,3,0.009632000078757605
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,64,1,128,1,float16,float16,7,0.00867733359336853
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,64,1,128,1,float16,float16,15,0.008752000207702318
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,64,1,128,1,float16,fp8,7,0.00973866693675518
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,64,1,128,1,float16,fp8,15,0.009733333562811216
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,64,1,128,1,float16,fp8,31,0.010533332824707031
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,64,1,128,1,float16,fp8,63,0.012261333564917246
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,64,1,128,1,float16,float16,63,0.009434666484594345
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,64,1,128,1,float16,float16,127,0.00972800018886725
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,64,1,128,1,float16,fp8,127,0.012330666184425354
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,64,1,128,1,float16,float16,1023,0.03263466556866964
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,64,1,128,1,float16,fp8,255,0.01219733307758967
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,64,1,128,1,float16,float16,255,0.010816000401973724
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,64,1,128,1,float16,fp8,511,0.028223998844623566
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,64,1,128,1,float16,float16,511,0.02809600035349528
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,64,1,128,1,float16,fp8,1023,0.032629333436489105
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,64,1,128,1,float16,float16,4095,0.06624533236026764
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,64,1,128,1,float16,float16,2047,0.04417600234349569
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,64,1,128,1,float16,fp8,2047,0.0436106671889623
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,64,1,128,1,float16,fp8,4095,0.0653706689675649
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,64,1,128,1,float16,float16,8191,0.11001599828402202
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,64,1,128,1,float16,fp8,8191,0.10775466759999593
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,64,1,128,1,float16,float16,32767,0.3736480077107747
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,64,1,128,1,float16,float16,16383,0.19803200165430704
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,64,1,128,1,float16,fp8,16383,0.19248533248901367
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,64,2,128,1,float16,float16,7,0.008634666601816813
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,64,2,128,1,float16,float16,1,0.008527999743819237
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,64,1,128,1,float16,fp8,32767,0.36258665720621747
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,64,2,128,1,float16,fp8,1,0.00961599995692571
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,64,2,128,1,float16,fp8,3,0.009743999689817429
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,64,2,128,1,float16,float16,3,0.008570666735370954
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,64,2,128,1,float16,fp8,7,0.009519999846816063
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,64,2,128,1,float16,float16,15,0.00877333308259646
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,64,2,128,1,float16,float16,31,0.009450666606426239
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,64,2,128,1,float16,fp8,31,0.010682666053374609
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,64,2,128,1,float16,float16,63,0.009488000224033991
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,64,2,128,1,float16,fp8,63,0.012256000190973282
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,64,2,128,1,float16,float16,127,0.009663999701539675
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,64,2,128,1,float16,float16,255,0.010645333677530289
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,64,2,128,1,float16,fp8,127,0.012282667060693106
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,64,2,128,1,float16,fp8,15,0.009749333063761393
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,64,2,128,1,float16,fp8,255,0.012223999947309494
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,64,2,128,1,float16,float16,511,0.02829866607983907
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,64,2,128,1,float16,fp8,511,0.028005334238211315
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,64,2,128,1,float16,float16,4095,0.01916266605257988
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,64,2,128,1,float16,fp8,1023,0.03263466556866964
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,64,2,128,1,float16,float16,2047,0.01829333355029424
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,64,2,128,1,float16,fp8,2047,0.015615999698638916
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,64,2,128,1,float16,float16,16383,0.02624000112215678
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,64,2,128,1,float16,fp8,4095,0.0180479995906353
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,64,2,128,1,float16,float16,8191,0.023743999501069386
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,64,2,128,1,float16,float16,1023,0.03266133368015289
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,64,2,128,1,float16,fp8,8191,0.022277332842350006
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,64,2,128,1,float16,fp8,16383,0.026858667532602947
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,64,2,128,1,float16,float16,32767,0.04472533365090688
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,64,4,128,1,float16,float16,1,0.008469333251317343
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,64,2,128,1,float16,fp8,32767,0.0498933345079422
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,64,4,128,1,float16,fp8,15,0.009797333429257074
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,64,4,128,1,float16,fp8,3,0.009509333098928133
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,64,4,128,1,float16,float16,3,0.008687999720374743
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,64,4,128,1,float16,fp8,1,0.009626666704813639
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,64,4,128,1,float16,float16,7,0.008389333263039589
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,64,4,128,1,float16,fp8,7,0.009488000224033991
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,64,4,128,1,float16,float16,15,0.008714666590094566
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,64,4,128,1,float16,fp8,31,0.010421333213647207
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,64,4,128,1,float16,float16,31,0.009525333220760027
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,64,4,128,1,float16,float16,63,0.009701333319147428
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,64,4,128,1,float16,fp8,63,0.012351999680201212
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,64,4,128,1,float16,float16,511,0.028357334434986115
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,64,4,128,1,float16,fp8,127,0.012378666549921036
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,64,4,128,1,float16,float16,127,0.009503999724984169
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,64,4,128,1,float16,fp8,255,0.012240000069141388
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,64,4,128,1,float16,float16,255,0.010794666906197866
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,64,4,128,1,float16,float16,2047,0.015962666521469753
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,64,4,128,1,float16,fp8,511,0.0281333327293396
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,64,4,128,1,float16,float16,1023,0.013744000345468521
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,64,4,128,1,float16,fp8,1023,0.010224000240365664
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,64,4,128,1,float16,float16,4095,0.016895999511082966
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,64,4,128,1,float16,fp8,4095,0.013327999661366144
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,64,4,128,1,float16,fp8,2047,0.011717333147923151
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,64,4,128,1,float16,float16,8191,0.026181332767009735
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,64,4,128,1,float16,fp8,8191,0.018565333137909572
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,64,8,128,1,float16,fp8,3,0.009626666704813639
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,64,4,128,1,float16,fp8,16383,0.022554665803909302
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,64,4,128,1,float16,float16,16383,0.03160000095764796
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,64,4,128,1,float16,float16,32767,0.05023466547330221
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,64,8,128,1,float16,float16,1,0.008463999877373377
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,64,8,128,1,float16,fp8,1,0.00956266683836778
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,64,8,128,1,float16,float16,3,0.00854399986565113
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,64,4,128,1,float16,fp8,32767,0.04146666576464971
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,64,8,128,1,float16,float16,7,0.00855466661353906
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,64,8,128,1,float16,fp8,7,0.009583999713261923
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,64,8,128,1,float16,fp8,15,0.00985599992175897
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,64,8,128,1,float16,float16,15,0.008762666955590248
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,64,8,128,1,float16,float16,127,0.009829333052039146
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,64,8,128,1,float16,float16,31,0.009343999748428663
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,64,8,128,1,float16,fp8,63,0.012165332833925882
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,64,8,128,1,float16,fp8,31,0.010480000327030817
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,64,8,128,1,float16,float16,63,0.009423999736706415
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,64,8,128,1,float16,float16,511,0.012842666357755661
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,64,8,128,1,float16,fp8,127,0.012250666817029318
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,64,8,128,1,float16,float16,255,0.010922666639089584
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,64,8,128,1,float16,fp8,511,0.008000000069538752
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,64,8,128,1,float16,fp8,255,0.012319999436537424
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,64,8,128,1,float16,float16,1023,0.013701333353916803
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,64,8,128,1,float16,fp8,2047,0.010858666151762009
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,64,8,128,1,float16,fp8,1023,0.008693333094318708
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,64,8,128,1,float16,float16,2047,0.02042666698495547
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,64,8,128,1,float16,fp8,16383,0.028789333999156952
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,64,8,128,1,float16,fp8,4095,0.012768000364303589
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,64,8,128,1,float16,float16,8191,0.03502399971087774
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,64,8,128,1,float16,fp8,8191,0.01701333373785019
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,64,8,128,1,float16,float16,16383,0.05131733417510986
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,64,1,128,1,float16,float16,1,0.059989333152770996
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,64,8,128,1,float16,float16,32767,0.07758933305740356
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,64,8,128,1,float16,fp8,32767,0.04147200038035711
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,64,8,128,1,float16,float16,4095,0.023845332364241283
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,64,1,128,1,float16,fp8,7,0.05048533280690511
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,64,1,128,1,float16,fp8,1,0.04670399924119314
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,64,1,128,1,float16,fp8,3,0.048063998421033226
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,64,1,128,1,float16,float16,3,0.06105599800745646
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,64,1,128,1,float16,float16,7,0.061861331264177956
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,64,1,128,1,float16,fp8,15,0.06367999811967213
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,64,1,128,1,float16,float16,15,0.07114666700363159
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,64,1,128,1,float16,fp8,31,0.06384533147017162
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,64,1,128,1,float16,float16,31,0.07182399928569794
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,64,1,128,1,float16,float16,63,0.07259733478228252
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,64,1,128,1,float16,fp8,63,0.06401066482067108
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,64,1,128,1,float16,float16,127,0.08633599678675334
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,64,1,128,1,float16,fp8,127,0.0759093314409256
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,64,1,128,1,float16,float16,511,0.21215999126434326
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,64,1,128,1,float16,float16,255,0.12877333164215088
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,64,1,128,1,float16,fp8,255,0.11594133575757344
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,64,1,128,1,float16,fp8,511,0.19716266791025797
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,64,1,128,1,float16,float16,4095,1.4099520047505696
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,64,1,128,1,float16,float16,1023,0.381440003712972
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,64,1,128,1,float16,fp8,1023,0.360154668490092
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,64,1,128,1,float16,float16,2047,0.7295573552449545
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,64,1,128,1,float16,fp8,2047,0.6838239828745524
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,64,2,128,1,float16,float16,1,0.01292266696691513
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,64,1,128,1,float16,fp8,4095,1.3281280199686687
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,64,2,128,1,float16,float16,3,0.013274667163689932
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,64,2,128,1,float16,fp8,1,0.009328000247478485
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,64,2,128,1,float16,fp8,3,0.009429333110650381
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,64,2,128,1,float16,float16,7,0.013274667163689932
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,64,2,128,1,float16,fp8,7,0.009354666496316591
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,64,2,128,1,float16,float16,15,0.01320533330241839
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,64,2,128,1,float16,fp8,31,0.009296000003814697
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,64,2,128,1,float16,fp8,15,0.009232000137368837
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,64,2,128,1,float16,float16,31,0.013002666334311167
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,64,2,128,1,float16,fp8,255,0.021269333859284718
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,64,2,128,1,float16,float16,63,0.012901333471139273
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,64,2,128,1,float16,fp8,63,0.009279999881982803
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,64,2,128,1,float16,float16,127,0.012773333738247553
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,64,2,128,1,float16,fp8,127,0.0100426667680343
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,64,2,128,1,float16,float16,255,0.025968000292778015
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,64,2,128,1,float16,float16,1023,0.057802667220433555
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,64,2,128,1,float16,float16,511,0.03485333422819773
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,64,2,128,1,float16,fp8,511,0.040031999349594116
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,64,2,128,1,float16,fp8,1023,0.07393066585063934
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,64,2,128,1,float16,fp8,2047,0.10384533802668254
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,64,2,128,1,float16,float16,2047,0.08155199885368347
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,64,4,128,1,float16,float16,1,0.016613333175579708
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,64,2,128,1,float16,float16,4095,0.10817066828409831
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,64,2,128,1,float16,fp8,4095,0.1362986663977305
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,64,4,128,1,float16,fp8,1,0.007962666451931
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,64,4,128,1,float16,float16,3,0.016069332758585613
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,64,4,128,1,float16,float16,15,0.016634666671355564
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,64,4,128,1,float16,fp8,3,0.007813333223263422
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,64,4,128,1,float16,float16,7,0.01613866661985715
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,64,4,128,1,float16,fp8,15,0.007701333612203598
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,64,4,128,1,float16,fp8,7,0.007802666475375493
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,64,4,128,1,float16,float16,31,0.01666133354107539
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,64,4,128,1,float16,fp8,63,0.00808533343176047
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,64,4,128,1,float16,fp8,31,0.00772266648709774
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,64,4,128,1,float16,float16,63,0.016016000260909397
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,64,4,128,1,float16,float16,127,0.01647466669480006
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,64,4,128,1,float16,fp8,511,0.03200533241033554
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,64,4,128,1,float16,float16,255,0.03149333347876867
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,64,4,128,1,float16,fp8,127,0.008618666479984919
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,64,4,128,1,float16,float16,511,0.04796266555786133
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,64,4,128,1,float16,fp8,255,0.017845333864291508
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,64,4,128,1,float16,fp8,2047,0.05907199780146281
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,64,4,128,1,float16,float16,1023,0.06274666885534923
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,64,4,128,1,float16,fp8,1023,0.04120533416668574
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,64,4,128,1,float16,float16,2047,0.0845973292986552
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,64,4,128,1,float16,float16,4095,0.12152533729871114
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,64,8,128,1,float16,float16,3,0.02621866762638092
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,64,4,128,1,float16,fp8,4095,0.0869653324286143
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,64,8,128,1,float16,float16,1,0.026208000878492992
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,64,8,128,1,float16,fp8,1,0.010533332824707031
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,64,8,128,1,float16,float16,7,0.025749333202838898
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,64,8,128,1,float16,fp8,3,0.010288000106811523
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,64,8,128,1,float16,fp8,15,0.010330666477481524
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,64,8,128,1,float16,fp8,7,0.010442666709423065
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,64,8,128,1,float16,float16,15,0.025829332570234936
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,64,8,128,1,float16,float16,31,0.025722667574882507
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,64,8,128,1,float16,fp8,31,0.010378666842977205
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,64,8,128,1,float16,fp8,127,0.011994666109482447
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,64,8,128,1,float16,float16,63,0.025946666797002155
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,64,8,128,1,float16,fp8,63,0.010405333091815313
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,64,8,128,1,float16,float16,127,0.02611733227968216
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,64,8,128,1,float16,float16,255,0.049679999550183616
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,64,8,128,1,float16,float16,1023,0.08130133152008057
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,64,8,128,1,float16,fp8,255,0.021520001192887623
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,64,8,128,1,float16,float16,511,0.06467733283837636
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,64,8,128,1,float16,fp8,511,0.027866666515668232
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,64,8,128,1,float16,fp8,1023,0.04168533285458883
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,64,8,128,1,float16,float16,2047,0.12057600418726604
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,64,8,128,1,float16,fp8,2047,0.060138667623202004
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,64,8,128,1,float16,fp8,4095,0.09938133756319682
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,64,8,128,1,float16,float16,4095,0.19193599621454874
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,64,1,128,1,float16,float16,1,0.013669333110253016
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,64,1,128,1,float16,fp8,1,0.009061333412925402
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,64,1,128,1,float16,float16,15,0.014352000008026758
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,64,1,128,1,float16,float16,3,0.0138026662170887
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,64,1,128,1,float16,fp8,3,0.009114666531483332
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,64,1,128,1,float16,float16,7,0.013866666704416275
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,64,1,128,1,float16,fp8,7,0.009029333169261614
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,64,1,128,1,float16,fp8,15,0.009685333197315535
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,64,1,128,1,float16,float16,63,0.01591466615597407
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,64,1,128,1,float16,fp8,31,0.011770666887362799
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,64,1,128,1,float16,float16,31,0.015397333850463232
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,64,1,128,1,float16,fp8,63,0.011578666667143503
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,64,1,128,1,float16,float16,511,0.02480533222357432
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,64,1,128,1,float16,fp8,127,0.01173866664369901
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,64,1,128,1,float16,float16,127,0.01618133361140887
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,64,1,128,1,float16,float16,255,0.018309333672126133
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,64,1,128,1,float16,fp8,255,0.013722666849692663
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,64,1,128,1,float16,float16,2047,0.06188266475995382
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,64,1,128,1,float16,fp8,511,0.018800000349680584
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,64,1,128,1,float16,float16,1023,0.03702933341264725
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,64,1,128,1,float16,fp8,1023,0.029487999776999157
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,64,1,128,1,float16,fp8,2047,0.05008533100287119
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,64,1,128,1,float16,float16,4095,0.11064533392588298
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,64,1,128,1,float16,fp8,8191,0.1742186745007833
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,64,1,128,1,float16,fp8,4095,0.09169600407282512
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,64,1,128,1,float16,float16,8191,0.20785067478815714
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,64,1,128,1,float16,float16,16383,0.40349864959716797
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,64,2,128,1,float16,fp8,1,0.00884799969693025
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,64,1,128,1,float16,fp8,16383,0.33900801340738934
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,64,1,128,1,float16,float16,32767,0.908682664235433
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,64,2,128,1,float16,float16,3,0.01379199946920077
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,64,1,128,1,float16,fp8,32767,0.7033387025197347
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,64,2,128,1,float16,float16,1,0.013797332843144735
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,64,2,128,1,float16,fp8,7,0.009072000160813332
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,64,2,128,1,float16,fp8,3,0.00902399979531765
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,64,2,128,1,float16,float16,7,0.013690666606028875
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,64,2,128,1,float16,fp8,63,0.011754666765530905
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,64,2,128,1,float16,float16,15,0.014165333161751429
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,64,2,128,1,float16,fp8,15,0.009658666948477427
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,64,2,128,1,float16,float16,31,0.015541333705186844
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,64,2,128,1,float16,fp8,31,0.01163200040658315
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,64,2,128,1,float16,float16,127,0.016095999628305435
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,64,2,128,1,float16,float16,63,0.016016000260909397
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,64,2,128,1,float16,float16,255,0.018357332795858383
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,64,2,128,1,float16,fp8,127,0.011706666400035223
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,64,2,128,1,float16,fp8,2047,0.015615999698638916
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,64,2,128,1,float16,float16,511,0.024677333732446034
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,64,2,128,1,float16,fp8,511,0.018944000204404194
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,64,2,128,1,float16,fp8,255,0.01340266689658165
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,64,2,128,1,float16,float16,4095,0.020821332931518555
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,64,2,128,1,float16,fp8,1023,0.013967999567588171
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,64,2,128,1,float16,float16,1023,0.01624533285697301
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,64,2,128,1,float16,float16,2047,0.01998399943113327
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,64,2,128,1,float16,fp8,4095,0.018272000054518383
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,64,4,128,1,float16,fp8,1,0.00877333308259646
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,64,2,128,1,float16,float16,8191,0.03278400003910065
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,64,2,128,1,float16,fp8,8191,0.03268799930810928
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,64,2,128,1,float16,fp8,16383,0.04009599983692169
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,64,2,128,1,float16,float16,16383,0.03944533318281174
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,64,2,128,1,float16,fp8,32767,0.06604266663392384
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,64,2,128,1,float16,float16,32767,0.06425066788991292
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,64,4,128,1,float16,float16,1,0.01394133393963178
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,64,4,128,1,float16,float16,3,0.013951999445756277
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,64,4,128,1,float16,float16,63,0.01613333324591319
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,64,4,128,1,float16,fp8,3,0.008992000172535578
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,64,4,128,1,float16,fp8,7,0.009056000038981438
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,64,4,128,1,float16,float16,7,0.013663999736309052
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,64,4,128,1,float16,float16,15,0.014197333405415217
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,64,4,128,1,float16,float16,31,0.01573333392540614
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,64,4,128,1,float16,fp8,31,0.01192533348997434
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,64,4,128,1,float16,fp8,15,0.009754666437705358
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,64,4,128,1,float16,fp8,63,0.011744000017642975
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,64,4,128,1,float16,fp8,127,0.01184533288081487
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,64,4,128,1,float16,float16,127,0.016048000504573185
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,64,4,128,1,float16,fp8,1023,0.010474666953086853
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,64,4,128,1,float16,float16,255,0.018330667167901993
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,64,4,128,1,float16,fp8,255,0.013461332768201828
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,64,4,128,1,float16,float16,511,0.012997332960367203
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,64,4,128,1,float16,fp8,511,0.009466666728258133
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,64,4,128,1,float16,float16,1023,0.013722666849692663
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,64,4,128,1,float16,float16,4095,0.02605866640806198
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,64,4,128,1,float16,float16,2047,0.022474666436513264
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,64,4,128,1,float16,fp8,2047,0.01303999995191892
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,64,4,128,1,float16,fp8,4095,0.015381333728631338
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,64,4,128,1,float16,float16,8191,0.036650667587916054
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,64,4,128,1,float16,float16,32767,0.08145066599051158
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,64,4,128,1,float16,fp8,8191,0.027162666122118633
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,64,4,128,1,float16,fp8,16383,0.034048000971476235
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,64,4,128,1,float16,float16,16383,0.053823997577031456
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,64,4,128,1,float16,fp8,32767,0.060458665092786155
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,64,8,128,1,float16,fp8,3,0.008986666798591614
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,64,8,128,1,float16,float16,1,0.013834666460752487
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,64,8,128,1,float16,fp8,1,0.008938666433095932
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,64,8,128,1,float16,float16,3,0.013882666826248169
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,64,8,128,1,float16,float16,31,0.015568000574906668
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,64,8,128,1,float16,fp8,7,0.008922666932145754
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,64,8,128,1,float16,float16,7,0.013834666460752487
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,64,8,128,1,float16,float16,15,0.014405333747466406
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,64,8,128,1,float16,fp8,15,0.009898666913310686
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,64,8,128,1,float16,fp8,31,0.011813333878914515
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,64,8,128,1,float16,float16,127,0.01611199975013733
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,64,8,128,1,float16,float16,63,0.016000000139077503
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,64,8,128,1,float16,fp8,63,0.011839999506870905
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,64,8,128,1,float16,fp8,127,0.01179733375708262
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,64,8,128,1,float16,float16,255,0.012885333349307379
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,64,8,128,1,float16,fp8,255,0.007621333623925845
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,64,8,128,1,float16,float16,1023,0.01940800001223882
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,64,8,128,1,float16,float16,511,0.013343999783198038
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,64,8,128,1,float16,fp8,1023,0.009866666669646898
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,64,8,128,1,float16,fp8,511,0.008421333506703377
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,64,8,128,1,float16,float16,8191,0.05863999823729197
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,64,8,128,1,float16,float16,2047,0.028581333657105763
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,64,8,128,1,float16,fp8,2047,0.012250666817029318
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,64,8,128,1,float16,float16,4095,0.04028266668319702
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,64,8,128,1,float16,fp8,4095,0.01977066695690155
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,64,8,128,1,float16,float16,32767,0.12338667114575703
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,64,8,128,1,float16,fp8,8191,0.026159999271233875
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,64,8,128,1,float16,float16,16383,0.08664000034332275
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,64,8,128,1,float16,fp8,16383,0.04422933359940847
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,64,1,128,1,float16,float16,1,0.1127839982509613
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,64,8,128,1,float16,fp8,32767,0.06592533489068349
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,64,1,128,1,float16,fp8,1,0.08871466914812724
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,64,1,128,1,float16,fp8,3,0.0911253293355306
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,64,1,128,1,float16,float16,3,0.11544000109036763
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,64,1,128,1,float16,float16,7,0.11707199613253276
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,64,1,128,1,float16,fp8,7,0.09550933043162028
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,64,1,128,1,float16,float16,63,0.1400159994761149
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,64,1,128,1,float16,float16,15,0.1371946632862091
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,64,1,128,1,float16,fp8,15,0.12178132931391399
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,64,1,128,1,float16,float16,31,0.1388213336467743
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,64,1,128,1,float16,fp8,31,0.12165866295496623
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,64,1,128,1,float16,fp8,63,0.12250133355458577
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,64,1,128,1,float16,float16,255,0.24940266211827597
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,64,1,128,1,float16,float16,127,0.1669600009918213
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,64,1,128,1,float16,fp8,127,0.14480533202489218
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,64,1,128,1,float16,fp8,255,0.2233120004336039
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,64,1,128,1,float16,float16,511,0.4124533335367839
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,64,1,128,1,float16,fp8,511,0.38256001472473145
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,64,1,128,1,float16,float16,2047,1.4221973419189453
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,64,1,128,1,float16,float16,1023,0.7565386295318604
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,64,1,128,1,float16,fp8,1023,0.7021066347757975
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,64,1,128,1,float16,fp8,2047,1.339461326599121
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,64,2,128,1,float16,float16,1,0.020266667008399963
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,64,2,128,1,float16,float16,7,0.020629333953062694
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,64,2,128,1,float16,fp8,1,0.015962666521469753
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,64,2,128,1,float16,float16,3,0.020373333245515823
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,64,2,128,1,float16,fp8,3,0.015978666643301647
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,64,2,128,1,float16,fp8,7,0.016309333344300587
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,64,2,128,1,float16,float16,15,0.020746666938066483
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,64,2,128,1,float16,fp8,31,0.01629866659641266
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,64,2,128,1,float16,fp8,15,0.01623999948302905
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,64,2,128,1,float16,float16,63,0.020373333245515823
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,64,2,128,1,float16,float16,31,0.02051199972629547
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,64,2,128,1,float16,fp8,63,0.016085332880417507
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,64,2,128,1,float16,float16,127,0.020389333367347717
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,64,2,128,1,float16,fp8,127,0.017610666652520496
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,64,2,128,1,float16,fp8,255,0.03908800085385641
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,64,2,128,1,float16,float16,255,0.03886399914820989
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,64,2,128,1,float16,float16,511,0.05973866581916809
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,64,2,128,1,float16,fp8,511,0.0734506646792094
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,64,2,128,1,float16,fp8,1023,0.1095360020796458
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,64,2,128,1,float16,float16,2047,0.10797333717346191
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,64,4,128,1,float16,float16,1,0.02770666778087616
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,64,2,128,1,float16,float16,1023,0.07791466514269511
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,64,2,128,1,float16,fp8,2047,0.1376479963461558
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,64,4,128,1,float16,float16,3,0.027189334233601887
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,64,4,128,1,float16,fp8,1,0.0129120002190272
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,64,4,128,1,float16,fp8,3,0.012698666503032049
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,64,4,128,1,float16,float16,15,0.027488000690937042
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,64,4,128,1,float16,float16,7,0.0271519993742307
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,64,4,128,1,float16,fp8,7,0.012736000120639801
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,64,4,128,1,float16,fp8,15,0.012746666868527731
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,64,4,128,1,float16,float16,31,0.027114666998386383
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,64,4,128,1,float16,float16,127,0.02757333219051361
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,64,4,128,1,float16,fp8,31,0.012821332861979803
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,64,4,128,1,float16,fp8,63,0.012805332740147909
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,64,4,128,1,float16,float16,63,0.027322667340437572
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,64,4,128,1,float16,fp8,127,0.01481066644191742
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,64,4,128,1,float16,float16,255,0.05186133086681366
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,64,4,128,1,float16,fp8,255,0.0315786674618721
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,64,4,128,1,float16,float16,511,0.06699199974536896
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,64,4,128,1,float16,fp8,511,0.04261333247025808
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,64,4,128,1,float16,float16,1023,0.08379200100898743
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,64,4,128,1,float16,fp8,1023,0.060165335734685264
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,64,4,128,1,float16,float16,2047,0.12426666418711345
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,64,4,128,1,float16,fp8,2047,0.08799999952316284
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,64,8,128,1,float16,float16,1,0.04594666759173075
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,64,8,128,1,float16,fp8,1,0.014698666830857595
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,64,8,128,1,float16,float16,3,0.045834665497144066
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,64,8,128,1,float16,fp8,3,0.014848000059525171
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,64,8,128,1,float16,float16,7,0.04585599899291992
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,64,8,128,1,float16,fp8,7,0.01469333345691363
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,64,8,128,1,float16,float16,31,0.046207999189694725
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,64,8,128,1,float16,fp8,15,0.01488000030318896
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,64,8,128,1,float16,float16,15,0.046154667933781944
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,64,8,128,1,float16,fp8,31,0.014757333944241205
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,64,8,128,1,float16,float16,63,0.04569066564242045
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,64,8,128,1,float16,fp8,63,0.01488000030318896
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,64,8,128,1,float16,float16,127,0.04669866462548574
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,64,8,128,1,float16,fp8,127,0.017818666994571686
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,64,8,128,1,float16,fp8,1023,0.056101332108179726
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,64,8,128,1,float16,float16,255,0.053823997577031456
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,64,8,128,1,float16,fp8,255,0.024005333582560223
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,64,8,128,1,float16,float16,511,0.07042666773001353
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,64,8,128,1,float16,fp8,511,0.036570665736993156
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,64,8,128,1,float16,float16,1023,0.10540800293286641
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,64,8,128,1,float16,float16,2047,0.17590399583180746
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,256,1,64,1,128,1,float16,float16,1,0.22099200884501138
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,64,8,128,1,float16,fp8,2047,0.0956213374932607
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,256,1,64,1,128,1,float16,fp8,1,0.17252800861994425
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,256,1,64,1,128,1,float16,float16,3,0.22562666734059653
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,256,1,64,1,128,1,float16,fp8,3,0.1771893302599589
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,256,1,64,1,128,1,float16,float16,7,0.228928009668986
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,256,1,64,1,128,1,float16,fp8,7,0.18598934014638266
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,256,1,64,1,128,1,float16,fp8,15,0.23864533503850302
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,256,1,64,1,128,1,float16,float16,15,0.26890132824579877
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,256,1,64,1,128,1,float16,float16,31,0.2721760074297587
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,256,1,64,1,128,1,float16,fp8,31,0.23801066478093466
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,256,1,64,1,128,1,float16,float16,63,0.2746773362159729
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,256,1,64,1,128,1,float16,fp8,63,0.23954667647679648
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,256,1,64,1,128,1,float16,float16,127,0.32762134075164795
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,256,1,64,1,128,1,float16,fp8,127,0.28386666377385456
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,256,1,64,1,128,1,float16,float16,255,0.49211732546488446
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,256,1,64,1,128,1,float16,fp8,255,0.43826134999593097
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,256,1,64,2,128,1,float16,float16,1,0.03487999985615412
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,256,1,64,1,128,1,float16,float16,511,0.831397294998169
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,256,1,64,1,128,1,float16,fp8,511,0.7531946500142416
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,256,1,64,2,128,1,float16,fp8,1,0.029690665503342945
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,256,1,64,2,128,1,float16,float16,3,0.0348693331082662
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,256,1,64,2,128,1,float16,fp8,3,0.02938666691382726
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,256,1,64,2,128,1,float16,float16,7,0.03498133271932602
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,256,1,64,2,128,1,float16,fp8,7,0.02935466667016347
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,256,1,64,2,128,1,float16,float16,15,0.03509333233038584
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,256,1,64,2,128,1,float16,fp8,15,0.029391999046007793
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,256,1,64,2,128,1,float16,float16,31,0.035088000198205314
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,256,1,64,2,128,1,float16,fp8,63,0.02941333254178365
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,256,1,64,2,128,1,float16,float16,63,0.03477866699298223
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,256,1,64,2,128,1,float16,fp8,31,0.02958400050799052
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,256,1,64,2,128,1,float16,float16,127,0.03486400097608566
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,256,1,64,2,128,1,float16,fp8,127,0.03251733382542928
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,256,1,64,2,128,1,float16,float16,255,0.06644799808661143
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,256,1,64,2,128,1,float16,fp8,255,0.07948266466458638
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,256,1,64,2,128,1,float16,float16,511,0.0835040012995402
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,256,1,64,2,128,1,float16,fp8,511,0.11267200112342834
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,256,1,64,4,128,1,float16,float16,1,0.048351998130480446
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,256,1,64,4,128,1,float16,fp8,1,0.022639999787012737
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,256,1,64,4,128,1,float16,float16,3,0.0487306664387385
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,256,1,64,4,128,1,float16,fp8,3,0.0223786657055219
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,256,1,64,4,128,1,float16,float16,7,0.04816000163555145
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,256,1,64,4,128,1,float16,fp8,7,0.022613334159056347
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,256,1,64,4,128,1,float16,fp8,15,0.022613334159056347
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,256,1,64,4,128,1,float16,float16,15,0.04836800197760264
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,256,1,64,4,128,1,float16,float16,31,0.048538664976755776
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,256,1,64,4,128,1,float16,fp8,31,0.022815999885400135
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,256,1,64,4,128,1,float16,float16,63,0.04850666721661886
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,256,1,64,4,128,1,float16,fp8,63,0.022330666581789654
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,256,1,64,4,128,1,float16,float16,127,0.04891733328501383
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,256,1,64,4,128,1,float16,fp8,127,0.026906666656335194
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,256,1,64,8,128,1,float16,float16,1,0.08386133114496867
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,256,1,64,4,128,1,float16,float16,255,0.05668266614278158
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,256,1,64,4,128,1,float16,float16,511,0.07387199997901917
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,256,1,64,4,128,1,float16,fp8,255,0.03728000074625015
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,256,1,64,4,128,1,float16,fp8,511,0.0551146666208903
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,256,1,64,8,128,1,float16,float16,7,0.08385599652926128
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,256,1,64,8,128,1,float16,float16,3,0.08391466736793518
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,256,1,64,8,128,1,float16,fp8,1,0.025888000925381977
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,256,1,64,8,128,1,float16,fp8,3,0.02640533447265625
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,256,1,64,8,128,1,float16,fp8,7,0.026101333399613697
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,256,1,64,8,128,1,float16,float16,15,0.08379733562469482
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,256,1,64,8,128,1,float16,float16,31,0.08371733625729878
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,256,1,64,8,128,1,float16,fp8,15,0.02645866572856903
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,256,1,64,8,128,1,float16,fp8,31,0.025957333544890087
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,256,1,64,8,128,1,float16,float16,63,0.0841919978459676
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,256,1,64,8,128,1,float16,fp8,63,0.025861332813898723
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,256,1,64,8,128,1,float16,float16,127,0.08582400282224019
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,256,1,64,8,128,1,float16,float16,255,0.09685333569844563
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,256,1,64,8,128,1,float16,fp8,255,0.046869332591692604
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,256,1,64,8,128,1,float16,fp8,127,0.03335466732581457
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,256,1,64,8,128,1,float16,float16,511,0.12905066212018332
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,256,1,64,8,128,1,float16,fp8,511,0.06619200110435486
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,512,1,64,1,128,1,float16,float16,1,0.43530134359995526
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,512,1,64,1,128,1,float16,fp8,1,0.3409866491953532
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,512,1,64,1,128,1,float16,float16,3,0.4453759988149007
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,512,1,64,1,128,1,float16,fp8,3,0.34957865873972577
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,512,1,64,1,128,1,float16,float16,7,0.4522560040156047
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,512,1,64,1,128,1,float16,fp8,7,0.3660373290379842
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,512,1,64,1,128,1,float16,fp8,15,0.47200000286102295
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,512,1,64,1,128,1,float16,float16,15,0.532912015914917
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,512,1,64,1,128,1,float16,float16,31,0.5395040114720663
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,512,1,64,1,128,1,float16,fp8,31,0.47062400976816815
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,512,1,64,1,128,1,float16,float16,63,0.544490655263265
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,512,1,64,1,128,1,float16,fp8,63,0.4733653465906779
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,512,1,64,1,128,1,float16,float16,127,0.6566133499145508
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,512,1,64,1,128,1,float16,fp8,127,0.5619999965031942
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,512,1,64,1,128,1,float16,float16,255,1.0024212996164958
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,512,1,64,1,128,1,float16,fp8,255,0.8690613110860189
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,512,1,64,2,128,1,float16,float16,1,0.06385600070158641
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,512,1,64,2,128,1,float16,fp8,1,0.05602666735649109
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,512,1,64,2,128,1,float16,float16,3,0.0633546660343806
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,512,1,64,2,128,1,float16,float16,7,0.06385066608587901
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,512,1,64,2,128,1,float16,fp8,3,0.055359999338785805
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,512,1,64,2,128,1,float16,fp8,7,0.05779199798901876
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,512,1,64,2,128,1,float16,float16,15,0.06374399860699971
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,512,1,64,2,128,1,float16,fp8,15,0.055162668228149414
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,512,1,64,2,128,1,float16,float16,31,0.06405866642793019
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,512,1,64,2,128,1,float16,fp8,31,0.055946667989095054
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,512,1,64,2,128,1,float16,float16,63,0.06446399788061778
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,512,1,64,2,128,1,float16,fp8,63,0.05690133571624756
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,512,1,64,2,128,1,float16,fp8,127,0.0662666658560435
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,512,1,64,2,128,1,float16,float16,127,0.06469866633415222
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,512,1,64,2,128,1,float16,float16,255,0.07306666672229767
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,512,1,64,2,128,1,float16,fp8,255,0.10385066270828247
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,512,1,64,4,128,1,float16,float16,1,0.0885653297106425
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,512,1,64,4,128,1,float16,fp8,1,0.046469335754712425
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,512,1,64,4,128,1,float16,float16,3,0.08842666943868001
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,512,1,64,4,128,1,float16,fp8,3,0.04417600234349569
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,512,1,64,4,128,1,float16,float16,7,0.08842133482297261
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,512,1,64,4,128,1,float16,float16,15,0.08867200215657552
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,512,1,64,4,128,1,float16,fp8,7,0.043247997760772705
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,512,1,64,4,128,1,float16,fp8,15,0.04295999805132548
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,512,1,64,4,128,1,float16,float16,31,0.08853333195050557
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,512,1,64,4,128,1,float16,fp8,31,0.046469335754712425
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,512,1,64,4,128,1,float16,float16,63,0.08906666437784831
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,512,1,64,4,128,1,float16,fp8,63,0.04409599800904592
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,512,1,64,4,128,1,float16,float16,127,0.09061333537101746
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,512,1,64,4,128,1,float16,fp8,127,0.06146133442719778
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,512,1,64,4,128,1,float16,float16,255,0.10326400399208069
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,512,1,64,4,128,1,float16,fp8,255,0.07738133271535237
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,512,1,64,8,128,1,float16,float16,1,0.15583999951680502
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,512,1,64,8,128,1,float16,fp8,1,0.05529599885145823
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,512,1,64,8,128,1,float16,float16,3,0.15576000014940897
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,512,1,64,8,128,1,float16,fp8,3,0.055589333176612854
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,512,1,64,8,128,1,float16,float16,7,0.15643200278282166
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,512,1,64,8,128,1,float16,float16,15,0.15665066242218018
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,512,1,64,8,128,1,float16,fp8,7,0.05529066423575083
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,512,1,64,8,128,1,float16,fp8,15,0.0558186670144399
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,512,1,64,8,128,1,float16,float16,31,0.1590506633122762
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,512,1,64,8,128,1,float16,fp8,31,0.055018668373425804
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,512,1,64,8,128,1,float16,float16,63,0.15999466180801392
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,512,1,64,8,128,1,float16,fp8,63,0.05551466842492422
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,512,1,64,8,128,1,float16,float16,127,0.16134933630625406
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,512,1,64,8,128,1,float16,fp8,127,0.06539200246334076
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,512,1,64,8,128,1,float16,float16,255,0.18318933248519897
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,512,1,64,8,128,1,float16,fp8,255,0.0839413305123647
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,64,1,128,1,float16,float16,1,0.013349333157142004
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,64,1,128,1,float16,fp8,1,0.014778666198253632
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,64,1,128,1,float16,float16,3,0.013669333110253016
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,64,1,128,1,float16,fp8,3,0.014938666174809137
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,64,1,128,1,float16,float16,7,0.013674666484196981
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,64,1,128,1,float16,float16,15,0.014271999398867289
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,64,1,128,1,float16,fp8,7,0.01482133318980535
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,64,1,128,1,float16,fp8,15,0.01617066686352094
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,64,1,128,1,float16,float16,31,0.014229333649079004
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,64,1,128,1,float16,fp8,31,0.019946667055288952
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,64,1,128,1,float16,fp8,127,0.019978666057189304
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,64,1,128,1,float16,float16,63,0.01451733335852623
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,64,1,128,1,float16,float16,127,0.01669866715868314
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,64,1,128,1,float16,fp8,63,0.020058666666348774
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,64,1,128,1,float16,float16,255,0.022602667411168415
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,64,1,128,1,float16,fp8,255,0.023311999936898548
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,64,1,128,1,float16,float16,1023,0.05936533212661743
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,64,1,128,1,float16,float16,511,0.03442133218050003
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,64,1,128,1,float16,fp8,511,0.03373866776625315
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,64,1,128,1,float16,fp8,1023,0.05434666574001312
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,64,1,128,1,float16,float16,2047,0.10777599612871806
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,64,1,128,1,float16,fp8,2047,0.094842662413915
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,64,1,128,1,float16,float16,4095,0.20452266931533813
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,64,1,128,1,float16,fp8,4095,0.17548267046610513
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,64,1,128,1,float16,float16,8191,0.3993493318557739
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,64,1,128,1,float16,fp8,8191,0.337775985399882
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,64,1,128,1,float16,float16,16383,0.8677546977996826
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,64,1,128,1,float16,fp8,16383,0.6628906726837158
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,64,2,128,1,float16,float16,3,0.013616000612576803
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,64,1,128,1,float16,float16,32767,3.0976479848225913
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,64,2,128,1,float16,float16,1,0.013359999905029932
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,64,1,128,1,float16,fp8,32767,1.453477382659912
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,64,2,128,1,float16,fp8,1,0.014826666563749313
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,64,2,128,1,float16,fp8,3,0.014842666685581207
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,64,2,128,1,float16,float16,15,0.013946666071812311
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,64,2,128,1,float16,float16,7,0.013557333499193192
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,64,2,128,1,float16,fp8,7,0.014970666418472925
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,64,2,128,1,float16,float16,31,0.014240000396966934
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,64,2,128,1,float16,fp8,15,0.0161920003592968
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,64,2,128,1,float16,float16,63,0.014485333114862442
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,64,2,128,1,float16,fp8,31,0.020058666666348774
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,64,2,128,1,float16,float16,127,0.016597333053747814
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,64,2,128,1,float16,fp8,63,0.01977066695690155
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,64,2,128,1,float16,fp8,127,0.020069333414236706
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,64,2,128,1,float16,float16,255,0.022687998910744984
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,64,2,128,1,float16,fp8,255,0.023413332800070446
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,64,2,128,1,float16,float16,511,0.01509333277742068
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,64,2,128,1,float16,fp8,511,0.012543999900420507
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,64,2,128,1,float16,fp8,1023,0.013925333817799887
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,64,2,128,1,float16,float16,1023,0.01589866727590561
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,64,2,128,1,float16,float16,2047,0.025242666403452556
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,64,2,128,1,float16,fp8,2047,0.02436800052722295
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,64,2,128,1,float16,float16,4095,0.03028800090154012
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,64,2,128,1,float16,fp8,4095,0.028175999720891316
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,64,2,128,1,float16,float16,8191,0.0458133320013682
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,64,2,128,1,float16,float16,16383,0.06482666730880737
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,64,2,128,1,float16,fp8,8191,0.04445866743723551
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,64,2,128,1,float16,fp8,16383,0.06746133168538411
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,64,2,128,1,float16,float16,32767,0.10310932993888855
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,64,4,128,1,float16,fp8,3,0.015061333775520325
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,64,4,128,1,float16,float16,1,0.013637332866589228
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,64,4,128,1,float16,fp8,1,0.01470400020480156
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,64,4,128,1,float16,float16,3,0.013594667116800943
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,64,2,128,1,float16,fp8,32767,0.12801067034403482
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,64,4,128,1,float16,float16,7,0.01349866638580958
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,64,4,128,1,float16,fp8,7,0.015040000279744467
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,64,4,128,1,float16,fp8,15,0.01634666696190834
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,64,4,128,1,float16,float16,15,0.014165333161751429
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,64,4,128,1,float16,float16,31,0.014240000396966934
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,64,4,128,1,float16,fp8,31,0.019920000185569126
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,64,4,128,1,float16,float16,63,0.014618666221698126
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,64,4,128,1,float16,float16,127,0.016778666526079178
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,64,4,128,1,float16,fp8,63,0.019904000063737232
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,64,4,128,1,float16,fp8,127,0.020101333657900494
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,64,4,128,1,float16,float16,255,0.012629333883523941
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,64,4,128,1,float16,fp8,255,0.009125333279371262
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,64,4,128,1,float16,float16,511,0.013594667116800943
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,64,4,128,1,float16,fp8,511,0.009701333319147428
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,64,4,128,1,float16,float16,1023,0.019738666713237762
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,64,4,128,1,float16,float16,4095,0.0412266676624616
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,64,4,128,1,float16,fp8,1023,0.011802667131026586
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,64,4,128,1,float16,float16,2047,0.02889599899450938
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,64,4,128,1,float16,fp8,2047,0.020037333170572918
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,64,4,128,1,float16,fp8,4095,0.023733332753181458
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,64,4,128,1,float16,fp8,16383,0.06264000137646993
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,64,4,128,1,float16,float16,8191,0.05890666445096334
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,64,4,128,1,float16,fp8,8191,0.036917333801587425
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,64,4,128,1,float16,float16,16383,0.0879200001557668
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,64,4,128,1,float16,float16,32767,0.12477333347002666
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,64,8,128,1,float16,float16,1,0.010768000036478043
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,64,4,128,1,float16,fp8,32767,0.09821866949399312
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,64,8,128,1,float16,float16,3,0.011413333316644033
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,64,8,128,1,float16,fp8,1,0.0063680000603199005
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,64,8,128,1,float16,fp8,3,0.00625599982837836
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,64,8,128,1,float16,float16,7,0.010368000095089277
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,64,8,128,1,float16,fp8,7,0.006501333167155583
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,64,8,128,1,float16,float16,15,0.010565333068370819
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,64,8,128,1,float16,fp8,15,0.0064266665528217954
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,64,8,128,1,float16,fp8,31,0.0063786668082078295
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,64,8,128,1,float16,float16,63,0.010527999450763067
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,64,8,128,1,float16,float16,31,0.01725333308180173
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,64,8,128,1,float16,fp8,63,0.006186666587988536
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,64,8,128,1,float16,float16,127,0.010565333068370819
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,64,8,128,1,float16,fp8,127,0.008080000057816505
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,64,8,128,1,float16,float16,511,0.0194560003777345
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,64,8,128,1,float16,fp8,255,0.008154666672150293
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,64,8,128,1,float16,float16,255,0.013631999492645264
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,64,8,128,1,float16,fp8,511,0.009413333609700203
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,64,8,128,1,float16,float16,4095,0.06682666639486949
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,64,8,128,1,float16,fp8,1023,0.015061333775520325
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,64,8,128,1,float16,float16,1023,0.027999999622503918
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,64,8,128,1,float16,float16,2047,0.04297066728274027
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,64,8,128,1,float16,fp8,2047,0.018570666511853535
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,64,8,128,1,float16,fp8,4095,0.028938665986061096
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,64,8,128,1,float16,float16,8191,0.08417066931724548
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,64,8,128,1,float16,float16,16383,0.11961600184440613
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,64,8,128,1,float16,fp8,8191,0.042319998145103455
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,64,8,128,1,float16,fp8,16383,0.060864001512527466
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,64,8,128,1,float16,float16,32767,0.19125866889953613
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,64,8,128,1,float16,fp8,32767,0.10051733255386353
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1024,1,64,1,128,1,float16,float16,1,0.8656480312347412
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1024,1,64,1,128,1,float16,fp8,3,0.6977706750233968
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1024,1,64,1,128,1,float16,fp8,1,0.6806986331939697
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1024,1,64,1,128,1,float16,float16,3,0.8882666428883871
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1024,1,64,1,128,1,float16,float16,7,0.8995306491851807
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1024,1,64,1,128,1,float16,fp8,7,0.7287039756774902
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1024,1,64,1,128,1,float16,float16,15,1.0663680235544841
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1024,1,64,1,128,1,float16,fp8,15,0.9390986760457357
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1024,1,64,1,128,1,float16,fp8,31,0.9382666746775309
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1024,1,64,1,128,1,float16,float16,31,1.0915520191192627
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1024,1,64,1,128,1,float16,float16,63,1.1135893662770588
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1024,1,64,1,128,1,float16,fp8,63,0.9477600256601969
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1024,1,64,1,128,1,float16,float16,127,1.333077351252238
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1024,1,64,2,128,1,float16,float16,3,0.12014399965604146
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1024,1,64,1,128,1,float16,fp8,127,1.132266680399577
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1024,1,64,2,128,1,float16,float16,1,0.11974933743476868
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1024,1,64,2,128,1,float16,fp8,1,0.16299200057983398
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1024,1,64,2,128,1,float16,fp8,3,0.16338133811950684
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1024,1,64,2,128,1,float16,float16,7,0.12024000287055969
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1024,1,64,2,128,1,float16,fp8,7,0.1623093287150065
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1024,1,64,2,128,1,float16,fp8,15,0.16370667020479837
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1024,1,64,2,128,1,float16,float16,15,0.12070399522781372
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1024,1,64,2,128,1,float16,float16,31,0.12087466319402058
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1024,1,64,2,128,1,float16,float16,63,0.12045866250991821
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1024,1,64,2,128,1,float16,fp8,31,0.16400532921155295
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1024,1,64,2,128,1,float16,fp8,63,0.16306133071581522
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1024,1,64,2,128,1,float16,float16,127,0.1211946705977122
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1024,1,64,4,128,1,float16,float16,1,0.16646933555603027
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1024,1,64,2,128,1,float16,fp8,127,0.17822933197021484
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1024,1,64,4,128,1,float16,fp8,1,0.10878933469454448
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1024,1,64,4,128,1,float16,float16,3,0.16605866948763529
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1024,1,64,4,128,1,float16,fp8,3,0.1092800001303355
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1024,1,64,4,128,1,float16,fp8,15,0.1090773344039917
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1024,1,64,4,128,1,float16,fp8,7,0.10900266965230306
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1024,1,64,4,128,1,float16,float16,7,0.16702399651209512
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1024,1,64,4,128,1,float16,float16,15,0.16777066389719644
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1024,1,64,4,128,1,float16,float16,31,0.16873067617416382
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1024,1,64,4,128,1,float16,fp8,31,0.1088693340619405
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1024,1,64,4,128,1,float16,float16,63,0.16834133863449097
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1024,1,64,4,128,1,float16,fp8,63,0.10890666643778484
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1024,1,64,4,128,1,float16,float16,127,0.1701493263244629
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1024,1,64,4,128,1,float16,fp8,127,0.12325867017110188
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1024,1,64,8,128,1,float16,float16,1,0.30606400966644287
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1024,1,64,8,128,1,float16,fp8,1,0.10461866855621338
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1024,1,64,8,128,1,float16,float16,3,0.30740267038345337
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1024,1,64,8,128,1,float16,fp8,3,0.10506133238474528
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1024,1,64,8,128,1,float16,float16,7,0.3086719910303752
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1024,1,64,8,128,1,float16,fp8,7,0.10450133681297302
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1024,1,64,8,128,1,float16,float16,15,0.3110293348630269
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1024,1,64,8,128,1,float16,float16,31,0.3118826746940613
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1024,1,64,8,128,1,float16,fp8,15,0.1048959990342458
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1024,1,64,8,128,1,float16,fp8,31,0.10500799616177876
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1024,1,64,8,128,1,float16,float16,63,0.31010133028030396
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1024,1,64,8,128,1,float16,fp8,63,0.10412266850471497
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1024,1,64,8,128,1,float16,float16,127,0.31269333759943646
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1024,1,64,8,128,1,float16,fp8,127,0.12243200341860454
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2048,1,64,1,128,1,float16,fp8,1,1.39192533493042
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2048,1,64,1,128,1,float16,float16,3,1.8325066566467285
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2048,1,64,1,128,1,float16,fp8,3,1.4285173416137695
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2048,1,64,1,128,1,float16,float16,1,1.7973814010620117
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2048,1,64,1,128,1,float16,float16,7,1.8528000513712566
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2048,1,64,1,128,1,float16,fp8,7,1.488111972808838
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2048,1,64,1,128,1,float16,float16,15,2.156538645426432
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2048,1,64,1,128,1,float16,fp8,15,1.8840319315592449
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2048,1,64,1,128,1,float16,float16,31,2.1969067255655923
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2048,1,64,1,128,1,float16,fp8,31,1.8814506530761719
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2048,1,64,1,128,1,float16,float16,63,2.2210987408955893
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2048,1,64,2,128,1,float16,float16,1,0.23076266050338745
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2048,1,64,2,128,1,float16,fp8,1,0.32440000772476196
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2048,1,64,2,128,1,float16,float16,3,0.23081066211064658
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2048,1,64,2,128,1,float16,fp8,3,0.3240586717923482
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2048,1,64,1,128,1,float16,fp8,63,1.8948747316996257
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2048,1,64,2,128,1,float16,fp8,7,0.32477333148320514
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2048,1,64,2,128,1,float16,float16,15,0.23113600413004556
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2048,1,64,2,128,1,float16,float16,7,0.23052799701690674
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2048,1,64,2,128,1,float16,fp8,15,0.3246026635169983
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2048,1,64,2,128,1,float16,float16,31,0.23099199930826822
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2048,1,64,2,128,1,float16,fp8,31,0.32516799370447796
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2048,1,64,2,128,1,float16,fp8,63,0.3248213330904643
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2048,1,64,2,128,1,float16,float16,63,0.22937599817911783
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2048,1,64,4,128,1,float16,float16,1,0.3269386688868205
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2048,1,64,4,128,1,float16,fp8,1,0.209498663743337
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2048,1,64,4,128,1,float16,float16,3,0.3272106647491455
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2048,1,64,4,128,1,float16,fp8,3,0.20801599820454916
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2048,1,64,4,128,1,float16,float16,15,0.32761599620183307
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2048,1,64,4,128,1,float16,float16,7,0.32696533203125
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2048,1,64,4,128,1,float16,fp8,7,0.20921599864959717
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2048,1,64,4,128,1,float16,fp8,15,0.20853332678476968
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2048,1,64,4,128,1,float16,fp8,31,0.20933334032694498
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2048,1,64,4,128,1,float16,float16,63,0.3255680004755656
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2048,1,64,8,128,1,float16,float16,1,0.6127520004908243
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2048,1,64,4,128,1,float16,float16,31,0.32772799332936603
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2048,1,64,4,128,1,float16,fp8,63,0.20787199338277182
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2048,1,64,8,128,1,float16,fp8,1,0.20174932479858398
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2048,1,64,8,128,1,float16,float16,3,0.6123786767323812
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2048,1,64,8,128,1,float16,fp8,3,0.20181334018707275
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2048,1,64,8,128,1,float16,float16,15,0.6148480176925659
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2048,1,64,8,128,1,float16,float16,7,0.6126453479131063
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2048,1,64,8,128,1,float16,fp8,7,0.2021226684252421
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2048,1,64,8,128,1,float16,fp8,15,0.20202134052912393
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2048,1,64,8,128,1,float16,float16,31,0.613701343536377
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2048,1,64,8,128,1,float16,fp8,31,0.20195200045903525
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2048,1,64,8,128,1,float16,float16,63,0.608458677927653
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,64,1,128,1,float16,float16,1,0.018874666343132656
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,64,1,128,1,float16,fp8,15,0.019296000401178997
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,64,1,128,1,float16,fp8,3,0.015189333508412043
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,64,1,128,1,float16,fp8,1,0.014720000326633453
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,64,1,128,1,float16,float16,3,0.019167999426523846
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2048,1,64,8,128,1,float16,fp8,63,0.20094933112462363
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,64,1,128,1,float16,float16,7,0.019280000279347103
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,64,1,128,1,float16,fp8,7,0.015568000574906668
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,64,1,128,1,float16,float16,15,0.021397332350413006
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,64,1,128,1,float16,float16,31,0.021541332205136616
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,64,1,128,1,float16,fp8,31,0.01950399950146675
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,64,1,128,1,float16,float16,63,0.021914665897687275
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,64,1,128,1,float16,fp8,63,0.019519999623298645
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,64,1,128,1,float16,float16,127,0.025381334125995636
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,64,1,128,1,float16,fp8,127,0.023007998863856
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,64,1,128,1,float16,float16,511,0.05840533475081126
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,64,1,128,1,float16,float16,255,0.03619733452796936
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,64,1,128,1,float16,fp8,255,0.03334933271010717
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,64,1,128,1,float16,float16,4095,0.36351466178894043
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,64,1,128,1,float16,fp8,511,0.05413866539796194
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,64,1,128,1,float16,float16,2047,0.18888000647226968
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,64,1,128,1,float16,float16,1023,0.1021066705385844
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,64,1,128,1,float16,fp8,2047,0.17974932988484701
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,64,1,128,1,float16,fp8,1023,0.09613866607348125
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,64,1,128,1,float16,fp8,4095,0.34543999036153156
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,64,1,128,1,float16,float16,8191,0.7474719683329264
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,64,1,128,1,float16,fp8,8191,0.678325335184733
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,64,2,128,1,float16,float16,1,0.019002666076024372
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,64,1,128,1,float16,float16,16383,1.8487040201822917
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,64,2,128,1,float16,fp8,1,0.014554666976133982
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,64,2,128,1,float16,fp8,3,0.015178666760524115
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,64,2,128,1,float16,float16,3,0.019023999571800232
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,64,1,128,1,float16,fp8,16383,1.4440479278564453
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,64,2,128,1,float16,float16,7,0.019541333119074505
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,64,2,128,1,float16,fp8,7,0.015557333827018738
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,64,2,128,1,float16,float16,15,0.021344001094500225
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,64,2,128,1,float16,fp8,15,0.019472000499566395
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,64,2,128,1,float16,float16,31,0.02145066608985265
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,64,2,128,1,float16,fp8,31,0.01929066702723503
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,64,2,128,1,float16,float16,63,0.021962667504946392
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,64,2,128,1,float16,fp8,63,0.01947733387351036
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,64,2,128,1,float16,fp8,127,0.02292799949645996
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,64,2,128,1,float16,float16,127,0.025311999022960663
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,64,2,128,1,float16,float16,2047,0.034602666894594826
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,64,2,128,1,float16,float16,255,0.014965333044528961
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,64,2,128,1,float16,fp8,255,0.011887999872366587
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,64,2,128,1,float16,float16,511,0.015482666591803232
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,64,2,128,1,float16,fp8,511,0.012608000387748083
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,64,2,128,1,float16,float16,1023,0.023103999594847362
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,64,2,128,1,float16,fp8,1023,0.02532800038655599
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,64,2,128,1,float16,fp8,2047,0.033557333052158356
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,64,2,128,1,float16,float16,4095,0.04979733129342397
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,64,2,128,1,float16,fp8,4095,0.0537066658337911
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,64,2,128,1,float16,float16,8191,0.073594664533933
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,64,2,128,1,float16,fp8,8191,0.08994133273760478
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,64,2,128,1,float16,float16,16383,0.10951466361681621
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,64,4,128,1,float16,float16,1,0.010672000547250112
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,64,2,128,1,float16,fp8,16383,0.13505599896113077
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,64,4,128,1,float16,fp8,1,0.007360000163316727
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,64,4,128,1,float16,float16,3,0.011498666057984034
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,64,4,128,1,float16,fp8,3,0.007258666679263115
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,64,4,128,1,float16,float16,7,0.010506667196750641
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,64,4,128,1,float16,fp8,7,0.0069440001000960665
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,64,4,128,1,float16,float16,15,0.010458666831254959
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,64,4,128,1,float16,fp8,15,0.007216000308593114
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,64,4,128,1,float16,float16,31,0.010506667196750641
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,64,4,128,1,float16,float16,63,0.011418666690587997
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,64,4,128,1,float16,fp8,31,0.00707733320693175
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,64,4,128,1,float16,fp8,63,0.007311999797821045
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,64,4,128,1,float16,float16,127,0.010597333312034607
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,64,4,128,1,float16,float16,255,0.013621332744757334
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,64,4,128,1,float16,float16,511,0.019498666127522785
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,64,4,128,1,float16,fp8,511,0.012442667037248611
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,64,4,128,1,float16,float16,1023,0.028543998797734577
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,64,4,128,1,float16,fp8,127,0.00820266641676426
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,64,4,128,1,float16,fp8,255,0.009328000247478485
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,64,4,128,1,float16,fp8,1023,0.019765333582957584
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,64,4,128,1,float16,float16,2047,0.04347200194994608
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,64,4,128,1,float16,fp8,4095,0.04241600135962168
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,64,4,128,1,float16,fp8,2047,0.027600000301996868
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,64,4,128,1,float16,float16,4095,0.06763199965159099
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,64,4,128,1,float16,fp8,8191,0.060005332032839455
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,64,4,128,1,float16,float16,8191,0.08622933427492778
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,64,4,128,1,float16,float16,16383,0.12177600463231404
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,64,8,128,1,float16,fp8,1,0.00873066671192646
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,64,8,128,1,float16,float16,1,0.010832000523805618
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,64,8,128,1,float16,float16,3,0.010922666639089584
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,64,8,128,1,float16,fp8,3,0.0064106664309899015
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,64,8,128,1,float16,float16,7,0.01098666712641716
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,64,4,128,1,float16,fp8,16383,0.08779199918111165
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,64,8,128,1,float16,fp8,7,0.006250666454434395
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,64,8,128,1,float16,fp8,15,0.008522666369875273
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,64,8,128,1,float16,float16,15,0.011055999745925268
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,64,8,128,1,float16,float16,31,0.010874666273593903
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,64,8,128,1,float16,fp8,31,0.0064853330453236895
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,64,8,128,1,float16,float16,63,0.01073066641887029
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,64,8,128,1,float16,fp8,63,0.006186666587988536
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,64,8,128,1,float16,float16,255,0.020560000091791153
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,64,8,128,1,float16,float16,127,0.011045332998037338
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,64,8,128,1,float16,fp8,127,0.0069333333522081375
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,64,8,128,1,float16,fp8,255,0.008901333436369896
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,64,8,128,1,float16,float16,511,0.028218666712443035
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,64,8,128,1,float16,float16,2047,0.06512533128261566
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,64,8,128,1,float16,fp8,511,0.014805333067973455
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,64,8,128,1,float16,float16,1023,0.04524800181388855
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,64,8,128,1,float16,fp8,1023,0.02181333303451538
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,64,8,128,1,float16,fp8,2047,0.027957332630952198
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,64,8,128,1,float16,float16,4095,0.08311999837557475
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,64,8,128,1,float16,fp8,8191,0.06233599781990051
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,64,8,128,1,float16,fp8,4095,0.0408746674656868
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,64,8,128,1,float16,float16,8191,0.11878400047620137
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,64,8,128,1,float16,float16,16383,0.18984532356262207
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,64,8,128,1,float16,fp8,16383,0.09996267159779866
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,32,1,128,1,float16,float16,1,0.01883200059334437
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,32,1,128,1,float16,fp8,1,0.01482133318980535
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,32,1,128,1,float16,float16,3,0.019178666174411774
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,32,1,128,1,float16,fp8,3,0.015029333531856537
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,32,1,128,1,float16,float16,7,0.01932799940307935
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,32,1,128,1,float16,fp8,7,0.01573333392540614
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,32,1,128,1,float16,float16,15,0.021216000119845074
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,32,1,128,1,float16,float16,31,0.021573332448800404
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,32,1,128,1,float16,fp8,15,0.019733333339293797
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,32,1,128,1,float16,fp8,31,0.01961600035429001
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,32,1,128,1,float16,fp8,63,0.019551999866962433
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,32,1,128,1,float16,float16,63,0.021829334398110706
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,32,1,128,1,float16,fp8,127,0.023024000227451324
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,32,1,128,1,float16,float16,127,0.02531733363866806
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,32,1,128,1,float16,float16,1023,0.023210667073726654
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,32,1,128,1,float16,float16,255,0.014666666587193808
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,32,1,128,1,float16,fp8,255,0.011685332904259363
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,32,1,128,1,float16,float16,511,0.015520000209410986
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,32,1,128,1,float16,fp8,1023,0.025386666258176167
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,32,1,128,1,float16,fp8,511,0.012602667013804117
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,32,1,128,1,float16,float16,4095,0.050053333242734276
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,32,1,128,1,float16,float16,2047,0.03509333233038584
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,32,1,128,1,float16,fp8,2047,0.033520000676314034
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,32,1,128,1,float16,fp8,4095,0.05337599913279215
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,32,1,128,1,float16,float16,8191,0.07400000095367432
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,32,1,128,1,float16,fp8,8191,0.0851146678129832
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,32,1,128,1,float16,float16,16383,0.11060800155003865
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,32,1,128,1,float16,fp8,16383,0.13481066624323526
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,32,2,128,1,float16,float16,1,0.011674666156371435
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,32,2,128,1,float16,fp8,1,0.007530666887760162
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,32,2,128,1,float16,fp8,3,0.00706666645904382
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,32,2,128,1,float16,float16,7,0.010426666587591171
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,32,2,128,1,float16,float16,3,0.010351999973257383
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,32,2,128,1,float16,fp8,7,0.007061333085099856
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,32,2,128,1,float16,float16,31,0.011584000041087469
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,32,2,128,1,float16,float16,15,0.01032533310353756
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,32,2,128,1,float16,fp8,15,0.007296000296870868
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,32,2,128,1,float16,fp8,31,0.0075626665105422335
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,32,2,128,1,float16,float16,63,0.010314666976531347
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,32,2,128,1,float16,float16,255,0.013679999858140945
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,32,2,128,1,float16,fp8,63,0.006949333474040031
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,32,2,128,1,float16,float16,127,0.010501333822806677
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,32,2,128,1,float16,fp8,127,0.0075519997626543045
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,32,2,128,1,float16,fp8,255,0.01003200002014637
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,32,2,128,1,float16,float16,511,0.01970133309563001
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,32,2,128,1,float16,fp8,1023,0.019727999965349834
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,32,2,128,1,float16,fp8,511,0.010981333752473196
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,32,2,128,1,float16,float16,1023,0.02811199923356374
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,32,2,128,1,float16,float16,2047,0.0439573327700297
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,32,2,128,1,float16,fp8,2047,0.02759466568628947
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,32,2,128,1,float16,float16,4095,0.06763199965159099
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,32,2,128,1,float16,fp8,4095,0.04223999877770742
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,32,2,128,1,float16,float16,8191,0.08653333783149719
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,32,2,128,1,float16,fp8,8191,0.060362666845321655
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,32,2,128,1,float16,fp8,16383,0.08749333024024963
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,32,4,128,1,float16,float16,1,0.010933333386977514
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,32,2,128,1,float16,float16,16383,0.12197867035865784
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,32,4,128,1,float16,fp8,1,0.006319999694824219
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,32,4,128,1,float16,float16,3,0.010960000256697336
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,32,4,128,1,float16,fp8,3,0.006442666674653689
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,32,4,128,1,float16,fp8,7,0.008762666955590248
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,32,4,128,1,float16,float16,7,0.010826667149861654
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,32,4,128,1,float16,float16,15,0.010805333654085795
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,32,4,128,1,float16,float16,127,0.011434666812419891
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,32,4,128,1,float16,fp8,15,0.0063040001938740415
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,32,4,128,1,float16,fp8,31,0.006565333033601443
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,32,4,128,1,float16,float16,31,0.010821333775917688
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,32,4,128,1,float16,float16,63,0.010901333143313726
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,32,4,128,1,float16,fp8,63,0.0063040001938740415
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,32,4,128,1,float16,fp8,127,0.006768000001708667
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,32,4,128,1,float16,fp8,255,0.010053333515922228
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,32,4,128,1,float16,float16,255,0.021114667256673176
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,32,4,128,1,float16,fp8,511,0.014592000593741735
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,32,4,128,1,float16,float16,511,0.02809600035349528
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,32,4,128,1,float16,fp8,1023,0.02187199890613556
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,32,4,128,1,float16,float16,2047,0.06542400022347768
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,32,4,128,1,float16,float16,1023,0.045194665590922035
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,32,4,128,1,float16,fp8,2047,0.027664000789324444
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,32,4,128,1,float16,float16,4095,0.0835040012995402
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,32,4,128,1,float16,fp8,4095,0.041178666055202484
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,32,4,128,1,float16,float16,8191,0.12001599868138631
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,32,4,128,1,float16,fp8,8191,0.06044266621271769
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,32,4,128,1,float16,float16,16383,0.19108267625172934
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,32,4,128,1,float16,fp8,16383,0.09961066643397014
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,32,8,128,1,float16,fp8,1,0.006762666627764702
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,32,8,128,1,float16,fp8,3,0.006474666918317477
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,32,8,128,1,float16,fp8,7,0.00655466690659523
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,32,8,128,1,float16,fp8,15,0.006629333520929019
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,32,8,128,1,float16,fp8,31,0.006704000135262807
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,32,8,128,1,float16,fp8,63,0.006415999804933866
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,32,8,128,1,float16,fp8,127,0.007349333415428798
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,32,8,128,1,float16,float16,1,0.015493333339691162
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,32,8,128,1,float16,float16,127,0.015765332927306492
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,32,8,128,1,float16,float16,31,0.015706667055686314
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,32,8,128,1,float16,float16,15,0.01563199982047081
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,32,8,128,1,float16,float16,7,0.01565333331624667
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,32,8,128,1,float16,float16,3,0.015962666521469753
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,32,8,128,1,float16,float16,63,0.015376000354687372
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,32,8,128,1,float16,float16,511,0.045925334095954895
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,32,8,128,1,float16,float16,255,0.029765332738558452
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,32,8,128,1,float16,fp8,511,0.02402666707833608
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,32,8,128,1,float16,float16,1023,0.061306665341059365
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,32,8,128,1,float16,float16,2047,0.08363200227419536
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,32,8,128,1,float16,fp8,255,0.013738666971524557
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,32,8,128,1,float16,fp8,2047,0.043738668163617454
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,32,8,128,1,float16,float16,4095,0.11868266264597575
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,32,8,128,1,float16,fp8,4095,0.06679466863473256
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,32,8,128,1,float16,float16,8191,0.1901866594950358
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,32,8,128,1,float16,float16,16383,0.33153067032496136
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,32,8,128,1,float16,fp8,1023,0.029872000217437744
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,32,8,128,1,float16,fp8,16383,0.20260266462961832
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,32,1,128,1,float16,float16,1,0.00808533343176047
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,32,1,128,1,float16,fp8,1,0.009253333633144697
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,32,1,128,1,float16,float16,3,0.008069333309928576
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,32,1,128,1,float16,float16,7,0.009477333476146063
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,32,1,128,1,float16,fp8,3,0.00943999985853831
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,32,1,128,1,float16,fp8,7,0.009119999905427298
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,32,8,128,1,float16,fp8,8191,0.11230400204658508
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,32,1,128,1,float16,float16,15,0.008453333129485449
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,32,1,128,1,float16,fp8,15,0.009434666484594345
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,32,1,128,1,float16,float16,63,0.010357333347201347
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,32,1,128,1,float16,float16,31,0.00895999992887179
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,32,1,128,1,float16,fp8,31,0.009919999788204828
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,32,1,128,1,float16,fp8,63,0.012042666474978128
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,32,1,128,1,float16,float16,127,0.009114666531483332
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,32,1,128,1,float16,float16,255,0.010458666831254959
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,32,1,128,1,float16,float16,511,0.016586666305859882
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,32,1,128,1,float16,fp8,255,0.011978667229413986
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,32,1,128,1,float16,fp8,127,0.012085333466529846
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,32,1,128,1,float16,fp8,1023,0.01775466650724411
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,32,1,128,1,float16,float16,1023,0.0176959993938605
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,32,1,128,1,float16,fp8,511,0.01639466608564059
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,32,1,128,1,float16,float16,4095,0.04045866678158442
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,32,1,128,1,float16,float16,2047,0.029669334491093952
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,32,1,128,1,float16,fp8,2047,0.029616000751654308
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,32,1,128,1,float16,fp8,4095,0.0397119993964831
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,32,1,128,1,float16,float16,8191,0.02199999988079071
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,32,1,128,1,float16,fp8,8191,0.022106667359670002
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,32,1,128,1,float16,float16,16383,0.02430933217207591
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,32,1,128,1,float16,fp8,16383,0.02666666607062022
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,32,1,128,1,float16,float16,32767,0.030565333863099415
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,32,1,128,1,float16,fp8,32767,0.033770665526390076
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,32,1,128,1,float16,float16,65535,0.03737599899371465
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,32,1,128,1,float16,fp8,65535,0.0435146689414978
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,32,2,128,1,float16,float16,1,0.008197333042820295
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,32,2,128,1,float16,fp8,1,0.009493333597977957
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,32,2,128,1,float16,float16,7,0.009306666751702627
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,32,2,128,1,float16,float16,3,0.008245333408315977
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,32,2,128,1,float16,fp8,3,0.009237333511312803
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,32,2,128,1,float16,fp8,7,0.009296000003814697
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,32,2,128,1,float16,float16,15,0.008469333251317343
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,32,2,128,1,float16,fp8,15,0.009477333476146063
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,32,2,128,1,float16,float16,63,0.010213333492477735
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,32,2,128,1,float16,float16,31,0.009072000160813332
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,32,2,128,1,float16,fp8,31,0.01022933361430963
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,32,2,128,1,float16,fp8,63,0.011823999385039011
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,32,2,128,1,float16,float16,127,0.009141333401203156
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,32,2,128,1,float16,float16,255,0.0102613332370917
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,32,2,128,1,float16,fp8,127,0.011909333368142446
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,32,2,128,1,float16,fp8,255,0.01201066623131434
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,32,2,128,1,float16,float16,511,0.0164533331990242
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,32,2,128,1,float16,fp8,511,0.016575999557971954
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,32,2,128,1,float16,float16,1023,0.017978666971127193
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,32,2,128,1,float16,fp8,1023,0.017498667041460674
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,32,2,128,1,float16,float16,8191,0.018624000251293182
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,32,2,128,1,float16,fp8,2047,0.029525332152843475
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,32,2,128,1,float16,float16,2047,0.029738667110602062
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,32,2,128,1,float16,fp8,4095,0.013189333180586496
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,32,2,128,1,float16,fp8,8191,0.015967999895413715
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,32,2,128,1,float16,float16,4095,0.015589332828919092
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,32,2,128,1,float16,float16,16383,0.02075200031201045
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,32,2,128,1,float16,fp8,16383,0.01899733394384384
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,32,2,128,1,float16,float16,32767,0.025279998779296875
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,32,2,128,1,float16,fp8,32767,0.024512000381946564
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,32,2,128,1,float16,fp8,65535,0.030954666435718536
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,32,2,128,1,float16,float16,65535,0.030218665798505146
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,32,4,128,1,float16,float16,1,0.008016000191370646
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,32,4,128,1,float16,float16,7,0.009279999881982803
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,32,4,128,1,float16,fp8,3,0.009354666496316591
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,32,4,128,1,float16,float16,3,0.008176000167926153
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,32,4,128,1,float16,fp8,7,0.00943999985853831
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,32,4,128,1,float16,float16,15,0.00842666688064734
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,32,4,128,1,float16,float16,31,0.00921066664159298
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,32,4,128,1,float16,fp8,1,0.010037333394090334
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,32,4,128,1,float16,fp8,15,0.009599999835093817
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,32,4,128,1,float16,fp8,31,0.011120000233252844
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,32,4,128,1,float16,float16,63,0.009119999905427298
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,32,4,128,1,float16,float16,127,0.009397333487868309
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,32,4,128,1,float16,fp8,63,0.012037333101034164
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,32,4,128,1,float16,fp8,127,0.011909333368142446
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,32,4,128,1,float16,float16,255,0.01028266673286756
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,32,4,128,1,float16,float16,511,0.016389333953460056
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,32,4,128,1,float16,fp8,511,0.016517333686351776
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,32,4,128,1,float16,float16,1023,0.017685333887736004
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,32,4,128,1,float16,fp8,1023,0.01756799966096878
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,32,4,128,1,float16,fp8,2047,0.009306666751702627
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,32,4,128,1,float16,fp8,255,0.011989332735538483
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,32,4,128,1,float16,float16,2047,0.014959999670584997
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,32,4,128,1,float16,float16,8191,0.019578666736682255
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,32,4,128,1,float16,float16,4095,0.015541333705186844
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,32,4,128,1,float16,fp8,4095,0.010575999816258749
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,32,4,128,1,float16,fp8,8191,0.012762666990359625
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,32,4,128,1,float16,fp8,16383,0.015279999623696009
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,32,4,128,1,float16,float16,16383,0.021695998807748158
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,32,4,128,1,float16,fp8,32767,0.021695998807748158
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,32,4,128,1,float16,float16,32767,0.034474665919939675
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,32,8,128,1,float16,float16,1,0.00808533343176047
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,32,4,128,1,float16,float16,65535,0.04621333380540212
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,32,8,128,1,float16,fp8,1,0.009349333122372627
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,32,4,128,1,float16,fp8,65535,0.027024000883102417
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,32,8,128,1,float16,float16,3,0.008266666904091835
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,32,8,128,1,float16,fp8,3,0.009461333354314169
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,32,8,128,1,float16,float16,7,0.008298666526873907
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,32,8,128,1,float16,float16,15,0.008485333373149237
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,32,8,128,1,float16,fp8,7,0.009482666850090027
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,32,8,128,1,float16,float16,63,0.009312000125646591
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,32,8,128,1,float16,float16,31,0.00921066664159298
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,32,8,128,1,float16,fp8,31,0.010234666367371878
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,32,8,128,1,float16,fp8,63,0.011936000237862269
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,32,8,128,1,float16,float16,255,0.0103946669648091
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,32,8,128,1,float16,fp8,127,0.01219733307758967
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,32,8,128,1,float16,float16,127,0.009232000137368837
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,32,8,128,1,float16,float16,1023,0.013573333621025085
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,32,8,128,1,float16,fp8,255,0.0120319997270902
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,32,8,128,1,float16,float16,511,0.016688000410795212
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,32,8,128,1,float16,fp8,511,0.01643199970324834
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,32,8,128,1,float16,fp8,1023,0.008645333349704742
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,32,8,128,1,float16,float16,2047,0.015216000378131866
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,32,8,128,1,float16,float16,4095,0.016117333124081295
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,32,8,128,1,float16,fp8,2047,0.009343999748428663
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,32,8,128,1,float16,fp8,4095,0.0103946669648091
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,32,8,128,1,float16,fp8,15,0.010378666842977205
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,32,8,128,1,float16,float16,16383,0.029893333713213604
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,32,8,128,1,float16,float16,8191,0.025583999852339428
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,32,8,128,1,float16,fp8,8191,0.01421333352724711
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,32,8,128,1,float16,fp8,16383,0.017301333447297413
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,32,8,128,1,float16,float16,32767,0.04854933420817057
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,32,1,128,1,float16,float16,1,0.00878399983048439
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,32,1,128,1,float16,fp8,1,0.00933333362142245
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,32,8,128,1,float16,fp8,32767,0.03204799940188726
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,32,8,128,1,float16,float16,65535,0.07147733370463054
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,32,1,128,1,float16,float16,3,0.008463999877373377
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,32,8,128,1,float16,fp8,65535,0.0432586669921875
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,32,1,128,1,float16,fp8,7,0.009301333377758661
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,32,1,128,1,float16,float16,7,0.008298666526873907
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,32,1,128,1,float16,fp8,3,0.009322666873534521
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,32,1,128,1,float16,float16,15,0.008463999877373377
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,32,1,128,1,float16,float16,31,0.009285333255926767
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,32,1,128,1,float16,fp8,15,0.00956266683836778
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,32,1,128,1,float16,fp8,31,0.010101333260536194
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,32,1,128,1,float16,float16,63,0.010250666489203772
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,32,1,128,1,float16,fp8,63,0.012047999848922094
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,32,1,128,1,float16,float16,127,0.009397333487868309
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,32,1,128,1,float16,fp8,127,0.011909333368142446
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,32,1,128,1,float16,float16,255,0.010480000327030817
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,32,1,128,1,float16,fp8,255,0.012026666353146235
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,32,1,128,1,float16,fp8,1023,0.029546665648619335
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,32,1,128,1,float16,float16,511,0.016949333250522614
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,32,1,128,1,float16,fp8,511,0.01669866715868314
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,32,1,128,1,float16,float16,1023,0.02977066735426585
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,32,1,128,1,float16,float16,2047,0.03763733307520548
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,32,1,128,1,float16,fp8,2047,0.03691199918588003
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,32,1,128,1,float16,float16,4095,0.019023999571800232
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,32,1,128,1,float16,fp8,4095,0.018218666315078735
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,32,1,128,1,float16,float16,8191,0.02277333289384842
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,32,1,128,1,float16,fp8,8191,0.021946666141351063
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,32,1,128,1,float16,fp8,16383,0.026474667092164356
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,32,1,128,1,float16,float16,16383,0.025418666501839954
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,32,1,128,1,float16,float16,32767,0.032111999889214836
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,32,1,128,1,float16,fp8,32767,0.03437866767247518
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,32,2,128,1,float16,float16,1,0.008367999767263731
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,32,1,128,1,float16,fp8,65535,0.04412800073623657
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,32,2,128,1,float16,float16,3,0.008240000034372011
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,32,2,128,1,float16,fp8,1,0.009594666461149851
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,32,1,128,1,float16,float16,65535,0.0391146664818128
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,32,2,128,1,float16,fp8,3,0.009418666362762451
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,32,2,128,1,float16,fp8,7,0.009248000259200731
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,32,2,128,1,float16,float16,7,0.008458666503429413
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,32,2,128,1,float16,float16,15,0.008602666358153025
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,32,2,128,1,float16,float16,31,0.009290666629870733
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,32,2,128,1,float16,fp8,15,0.009568000212311745
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,32,2,128,1,float16,fp8,31,0.010490667074918747
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,32,2,128,1,float16,float16,63,0.009370666618148485
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,32,2,128,1,float16,fp8,63,0.01198400060335795
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,32,2,128,1,float16,fp8,127,0.011941333611806234
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,32,2,128,1,float16,float16,127,0.009434666484594345
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,32,2,128,1,float16,float16,255,0.010586666564146677
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,32,2,128,1,float16,fp8,255,0.012213333199421564
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,32,2,128,1,float16,fp8,511,0.01672533278663953
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,32,2,128,1,float16,float16,511,0.016864000509182613
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,32,2,128,1,float16,float16,1023,0.029904000461101532
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,32,2,128,1,float16,fp8,1023,0.029578665892283123
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,32,2,128,1,float16,float16,4095,0.01632000009218852
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,32,2,128,1,float16,float16,2047,0.015610666324694952
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,32,2,128,1,float16,fp8,2047,0.011498666057984034
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,32,2,128,1,float16,fp8,4095,0.013317332913478216
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,32,2,128,1,float16,float16,8191,0.01972266659140587
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,32,2,128,1,float16,float16,16383,0.022117334107557934
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,32,2,128,1,float16,fp8,16383,0.019199999670187633
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,32,2,128,1,float16,fp8,8191,0.01597333326935768
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,32,2,128,1,float16,float16,32767,0.03548266738653183
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,32,2,128,1,float16,fp8,32767,0.027621333797772724
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,32,2,128,1,float16,float16,65535,0.04807466765244802
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,32,2,128,1,float16,fp8,65535,0.03513599932193756
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,32,4,128,1,float16,float16,1,0.0085333331177632
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,32,4,128,1,float16,fp8,1,0.00938666673998038
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,32,4,128,1,float16,float16,3,0.008373333141207695
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,32,4,128,1,float16,float16,7,0.00961599995692571
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,32,4,128,1,float16,fp8,3,0.009455999980370203
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,32,4,128,1,float16,fp8,7,0.009621333330869675
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,32,4,128,1,float16,float16,15,0.008405333384871483
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,32,4,128,1,float16,fp8,15,0.009759999811649323
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,32,4,128,1,float16,fp8,63,0.012154666086037954
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,32,4,128,1,float16,fp8,31,0.010181333248813948
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,32,4,128,1,float16,float16,63,0.009589333087205887
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,32,4,128,1,float16,float16,31,0.009338666374484697
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,32,4,128,1,float16,float16,127,0.009434666484594345
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,32,4,128,1,float16,fp8,127,0.012181332955757776
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,32,4,128,1,float16,float16,255,0.010431999961535135
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,32,4,128,1,float16,fp8,255,0.012250666817029318
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,32,4,128,1,float16,float16,511,0.01682666689157486
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,32,4,128,1,float16,float16,1023,0.013317332913478216
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,32,4,128,1,float16,fp8,511,0.01653333380818367
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,32,4,128,1,float16,fp8,1023,0.008469333251317343
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,32,4,128,1,float16,fp8,2047,0.009568000212311745
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,32,4,128,1,float16,float16,2047,0.015840000162522
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,32,4,128,1,float16,float16,8191,0.02554133286078771
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,32,4,128,1,float16,float16,4095,0.016842667013406754
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,32,4,128,1,float16,fp8,4095,0.01080000028014183
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,32,4,128,1,float16,fp8,8191,0.014709333578745524
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,32,4,128,1,float16,float16,16383,0.03090133269627889
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,32,4,128,1,float16,float16,32767,0.049685334165891014
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,32,4,128,1,float16,fp8,16383,0.017840000490347546
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,32,8,128,1,float16,float16,1,0.008400000010927519
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,32,4,128,1,float16,float16,65535,0.07367466886838277
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,32,4,128,1,float16,fp8,65535,0.050373335679372154
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,32,4,128,1,float16,fp8,32767,0.026373334228992462
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,32,8,128,1,float16,float16,3,0.008218666538596153
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,32,8,128,1,float16,fp8,1,0.009535999968647957
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,32,8,128,1,float16,fp8,3,0.009525333220760027
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,32,8,128,1,float16,float16,15,0.008367999767263731
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,32,8,128,1,float16,fp8,7,0.00956266683836778
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,32,8,128,1,float16,float16,7,0.008367999767263731
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,32,8,128,1,float16,fp8,15,0.009663999701539675
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,32,8,128,1,float16,float16,31,0.009226666763424873
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,32,8,128,1,float16,float16,127,0.009365333244204521
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,32,8,128,1,float16,fp8,63,0.012335999558369318
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,32,8,128,1,float16,fp8,31,0.010293333480755487
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,32,8,128,1,float16,float16,63,0.009445333232482275
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,32,8,128,1,float16,fp8,127,0.012159999459981918
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,32,8,128,1,float16,float16,511,0.01293333371480306
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,32,8,128,1,float16,fp8,255,0.012063999970753988
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,32,8,128,1,float16,float16,255,0.010709332923094431
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,32,8,128,1,float16,fp8,511,0.008133333176374435
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,32,8,128,1,float16,float16,1023,0.013306666165590286
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,32,8,128,1,float16,fp8,2047,0.010330666477481524
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,32,8,128,1,float16,fp8,1023,0.008250666782259941
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,32,8,128,1,float16,float16,2047,0.02012266715367635
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,32,8,128,1,float16,fp8,4095,0.011786667009194693
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,32,8,128,1,float16,fp8,8191,0.020768000433842342
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,32,8,128,1,float16,float16,4095,0.023605334262053173
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,32,8,128,1,float16,float16,8191,0.0353973334034284
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,32,8,128,1,float16,fp8,16383,0.026015999416510265
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,32,8,128,1,float16,float16,16383,0.051039998730023704
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,32,8,128,1,float16,float16,32767,0.07808533310890198
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,32,8,128,1,float16,fp8,32767,0.04544533292452494
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,32,1,128,1,float16,float16,1,0.01232533281048139
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,32,8,128,1,float16,float16,65535,0.12365866700808208
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,32,8,128,1,float16,fp8,65535,0.07339733342329662
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,32,1,128,1,float16,fp8,1,0.009285333255926767
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,32,1,128,1,float16,float16,3,0.012373333175977072
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,32,1,128,1,float16,fp8,3,0.009056000038981438
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,32,1,128,1,float16,float16,7,0.012213333199421564
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,32,1,128,1,float16,fp8,7,0.010431999961535135
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,32,1,128,1,float16,float16,63,0.012362666428089142
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,32,1,128,1,float16,float16,15,0.012191999703645706
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,32,1,128,1,float16,fp8,15,0.00916800027092298
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,32,1,128,1,float16,float16,31,0.012357333054145178
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,32,1,128,1,float16,fp8,31,0.009216000015536943
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,32,1,128,1,float16,fp8,63,0.009232000137368837
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,32,1,128,1,float16,float16,127,0.012053333222866058
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,32,1,128,1,float16,fp8,127,0.009898666913310686
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,32,1,128,1,float16,float16,255,0.01632000009218852
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,32,1,128,1,float16,fp8,511,0.02248000105222066
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,32,1,128,1,float16,float16,511,0.02333866556485494
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,32,1,128,1,float16,fp8,255,0.011866666376590729
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,32,1,128,1,float16,float16,1023,0.03412266572316488
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,32,1,128,1,float16,fp8,1023,0.04165866722663244
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,32,1,128,1,float16,float16,4095,0.08405866225560506
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,32,1,128,1,float16,float16,2047,0.05433600147565206
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,32,1,128,1,float16,fp8,2047,0.059205333391825356
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,32,1,128,1,float16,fp8,4095,0.09931199749310811
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,32,1,128,1,float16,float16,8191,0.10869866609573364
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,32,2,128,1,float16,float16,1,0.011429333438475927
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,32,1,128,1,float16,fp8,8191,0.13595199584960938
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,32,2,128,1,float16,float16,3,0.011226666470368704
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,32,2,128,1,float16,fp8,1,0.007978666573762894
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,32,2,128,1,float16,fp8,3,0.007594666754206021
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,32,2,128,1,float16,float16,7,0.011002667248249054
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,32,2,128,1,float16,fp8,7,0.007733333234985669
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,32,2,128,1,float16,fp8,31,0.0075519997626543045
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,32,2,128,1,float16,float16,15,0.011194666226704916
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,32,2,128,1,float16,fp8,15,0.007642666498819987
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,32,2,128,1,float16,float16,31,0.010928000013033548
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,32,2,128,1,float16,float16,63,0.011098666737476984
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,32,2,128,1,float16,fp8,63,0.007237333183487256
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,32,2,128,1,float16,float16,127,0.011055999745925268
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,32,2,128,1,float16,fp8,127,0.008576000109314919
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,32,2,128,1,float16,float16,255,0.0216799999276797
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,32,2,128,1,float16,fp8,255,0.010469333579142889
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,32,2,128,1,float16,fp8,511,0.018543999642133713
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,32,2,128,1,float16,float16,511,0.028794666131337483
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,32,2,128,1,float16,float16,1023,0.04633066554864248
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,32,2,128,1,float16,fp8,1023,0.032645332316557564
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,32,2,128,1,float16,float16,2047,0.06607999900976817
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,32,2,128,1,float16,float16,4095,0.08505599697430928
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,32,2,128,1,float16,fp8,2047,0.04178133110205332
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,32,2,128,1,float16,fp8,4095,0.0592853327592214
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,32,2,128,1,float16,float16,8191,0.12136000394821167
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,32,4,128,1,float16,float16,1,0.01609066625436147
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,32,4,128,1,float16,fp8,3,0.008463999877373377
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,32,4,128,1,float16,fp8,1,0.006954666847983996
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,32,4,128,1,float16,float16,3,0.015872000406185787
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,32,4,128,1,float16,float16,7,0.01573333392540614
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,32,2,128,1,float16,fp8,8191,0.08705600102742513
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,32,4,128,1,float16,fp8,7,0.006906666482488315
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,32,4,128,1,float16,fp8,63,0.0069919998447100324
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,32,4,128,1,float16,float16,15,0.015989333391189575
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,32,4,128,1,float16,fp8,15,0.006911999856432279
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,32,4,128,1,float16,float16,31,0.01637866720557213
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,32,4,128,1,float16,float16,63,0.015909332782030106
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,32,4,128,1,float16,float16,127,0.015882667154073715
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,32,4,128,1,float16,fp8,31,0.007743999982873599
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,32,4,128,1,float16,fp8,127,0.007365333537260692
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,32,4,128,1,float16,float16,255,0.0305173322558403
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,32,4,128,1,float16,fp8,255,0.015146666516860327
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,32,4,128,1,float16,fp8,511,0.021482666333516438
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,32,4,128,1,float16,float16,1023,0.061424002051353455
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,32,4,128,1,float16,float16,2047,0.08341333270072937
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,32,4,128,1,float16,fp8,1023,0.02741333345572154
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,32,4,128,1,float16,float16,4095,0.12059733271598816
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,32,4,128,1,float16,fp8,2047,0.04072533299525579
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,32,4,128,1,float16,fp8,4095,0.05931733548641205
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,32,8,128,1,float16,float16,1,0.025487999121348064
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,32,4,128,1,float16,fp8,8191,0.09964799880981445
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,32,4,128,1,float16,float16,8191,0.1917440096537272
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,32,4,128,1,float16,float16,511,0.04618666569391886
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,32,8,128,1,float16,float16,3,0.02550933261712392
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,32,8,128,1,float16,fp8,1,0.010453333457310995
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,32,8,128,1,float16,fp8,3,0.009493333597977957
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,32,8,128,1,float16,float16,7,0.02535466601451238
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,32,8,128,1,float16,fp8,31,0.009503999724984169
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,32,8,128,1,float16,fp8,7,0.009488000224033991
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,32,8,128,1,float16,float16,15,0.025616000096003216
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,32,8,128,1,float16,fp8,63,0.00943999985853831
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,32,8,128,1,float16,fp8,15,0.01073066641887029
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,32,8,128,1,float16,float16,63,0.02550400048494339
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,32,8,128,1,float16,fp8,127,0.01091733326514562
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,32,8,128,1,float16,float16,127,0.02569066733121872
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,32,8,128,1,float16,float16,31,0.025439999997615814
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,32,8,128,1,float16,float16,255,0.04916266600290934
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,32,8,128,1,float16,fp8,255,0.023904000719388325
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,32,8,128,1,float16,float16,511,0.06411733229955037
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,32,8,128,1,float16,fp8,1023,0.04368533194065094
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,32,8,128,1,float16,fp8,511,0.030266667405764263
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,32,8,128,1,float16,float16,1023,0.08038400113582611
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,32,8,128,1,float16,float16,2047,0.12008000413576762
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,32,8,128,1,float16,float16,4095,0.1911840041478475
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,32,8,128,1,float16,fp8,4095,0.112335999806722
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,32,8,128,1,float16,float16,8191,0.33155200878779095
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,32,8,128,1,float16,fp8,8191,0.20356265703837076
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,32,8,128,1,float16,fp8,2047,0.06698133548100789
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,32,1,128,1,float16,float16,1,0.009503999724984169
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,32,1,128,1,float16,float16,3,0.008576000109314919
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,32,1,128,1,float16,fp8,1,0.010319999729593595
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,32,1,128,1,float16,fp8,3,0.009589333087205887
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,32,1,128,1,float16,float16,7,0.008570666735370954
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,32,1,128,1,float16,float16,15,0.008661333471536636
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,32,1,128,1,float16,fp8,7,0.00960533320903778
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,32,1,128,1,float16,fp8,31,0.010415999839703241
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,32,1,128,1,float16,fp8,63,0.012138667205969492
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,32,1,128,1,float16,fp8,127,0.012106666962305704
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,32,1,128,1,float16,float16,31,0.010618666807810465
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,32,1,128,1,float16,float16,63,0.010351999973257383
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,32,1,128,1,float16,float16,127,0.009733333562811216
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,32,1,128,1,float16,float16,255,0.010693332801262537
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,32,1,128,1,float16,fp8,15,0.010885333021481832
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,32,1,128,1,float16,fp8,255,0.012373333175977072
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,32,1,128,1,float16,fp8,511,0.0281333327293396
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,32,1,128,1,float16,float16,511,0.028138667345046997
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,32,1,128,1,float16,float16,1023,0.03294933338960012
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,32,1,128,1,float16,float16,2047,0.019834666202465694
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,32,1,128,1,float16,fp8,2047,0.015626666446526844
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,32,1,128,1,float16,fp8,1023,0.03282133241494497
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,32,1,128,1,float16,float16,8191,0.024911999702453613
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,32,1,128,1,float16,fp8,4095,0.017946666727463405
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,32,1,128,1,float16,fp8,8191,0.022367998957633972
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,32,1,128,1,float16,float16,4095,0.021045332153638203
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,32,1,128,1,float16,fp8,16383,0.02683199942111969
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,32,1,128,1,float16,float16,16383,0.027098665634791057
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,32,1,128,1,float16,float16,32767,0.04691733419895172
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,32,1,128,1,float16,fp8,32767,0.049829334020614624
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,32,1,128,1,float16,float16,65535,0.06161599854628245
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,32,2,128,1,float16,float16,1,0.008522666369875273
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,32,1,128,1,float16,fp8,65535,0.0650186687707901
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,32,2,128,1,float16,float16,3,0.00854399986565113
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,32,2,128,1,float16,float16,7,0.008447999755541483
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,32,2,128,1,float16,fp8,3,0.010527999450763067
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,32,2,128,1,float16,float16,15,0.008629333227872849
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,32,2,128,1,float16,fp8,7,0.009770666559537252
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,32,2,128,1,float16,fp8,1,0.010826667149861654
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,32,2,128,1,float16,fp8,15,0.00955200009047985
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,32,2,128,1,float16,float16,31,0.009450666606426239
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,32,2,128,1,float16,float16,63,0.009594666461149851
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,32,2,128,1,float16,fp8,31,0.010399999717871347
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,32,2,128,1,float16,float16,127,0.009663999701539675
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,32,2,128,1,float16,fp8,63,0.012298667182525
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,32,2,128,1,float16,fp8,127,0.012378666549921036
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,32,2,128,1,float16,fp8,255,0.012357333054145178
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,32,2,128,1,float16,float16,511,0.02812266598145167
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,32,2,128,1,float16,fp8,511,0.027957332630952198
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,32,2,128,1,float16,fp8,1023,0.010133333504199982
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,32,2,128,1,float16,float16,1023,0.013616000612576803
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,32,2,128,1,float16,float16,255,0.010693332801262537
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,32,2,128,1,float16,float16,2047,0.01722666621208191
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,32,2,128,1,float16,fp8,2047,0.011722666521867117
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,32,2,128,1,float16,float16,4095,0.018165333817402523
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,32,2,128,1,float16,fp8,4095,0.013338666409254074
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,32,2,128,1,float16,float16,8191,0.027306665976842243
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,32,2,128,1,float16,fp8,8191,0.0186666672428449
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,32,2,128,1,float16,float16,16383,0.03265066693226496
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,32,2,128,1,float16,float16,32767,0.05179733534653982
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,32,2,128,1,float16,fp8,32767,0.04093866546948751
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,32,2,128,1,float16,fp8,16383,0.022474666436513264
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,32,2,128,1,float16,float16,65535,0.07716266810894012
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,32,4,128,1,float16,fp8,1,0.009493333597977957
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,32,4,128,1,float16,float16,1,0.008469333251317343
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,32,2,128,1,float16,fp8,65535,0.05648000041643778
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,32,4,128,1,float16,float16,3,0.008463999877373377
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,32,4,128,1,float16,fp8,3,0.00961599995692571
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,32,4,128,1,float16,float16,7,0.008698666468262672
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,32,4,128,1,float16,fp8,15,0.009685333197315535
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,32,4,128,1,float16,float16,31,0.009296000003814697
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,32,4,128,1,float16,float16,15,0.008778666456540426
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,32,4,128,1,float16,fp8,31,0.010431999961535135
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,32,4,128,1,float16,float16,63,0.009610666582981745
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,32,4,128,1,float16,fp8,63,0.012175999581813812
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,32,4,128,1,float16,fp8,7,0.009984000275532404
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,32,4,128,1,float16,float16,127,0.00980266680320104
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,32,4,128,1,float16,fp8,127,0.012138667205969492
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,32,4,128,1,float16,float16,255,0.010826667149861654
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,32,4,128,1,float16,fp8,255,0.012170666207869848
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,32,4,128,1,float16,float16,511,0.013023999830087027
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,32,4,128,1,float16,float16,1023,0.013637332866589228
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,32,4,128,1,float16,fp8,511,0.007887999837597212
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,32,4,128,1,float16,fp8,2047,0.010762666662534079
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,32,4,128,1,float16,float16,2047,0.02186133215824763
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,32,4,128,1,float16,float16,4095,0.025205334027608235
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,32,4,128,1,float16,float16,8191,0.03602666656176249
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,32,4,128,1,float16,fp8,4095,0.012549333274364471
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,32,4,128,1,float16,fp8,8191,0.01756799966096878
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,32,4,128,1,float16,fp8,1023,0.008789333204428354
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,32,4,128,1,float16,float16,16383,0.0533493310213089
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,32,4,128,1,float16,fp8,16383,0.028538666665554047
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,32,4,128,1,float16,float16,32767,0.08052266637484233
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,32,4,128,1,float16,fp8,32767,0.04139200101296107
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,32,8,128,1,float16,float16,1,0.008597333605090777
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,32,4,128,1,float16,fp8,65535,0.07010133564472198
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,32,8,128,1,float16,float16,3,0.008506666868925095
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,32,4,128,1,float16,float16,65535,0.12705066800117493
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,32,8,128,1,float16,fp8,3,0.009621333330869675
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,32,8,128,1,float16,fp8,1,0.009685333197315535
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,32,8,128,1,float16,float16,7,0.008559999987483025
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,32,8,128,1,float16,fp8,7,0.009568000212311745
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,32,8,128,1,float16,float16,15,0.0086666668454806
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,32,8,128,1,float16,fp8,15,0.009808000177145004
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,32,8,128,1,float16,float16,31,0.009392000113924345
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,32,8,128,1,float16,float16,63,0.009594666461149851
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,32,8,128,1,float16,fp8,31,0.010437333335479101
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,32,8,128,1,float16,float16,127,0.009882666791478792
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,32,8,128,1,float16,fp8,127,0.012330666184425354
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,32,8,128,1,float16,float16,255,0.012517333030700684
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,32,8,128,1,float16,float16,511,0.013327999661366144
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,32,8,128,1,float16,fp8,511,0.008010666817426682
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,32,8,128,1,float16,fp8,255,0.00730666642387708
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,32,8,128,1,float16,fp8,63,0.012448000411192576
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,32,8,128,1,float16,float16,1023,0.019189332922299702
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,32,8,128,1,float16,fp8,1023,0.00927466650803884
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,32,8,128,1,float16,float16,2047,0.028416000306606293
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,32,8,128,1,float16,fp8,2047,0.015279999623696009
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,32,8,128,1,float16,fp8,4095,0.017994667092959087
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,32,8,128,1,float16,float16,4095,0.04038933416207632
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,32,8,128,1,float16,float16,8191,0.05865600208441416
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,32,8,128,1,float16,float16,16383,0.08595732847849528
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,32,8,128,1,float16,fp8,16383,0.046240001916885376
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,32,8,128,1,float16,float16,32767,0.1226026713848114
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,32,8,128,1,float16,fp8,8191,0.028751999139785767
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,32,8,128,1,float16,fp8,32767,0.07670400043328603
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,32,8,128,1,float16,float16,65535,0.1987839937210083
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,32,8,128,1,float16,fp8,65535,0.12819199760754904
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,32,1,128,1,float16,float16,1,0.012901333471139273
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,32,1,128,1,float16,fp8,1,0.009141333401203156
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,32,1,128,1,float16,float16,3,0.01302933320403099
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,32,1,128,1,float16,fp8,7,0.009279999881982803
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,32,1,128,1,float16,fp8,3,0.009999999776482582
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,32,1,128,1,float16,float16,15,0.013232000172138214
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,32,1,128,1,float16,float16,7,0.013327999661366144
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,32,1,128,1,float16,float16,31,0.012906666845083237
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,32,1,128,1,float16,fp8,15,0.00919999989370505
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,32,1,128,1,float16,fp8,127,0.009919999788204828
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,32,1,128,1,float16,fp8,31,0.009258666386206945
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,32,1,128,1,float16,float16,63,0.013072000195582708
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,32,1,128,1,float16,float16,127,0.012944000462690989
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,32,1,128,1,float16,fp8,63,0.010309333602587381
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,32,1,128,1,float16,fp8,255,0.021253332495689392
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,32,1,128,1,float16,float16,511,0.03449599941571554
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,32,1,128,1,float16,fp8,511,0.03996799886226654
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,32,1,128,1,float16,float16,1023,0.05723733206590017
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,32,1,128,1,float16,fp8,1023,0.07382399837176006
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,32,1,128,1,float16,float16,255,0.02613866577545802
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,32,1,128,1,float16,float16,2047,0.08232533435026805
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,32,1,128,1,float16,fp8,2047,0.10612799723943074
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,32,2,128,1,float16,float16,1,0.01651200031240781
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,32,1,128,1,float16,float16,4095,0.10894399881362915
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,32,1,128,1,float16,fp8,4095,0.1358506679534912
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,32,2,128,1,float16,fp8,1,0.007733333234985669
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,32,2,128,1,float16,float16,3,0.016261332978804905
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,32,2,128,1,float16,float16,7,0.016480000068744022
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,32,2,128,1,float16,float16,15,0.016149333367745083
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,32,2,128,1,float16,fp8,15,0.007754666730761528
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,32,2,128,1,float16,fp8,3,0.008389333263039589
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,32,2,128,1,float16,fp8,7,0.008858666444818178
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,32,2,128,1,float16,float16,31,0.016106666376193363
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,32,2,128,1,float16,fp8,31,0.007690666864315669
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,32,2,128,1,float16,float16,127,0.01621866722901662
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,32,2,128,1,float16,float16,63,0.01629866659641266
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,32,2,128,1,float16,fp8,63,0.008661333471536636
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,32,2,128,1,float16,fp8,127,0.008799999952316284
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,32,2,128,1,float16,float16,1023,0.06266133487224579
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,32,2,128,1,float16,float16,511,0.0476746658484141
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,32,2,128,1,float16,fp8,255,0.0179626668492953
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,32,2,128,1,float16,fp8,511,0.03170666595300039
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,32,2,128,1,float16,fp8,1023,0.039936001102129616
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,32,2,128,1,float16,float16,2047,0.08525866270065308
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,32,2,128,1,float16,float16,255,0.03145066648721695
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,32,2,128,1,float16,float16,4095,0.12185066938400269
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,32,2,128,1,float16,fp8,2047,0.05948266883691152
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,32,2,128,1,float16,fp8,4095,0.08688533306121826
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,32,4,128,1,float16,float16,3,0.026074667771657307
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,32,4,128,1,float16,fp8,1,0.010778666784365972
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,32,4,128,1,float16,float16,1,0.02609066665172577
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,32,4,128,1,float16,fp8,3,0.010458666831254959
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,32,4,128,1,float16,fp8,7,0.010415999839703241
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,32,4,128,1,float16,float16,15,0.026213333010673523
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,32,4,128,1,float16,float16,7,0.02601066728432973
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,32,4,128,1,float16,float16,31,0.0260959987839063
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,32,4,128,1,float16,fp8,15,0.010458666831254959
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,32,4,128,1,float16,float16,127,0.026165333886941273
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,32,4,128,1,float16,float16,63,0.02625600000222524
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,32,4,128,1,float16,fp8,31,0.010421333213647207
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,32,4,128,1,float16,fp8,63,0.010570666442314783
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,32,4,128,1,float16,fp8,127,0.012080000092585882
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,32,4,128,1,float16,fp8,255,0.021802666286627453
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,32,4,128,1,float16,fp8,511,0.027999999622503918
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,32,4,128,1,float16,float16,511,0.06530133386452992
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,32,4,128,1,float16,float16,1023,0.08160533507664998
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,32,4,128,1,float16,float16,255,0.049584001302719116
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,32,4,128,1,float16,fp8,2047,0.06042666733264923
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,32,4,128,1,float16,fp8,1023,0.0412266676624616
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,32,4,128,1,float16,float16,2047,0.12143466869990031
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,32,4,128,1,float16,fp8,4095,0.09957333405812581
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,32,4,128,1,float16,float16,4095,0.19382399320602417
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,32,8,128,1,float16,float16,1,0.044906665881474815
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,32,8,128,1,float16,fp8,1,0.01639466608564059
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,32,8,128,1,float16,float16,3,0.04475200176239014
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,32,8,128,1,float16,fp8,3,0.016410666207472484
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,32,8,128,1,float16,float16,15,0.04478399952252706
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,32,8,128,1,float16,float16,7,0.04458666841189066
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,32,8,128,1,float16,fp8,7,0.016415999581416447
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,32,8,128,1,float16,float16,31,0.04497600098450979
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,32,8,128,1,float16,fp8,15,0.016496000190575916
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,32,8,128,1,float16,fp8,31,0.016469333320856094
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,32,8,128,1,float16,float16,63,0.044400001565615334
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,32,8,128,1,float16,float16,127,0.04525866607824961
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,32,8,128,1,float16,fp8,63,0.0164533331990242
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,32,8,128,1,float16,fp8,255,0.025621332228183746
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,32,8,128,1,float16,float16,511,0.06936533252398173
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,32,8,128,1,float16,fp8,127,0.019093333433071773
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,32,8,128,1,float16,fp8,511,0.03963200002908707
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,32,8,128,1,float16,float16,255,0.0525546669960022
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,32,8,128,1,float16,float16,1023,0.10362133383750916
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,32,8,128,1,float16,fp8,1023,0.06249066690603892
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,32,8,128,1,float16,float16,2047,0.1749173402786255
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,32,8,128,1,float16,fp8,2047,0.10786133011182149
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,32,8,128,1,float16,float16,4095,0.31013333797454834
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,256,1,32,1,128,1,float16,float16,1,0.020768000433842342
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,32,8,128,1,float16,fp8,4095,0.19868266582489014
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,256,1,32,1,128,1,float16,fp8,3,0.016063999384641647
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,256,1,32,1,128,1,float16,fp8,1,0.016063999384641647
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,256,1,32,1,128,1,float16,float16,3,0.020506666352351505
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,256,1,32,1,128,1,float16,float16,7,0.020762667059898376
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,256,1,32,1,128,1,float16,fp8,7,0.016271999726692837
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,256,1,32,1,128,1,float16,float16,15,0.02053333322207133
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,256,1,32,1,128,1,float16,fp8,31,0.016048000504573185
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,256,1,32,1,128,1,float16,fp8,15,0.01616000011563301
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,256,1,32,1,128,1,float16,fp8,63,0.016074666132529575
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,256,1,32,1,128,1,float16,float16,31,0.02046400060256322
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,256,1,32,1,128,1,float16,float16,63,0.020207999895016353
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,256,1,32,1,128,1,float16,float16,127,0.02056533346573512
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,256,1,32,1,128,1,float16,fp8,127,0.017653333644072216
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,256,1,32,1,128,1,float16,fp8,255,0.038906666139761605
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,256,1,32,1,128,1,float16,float16,255,0.038736000657081604
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,256,1,32,1,128,1,float16,fp8,511,0.07371733089288075
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,256,1,32,1,128,1,float16,float16,511,0.06004266440868378
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,256,1,32,2,128,1,float16,float16,1,0.027600000301996868
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,256,1,32,2,128,1,float16,float16,3,0.02718399961789449
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,256,1,32,1,128,1,float16,float16,1023,0.0784746656815211
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,256,1,32,2,128,1,float16,fp8,1,0.012879999975363413
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,256,1,32,1,128,1,float16,fp8,1023,0.10826133688290913
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,256,1,32,2,128,1,float16,fp8,3,0.012810666114091873
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,256,1,32,2,128,1,float16,float16,15,0.027589333554108936
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,256,1,32,2,128,1,float16,float16,7,0.027386667827765148
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,256,1,32,2,128,1,float16,fp8,15,0.012629333883523941
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,256,1,32,2,128,1,float16,fp8,7,0.013349333157142004
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,256,1,32,2,128,1,float16,fp8,31,0.012901333471139273
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,256,1,32,2,128,1,float16,fp8,63,0.012805332740147909
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,256,1,32,2,128,1,float16,float16,31,0.02712533374627431
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,256,1,32,2,128,1,float16,float16,63,0.02679466704527537
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,256,1,32,2,128,1,float16,float16,255,0.05259733398755392
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,256,1,32,2,128,1,float16,float16,127,0.027589333554108936
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,256,1,32,2,128,1,float16,fp8,127,0.01469333345691363
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,256,1,32,2,128,1,float16,float16,511,0.06716266771157582
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,256,1,32,2,128,1,float16,fp8,255,0.031445334355036415
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,256,1,32,2,128,1,float16,fp8,511,0.04243200023969015
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,256,1,32,4,128,1,float16,float16,1,0.04595733185609182
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,256,1,32,2,128,1,float16,float16,1023,0.08366933465003967
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,256,1,32,2,128,1,float16,fp8,1023,0.06011199951171875
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,256,1,32,4,128,1,float16,fp8,1,0.014938666174809137
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,256,1,32,4,128,1,float16,float16,3,0.04588800172011057
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,256,1,32,4,128,1,float16,fp8,3,0.015072000523408255
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,256,1,32,4,128,1,float16,fp8,7,0.014917333920796713
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,256,1,32,4,128,1,float16,float16,15,0.04608533283074697
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,256,1,32,4,128,1,float16,float16,7,0.046522667010625206
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,256,1,32,4,128,1,float16,float16,31,0.04621866842110952
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,256,1,32,4,128,1,float16,fp8,31,0.014991999914248785
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,256,1,32,4,128,1,float16,fp8,15,0.014741333822409311
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,256,1,32,4,128,1,float16,fp8,127,0.017871999492247898
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,256,1,32,4,128,1,float16,float16,63,0.045781334241231285
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,256,1,32,4,128,1,float16,fp8,63,0.01492799942692121
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,256,1,32,4,128,1,float16,float16,127,0.04656533400217692
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,256,1,32,4,128,1,float16,float16,255,0.0544053316116333
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,256,1,32,4,128,1,float16,fp8,255,0.024234667420387268
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,256,1,32,4,128,1,float16,float16,511,0.07121600210666656
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,256,1,32,4,128,1,float16,fp8,511,0.03708266715208689
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,256,1,32,4,128,1,float16,float16,1023,0.10533866286277771
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,256,1,32,8,128,1,float16,fp8,1,0.02978666623433431
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,256,1,32,4,128,1,float16,fp8,1023,0.056048000852266945
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,256,1,32,8,128,1,float16,float16,1,0.08178133269151051
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,256,1,32,8,128,1,float16,float16,3,0.08201600114504497
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,256,1,32,8,128,1,float16,fp8,3,0.029877332349618275
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,256,1,32,8,128,1,float16,float16,7,0.08186666667461395
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,256,1,32,8,128,1,float16,fp8,7,0.029887999097506206
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,256,1,32,8,128,1,float16,fp8,15,0.029818666477998097
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,256,1,32,8,128,1,float16,float16,31,0.08187200129032135
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,256,1,32,8,128,1,float16,float16,15,0.08171199758847554
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,256,1,32,8,128,1,float16,float16,63,0.08131733536720276
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,256,1,32,8,128,1,float16,float16,127,0.08316266536712646
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,256,1,32,8,128,1,float16,fp8,63,0.029850666721661884
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,256,1,32,8,128,1,float16,fp8,31,0.03013866643110911
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,256,1,32,8,128,1,float16,fp8,127,0.036650667587916054
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,256,1,32,8,128,1,float16,fp8,255,0.05146133402983347
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,256,1,32,8,128,1,float16,float16,255,0.09400000174840291
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,256,1,32,8,128,1,float16,float16,511,0.12668266892433167
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,256,1,32,8,128,1,float16,float16,1023,0.19372800985972086
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,256,1,32,8,128,1,float16,fp8,511,0.07413333157698314
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,512,1,32,1,128,1,float16,fp8,1,0.029114666084448498
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,256,1,32,8,128,1,float16,fp8,1023,0.12004799644152324
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,512,1,32,1,128,1,float16,float16,1,0.03528533379236857
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,512,1,32,1,128,1,float16,float16,3,0.034927998979886375
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,512,1,32,1,128,1,float16,fp8,3,0.02924799919128418
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,512,1,32,1,128,1,float16,fp8,7,0.029311999678611755
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,512,1,32,1,128,1,float16,float16,7,0.035173334181308746
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,512,1,32,1,128,1,float16,float16,15,0.03513599932193756
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,512,1,32,1,128,1,float16,fp8,15,0.02920000006755193
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,512,1,32,1,128,1,float16,float16,63,0.03481066723664602
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,512,1,32,1,128,1,float16,fp8,31,0.02942933390537898
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,512,1,32,1,128,1,float16,float16,31,0.03496533383925756
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,512,1,32,1,128,1,float16,fp8,63,0.029114666084448498
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,512,1,32,1,128,1,float16,float16,127,0.03489066660404205
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,512,1,32,1,128,1,float16,fp8,127,0.03239466746648153
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,512,1,32,1,128,1,float16,float16,511,0.08313600222269694
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,512,1,32,1,128,1,float16,float16,255,0.06704000135262807
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,512,1,32,1,128,1,float16,fp8,255,0.0776693324247996
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,512,1,32,2,128,1,float16,fp8,1,0.022495999932289124
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,512,1,32,2,128,1,float16,float16,1,0.04799999793370565
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,512,1,32,1,128,1,float16,fp8,511,0.11402666568756104
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,512,1,32,2,128,1,float16,float16,3,0.048437332113583885
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,512,1,32,2,128,1,float16,float16,7,0.04808000226815542
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,512,1,32,2,128,1,float16,fp8,3,0.02258133391539256
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,512,1,32,2,128,1,float16,fp8,15,0.022672000030676525
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,512,1,32,2,128,1,float16,fp8,7,0.022639999787012737
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,512,1,32,2,128,1,float16,float16,15,0.048122664292653404
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,512,1,32,2,128,1,float16,float16,63,0.048154667019844055
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,512,1,32,2,128,1,float16,fp8,31,0.022597332795461018
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,512,1,32,2,128,1,float16,float16,31,0.048010667165120445
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,512,1,32,2,128,1,float16,fp8,63,0.022367998957633972
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,512,1,32,2,128,1,float16,fp8,127,0.026501332720120747
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,512,1,32,2,128,1,float16,float16,127,0.0489279975493749
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,512,1,32,2,128,1,float16,float16,511,0.07367999851703644
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,512,1,32,2,128,1,float16,float16,255,0.05671999851862589
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,512,1,32,2,128,1,float16,fp8,255,0.038160001238187156
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,512,1,32,4,128,1,float16,float16,1,0.08397333820660909
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,512,1,32,2,128,1,float16,fp8,511,0.05533866584300995
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,512,1,32,4,128,1,float16,fp8,1,0.026144000391165417
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,512,1,32,4,128,1,float16,float16,3,0.08364267150561015
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,512,1,32,4,128,1,float16,fp8,3,0.025989333788553875
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,512,1,32,4,128,1,float16,float16,7,0.0839359958966573
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,512,1,32,4,128,1,float16,fp8,7,0.026213333010673523
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,512,1,32,4,128,1,float16,fp8,31,0.026122666895389557
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,512,1,32,4,128,1,float16,float16,15,0.08389866352081299
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,512,1,32,4,128,1,float16,fp8,15,0.02601066728432973
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,512,1,32,4,128,1,float16,float16,31,0.08401067058245341
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,512,1,32,4,128,1,float16,float16,63,0.08377599716186523
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,512,1,32,4,128,1,float16,float16,127,0.08627200126647949
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,512,1,32,4,128,1,float16,fp8,63,0.025834667185942333
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,512,1,32,4,128,1,float16,float16,255,0.09795733292897542
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,512,1,32,4,128,1,float16,fp8,127,0.033002667129039764
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,512,1,32,8,128,1,float16,float16,1,0.15124799807866415
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,512,1,32,4,128,1,float16,fp8,255,0.04671466847260793
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,512,1,32,4,128,1,float16,float16,511,0.13033599654833475
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,512,1,32,4,128,1,float16,fp8,511,0.06609066824118297
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,512,1,32,8,128,1,float16,fp8,1,0.061349332332611084
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,512,1,32,8,128,1,float16,fp8,3,0.062463998794555664
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,512,1,32,8,128,1,float16,float16,3,0.15155200163523355
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,512,1,32,8,128,1,float16,fp8,7,0.06193066636721293
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,512,1,32,8,128,1,float16,float16,7,0.15204266707102457
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,512,1,32,8,128,1,float16,float16,15,0.1516693333784739
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,512,1,32,8,128,1,float16,fp8,31,0.06158933540185293
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,512,1,32,8,128,1,float16,float16,31,0.15308800339698792
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,512,1,32,8,128,1,float16,fp8,15,0.06243733565012614
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,512,1,32,8,128,1,float16,float16,63,0.15408000349998474
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,512,1,32,8,128,1,float16,fp8,63,0.062122667829195656
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,512,1,32,8,128,1,float16,float16,127,0.15461333592732748
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,512,1,32,8,128,1,float16,fp8,127,0.07474666833877563
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,512,1,32,8,128,1,float16,fp8,255,0.09649599591890971
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,32,1,128,1,float16,float16,1,0.014096000542243322
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,512,1,32,8,128,1,float16,float16,255,0.17666133244832358
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,32,1,128,1,float16,float16,3,0.013893333574136099
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,512,1,32,8,128,1,float16,float16,511,0.23921066522598267
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,512,1,32,8,128,1,float16,fp8,511,0.1416000028451284
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,32,1,128,1,float16,fp8,1,0.0100426667680343
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,32,1,128,1,float16,fp8,7,0.009125333279371262
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,32,1,128,1,float16,fp8,3,0.010005333150426546
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,32,1,128,1,float16,float16,7,0.01394133393963178
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,32,1,128,1,float16,float16,15,0.014208000153303146
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,32,1,128,1,float16,fp8,15,0.009808000177145004
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,32,1,128,1,float16,fp8,63,0.011749333391586939
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,32,1,128,1,float16,float16,31,0.015743999431530636
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,32,1,128,1,float16,float16,63,0.01584533353646596
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,32,1,128,1,float16,fp8,31,0.011813333878914515
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,32,1,128,1,float16,fp8,127,0.011861333002646765
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,32,1,128,1,float16,float16,127,0.016362667083740234
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,32,1,128,1,float16,float16,511,0.024656000236670177
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,32,1,128,1,float16,fp8,255,0.013776000589132309
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,32,1,128,1,float16,float16,255,0.018357332795858383
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,32,1,128,1,float16,fp8,511,0.018837332725524902
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,32,1,128,1,float16,float16,2047,0.020303999384244282
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,32,1,128,1,float16,float16,1023,0.015482666591803232
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,32,1,128,1,float16,float16,4095,0.0210506667693456
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,32,1,128,1,float16,fp8,1023,0.013744000345468521
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,32,1,128,1,float16,fp8,2047,0.01575999955336253
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,32,1,128,1,float16,fp8,4095,0.018346666047970455
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,32,1,128,1,float16,float16,8191,0.033039999504884086
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,32,1,128,1,float16,fp8,8191,0.03291733314593633
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,32,1,128,1,float16,float16,16383,0.04029333343108495
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,32,1,128,1,float16,float16,32767,0.06488533318042755
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,32,1,128,1,float16,fp8,16383,0.04015466570854187
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,32,1,128,1,float16,fp8,65535,0.1111946702003479
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,32,1,128,1,float16,fp8,32767,0.06602666775385539
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,32,1,128,1,float16,float16,65535,0.09853333234786987
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,32,2,128,1,float16,float16,1,0.013728000223636627
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,32,2,128,1,float16,fp8,3,0.009114666531483332
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,32,2,128,1,float16,fp8,1,0.009914666414260864
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,32,2,128,1,float16,float16,3,0.013957332819700241
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,32,2,128,1,float16,float16,7,0.014074667046467463
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,32,2,128,1,float16,fp8,7,0.009050666665037474
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,32,2,128,1,float16,fp8,15,0.009733333562811216
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,32,2,128,1,float16,float16,31,0.015557333827018738
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,32,2,128,1,float16,float16,15,0.01421333352724711
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,32,2,128,1,float16,fp8,31,0.011786667009194693
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,32,2,128,1,float16,fp8,63,0.011717333147923151
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,32,2,128,1,float16,float16,127,0.01602666700879733
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,32,2,128,1,float16,float16,63,0.016165333489576977
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,32,2,128,1,float16,fp8,127,0.011786667009194693
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,32,2,128,1,float16,float16,255,0.018357332795858383
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,32,2,128,1,float16,fp8,255,0.013471999516089758
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,32,2,128,1,float16,fp8,511,0.009488000224033991
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,32,2,128,1,float16,float16,511,0.012885333349307379
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,32,2,128,1,float16,float16,1023,0.013717333475748697
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,32,2,128,1,float16,fp8,1023,0.01044800008336703
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,32,2,128,1,float16,float16,2047,0.022255999346574146
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,32,2,128,1,float16,fp8,2047,0.012970666090647379
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,32,2,128,1,float16,float16,4095,0.025802666942278545
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,32,2,128,1,float16,fp8,4095,0.015333333363135656
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,32,2,128,1,float16,float16,8191,0.036602665980656944
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,32,2,128,1,float16,fp8,8191,0.02700799951950709
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,32,2,128,1,float16,float16,16383,0.05471999943256378
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,32,2,128,1,float16,fp8,16383,0.03431999931732813
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,32,2,128,1,float16,fp8,32767,0.0610453337430954
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,32,2,128,1,float16,float16,32767,0.08149866759777069
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,32,4,128,1,float16,float16,1,0.014053333550691605
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,32,2,128,1,float16,float16,65535,0.12827733159065247
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,32,4,128,1,float16,fp8,1,0.008778666456540426
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,32,2,128,1,float16,fp8,65535,0.09484799702962239
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,32,4,128,1,float16,float16,3,0.014015999933083853
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,32,4,128,1,float16,fp8,3,0.00915733352303505
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,32,4,128,1,float16,float16,7,0.013893333574136099
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,32,4,128,1,float16,float16,15,0.014287999520699183
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,32,4,128,1,float16,fp8,15,0.009759999811649323
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,32,4,128,1,float16,fp8,7,0.010133333504199982
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,32,4,128,1,float16,float16,31,0.015728000551462173
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,32,4,128,1,float16,fp8,31,0.011722666521867117
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,32,4,128,1,float16,float16,63,0.015909332782030106
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,32,4,128,1,float16,fp8,63,0.011706666400035223
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,32,4,128,1,float16,float16,127,0.0161920003592968
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,32,4,128,1,float16,fp8,127,0.011754666765530905
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,32,4,128,1,float16,float16,255,0.012736000120639801
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,32,4,128,1,float16,float16,511,0.013242666920026144
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,32,4,128,1,float16,fp8,255,0.008453333129485449
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,32,4,128,1,float16,fp8,511,0.008026666939258575
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,32,4,128,1,float16,fp8,1023,0.009786666681369146
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,32,4,128,1,float16,float16,1023,0.018981333822011948
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,32,4,128,1,float16,float16,2047,0.028543998797734577
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,32,4,128,1,float16,fp8,2047,0.012341332932313284
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,32,4,128,1,float16,float16,4095,0.04088533421357473
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,32,4,128,1,float16,fp8,4095,0.01953599974513054
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,32,4,128,1,float16,fp8,8191,0.026026666164398193
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,32,4,128,1,float16,float16,8191,0.05840000013510386
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,32,4,128,1,float16,float16,16383,0.087226668993632
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,32,4,128,1,float16,fp8,16383,0.044666667779286705
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,32,4,128,1,float16,fp8,32767,0.0654720018307368
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,32,4,128,1,float16,float16,32767,0.12310399611790974
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,32,4,128,1,float16,float16,65535,0.20010666052500406
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,32,8,128,1,float16,float16,1,0.010165333126982054
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,32,4,128,1,float16,fp8,65535,0.10935999949773152
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,32,8,128,1,float16,float16,7,0.010250666489203772
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,32,8,128,1,float16,fp8,1,0.007514666765928268
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,32,8,128,1,float16,float16,3,0.010319999729593595
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,32,8,128,1,float16,fp8,3,0.0058453331391016645
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,32,8,128,1,float16,fp8,7,0.005946666623155276
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,32,8,128,1,float16,float16,15,0.010565333068370819
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,32,8,128,1,float16,fp8,15,0.00573333352804184
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,32,8,128,1,float16,float16,31,0.010501333822806677
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,32,8,128,1,float16,fp8,31,0.006261333202322324
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,32,8,128,1,float16,fp8,63,0.006384000182151794
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,32,8,128,1,float16,fp8,127,0.006351999938488007
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,32,8,128,1,float16,float16,255,0.013605333864688873
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,32,8,128,1,float16,float16,127,0.010431999961535135
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,32,8,128,1,float16,fp8,255,0.007370666911204656
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,32,8,128,1,float16,float16,63,0.010490667074918747
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,32,8,128,1,float16,float16,511,0.019253333409627277
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,32,8,128,1,float16,fp8,511,0.00878399983048439
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,32,8,128,1,float16,float16,2047,0.0429013321797053
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,32,8,128,1,float16,float16,1023,0.027664000789324444
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,32,8,128,1,float16,fp8,1023,0.015290666371583939
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,32,8,128,1,float16,float16,4095,0.06725333134333293
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,32,8,128,1,float16,fp8,2047,0.02073066681623459
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,32,8,128,1,float16,fp8,4095,0.03232000023126602
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,32,8,128,1,float16,fp8,8191,0.04588800172011057
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,32,8,128,1,float16,float16,8191,0.08373866478602092
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,32,8,128,1,float16,float16,16383,0.1193333367506663
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,32,8,128,1,float16,fp8,16383,0.06883200009663899
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,32,8,128,1,float16,float16,32767,0.19024000565210977
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,32,8,128,1,float16,fp8,32767,0.11389866471290588
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,32,8,128,1,float16,float16,65535,0.3309546709060669
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,32,8,128,1,float16,fp8,65535,0.2047413388888041
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1024,1,32,1,128,1,float16,float16,1,0.06318399806817372
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1024,1,32,1,128,1,float16,fp8,1,0.057333335280418396
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1024,1,32,1,128,1,float16,float16,3,0.06359999875227611
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1024,1,32,1,128,1,float16,fp8,7,0.05551466842492422
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1024,1,32,1,128,1,float16,float16,7,0.06322133541107178
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1024,1,32,1,128,1,float16,fp8,3,0.05508799850940704
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1024,1,32,1,128,1,float16,float16,15,0.06393066545327504
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1024,1,32,1,128,1,float16,fp8,15,0.05557866891225179
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1024,1,32,1,128,1,float16,float16,31,0.06380266447861989
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1024,1,32,1,128,1,float16,fp8,31,0.05699199934800466
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1024,1,32,1,128,1,float16,float16,63,0.0645653357108434
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1024,1,32,1,128,1,float16,fp8,63,0.055488000313440956
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1024,1,32,1,128,1,float16,float16,127,0.06503466765085857
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1024,1,32,1,128,1,float16,float16,255,0.07420800129572551
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1024,1,32,1,128,1,float16,fp8,127,0.06538133323192596
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1024,1,32,2,128,1,float16,float16,1,0.08821333448092143
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1024,1,32,2,128,1,float16,float16,3,0.08799466490745544
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1024,1,32,2,128,1,float16,fp8,1,0.04364799956480662
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1024,1,32,2,128,1,float16,fp8,3,0.04376000165939331
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1024,1,32,1,128,1,float16,fp8,255,0.10467732946077983
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1024,1,32,2,128,1,float16,float16,7,0.08825066685676575
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1024,1,32,2,128,1,float16,fp8,7,0.04333333174387614
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1024,1,32,2,128,1,float16,fp8,15,0.04413333535194397
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1024,1,32,2,128,1,float16,float16,15,0.0881066620349884
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1024,1,32,2,128,1,float16,float16,31,0.08898666501045227
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1024,1,32,2,128,1,float16,fp8,31,0.044106667240460716
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1024,1,32,2,128,1,float16,float16,63,0.08917867143948872
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1024,1,32,2,128,1,float16,float16,127,0.09060800075531006
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1024,1,32,2,128,1,float16,fp8,127,0.06379200021425883
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1024,1,32,2,128,1,float16,fp8,63,0.04370133578777313
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1024,1,32,4,128,1,float16,float16,1,0.15627732872962952
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1024,1,32,4,128,1,float16,fp8,1,0.055760001142819725
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1024,1,32,2,128,1,float16,float16,255,0.10268800457318623
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1024,1,32,2,128,1,float16,fp8,255,0.07770666480064392
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1024,1,32,4,128,1,float16,float16,3,0.15552000204722086
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1024,1,32,4,128,1,float16,float16,7,0.15582399566968283
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1024,1,32,4,128,1,float16,fp8,3,0.05522133409976959
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1024,1,32,4,128,1,float16,float16,15,0.15711466471354166
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1024,1,32,4,128,1,float16,fp8,15,0.0558186670144399
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1024,1,32,4,128,1,float16,fp8,7,0.055125330885251365
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1024,1,32,4,128,1,float16,float16,127,0.16127467155456543
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1024,1,32,4,128,1,float16,float16,63,0.15993066628774008
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1024,1,32,4,128,1,float16,fp8,63,0.055642664432525635
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1024,1,32,4,128,1,float16,fp8,31,0.055919999877611794
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1024,1,32,4,128,1,float16,fp8,127,0.06554666658242543
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1024,1,32,4,128,1,float16,float16,31,0.1592586636543274
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1024,1,32,4,128,1,float16,float16,255,0.1838080088297526
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1024,1,32,4,128,1,float16,fp8,255,0.08434666196505229
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1024,1,32,8,128,1,float16,float16,7,0.2956373294194539
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1024,1,32,8,128,1,float16,float16,1,0.29309332370758057
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1024,1,32,8,128,1,float16,fp8,1,0.12427199880282085
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1024,1,32,8,128,1,float16,float16,3,0.2951040069262187
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1024,1,32,8,128,1,float16,fp8,3,0.12461866935094197
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1024,1,32,8,128,1,float16,fp8,7,0.12500799695650736
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1024,1,32,8,128,1,float16,float16,15,0.29758934179941815
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1024,1,32,8,128,1,float16,float16,31,0.30266133944193524
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1024,1,32,8,128,1,float16,fp8,31,0.12444800138473511
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1024,1,32,8,128,1,float16,float16,63,0.29969600836435956
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1024,1,32,8,128,1,float16,fp8,15,0.12477333347002666
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1024,1,32,8,128,1,float16,fp8,63,0.12460266550381978
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1024,1,32,8,128,1,float16,float16,255,0.3428639968236287
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1024,1,32,8,128,1,float16,float16,127,0.2993706663449605
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1024,1,32,8,128,1,float16,fp8,127,0.1418560047944387
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1024,1,32,8,128,1,float16,fp8,255,0.18570133050282797
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2048,1,32,1,128,1,float16,float16,1,0.11980799833933513
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2048,1,32,1,128,1,float16,float16,3,0.12001066406567891
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2048,1,32,1,128,1,float16,fp8,1,0.16219733158747354
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2048,1,32,1,128,1,float16,fp8,3,0.1627946694691976
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2048,1,32,1,128,1,float16,float16,7,0.12007466952006023
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2048,1,32,1,128,1,float16,fp8,7,0.1634719967842102
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2048,1,32,1,128,1,float16,fp8,15,0.16134400169054666
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2048,1,32,1,128,1,float16,float16,31,0.12046399712562561
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2048,1,32,1,128,1,float16,float16,15,0.12008000413576762
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2048,1,32,1,128,1,float16,fp8,31,0.16314133008321127
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2048,1,32,1,128,1,float16,float16,63,0.12063466509183247
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2048,1,32,1,128,1,float16,fp8,63,0.16300800442695618
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2048,1,32,1,128,1,float16,float16,127,0.12123733758926392
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2048,1,32,2,128,1,float16,float16,1,0.16522133350372314
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2048,1,32,1,128,1,float16,fp8,127,0.17881067593892416
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2048,1,32,2,128,1,float16,fp8,1,0.1092693308989207
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2048,1,32,2,128,1,float16,fp8,3,0.10942932963371277
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2048,1,32,2,128,1,float16,float16,7,0.16634666919708252
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2048,1,32,2,128,1,float16,fp8,7,0.10897599657376607
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2048,1,32,2,128,1,float16,float16,3,0.1660533348719279
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2048,1,32,2,128,1,float16,fp8,15,0.10914666453997295
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2048,1,32,2,128,1,float16,float16,15,0.16683733463287354
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2048,1,32,2,128,1,float16,float16,31,0.16869866847991943
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2048,1,32,2,128,1,float16,fp8,31,0.10912000139554341
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2048,1,32,2,128,1,float16,float16,63,0.16874132553736368
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2048,1,32,2,128,1,float16,fp8,63,0.10884799559911092
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2048,1,32,2,128,1,float16,float16,127,0.16921599706013998
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2048,1,32,4,128,1,float16,float16,1,0.30561065673828125
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2048,1,32,4,128,1,float16,fp8,1,0.10483733812967937
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2048,1,32,2,128,1,float16,fp8,127,0.12312533458073933
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2048,1,32,4,128,1,float16,fp8,3,0.10518933335940044
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2048,1,32,4,128,1,float16,float16,7,0.3086026708285014
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2048,1,32,4,128,1,float16,fp8,7,0.10476799805959065
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2048,1,32,4,128,1,float16,float16,15,0.3107093373934428
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2048,1,32,4,128,1,float16,float16,3,0.3060106635093689
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2048,1,32,4,128,1,float16,fp8,15,0.1048479974269867
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2048,1,32,4,128,1,float16,float16,31,0.31108800570170086
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2048,1,32,4,128,1,float16,fp8,31,0.1046346624692281
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2048,1,32,4,128,1,float16,float16,63,0.3105493386586507
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2048,1,32,4,128,1,float16,fp8,63,0.1043839951356252
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2048,1,32,4,128,1,float16,float16,127,0.312885324160258
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2048,1,32,4,128,1,float16,fp8,127,0.122597336769104
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2048,1,32,8,128,1,float16,fp8,1,0.24246400594711304
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2048,1,32,8,128,1,float16,float16,3,0.5871359904607137
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2048,1,32,8,128,1,float16,fp8,3,0.24190932512283325
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2048,1,32,8,128,1,float16,float16,1,0.5835786660512289
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2048,1,32,8,128,1,float16,float16,7,0.5899253288904825
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2048,1,32,8,128,1,float16,fp8,7,0.24197866519292197
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2048,1,32,8,128,1,float16,float16,15,0.5937813520431519
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2048,1,32,8,128,1,float16,fp8,15,0.2421440084775289
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2048,1,32,8,128,1,float16,float16,31,0.5969653526941935
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2048,1,32,8,128,1,float16,fp8,31,0.24182933568954468
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2048,1,32,8,128,1,float16,float16,63,0.5895466804504395
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,32,1,128,1,float16,float16,1,0.013477332890033722
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,32,1,128,1,float16,float16,3,0.013717333475748697
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,32,1,128,1,float16,fp8,1,0.014736000448465347
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,32,1,128,1,float16,float16,7,0.013728000223636627
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,32,1,128,1,float16,fp8,3,0.014922666052977243
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2048,1,32,8,128,1,float16,fp8,63,0.2424373428026835
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2048,1,32,8,128,1,float16,float16,127,0.5854399998982748
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2048,1,32,8,128,1,float16,fp8,127,0.27661333481470746
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,32,1,128,1,float16,float16,15,0.014245333770910898
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,32,1,128,1,float16,fp8,7,0.014853333433469137
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,32,1,128,1,float16,float16,31,0.014368000129858652
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,32,1,128,1,float16,fp8,15,0.016154666741689045
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,32,1,128,1,float16,fp8,31,0.01988799994190534
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,32,1,128,1,float16,fp8,63,0.020031999796628952
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,32,1,128,1,float16,float16,127,0.016735999534527462
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,32,1,128,1,float16,float16,63,0.014533333480358124
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,32,1,128,1,float16,fp8,127,0.019909333437681198
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,32,1,128,1,float16,fp8,255,0.023423999547958374
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,32,1,128,1,float16,float16,255,0.02252800017595291
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,32,1,128,1,float16,float16,511,0.015109332899252573
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,32,1,128,1,float16,fp8,511,0.012479999413092932
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,32,1,128,1,float16,float16,1023,0.015722667177518208
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,32,1,128,1,float16,fp8,1023,0.014085333794355392
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,32,1,128,1,float16,float16,2047,0.02566933383544286
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,32,1,128,1,float16,fp8,2047,0.024383999407291412
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,32,1,128,1,float16,fp8,4095,0.02808533360560735
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,32,1,128,1,float16,float16,4095,0.030394665896892548
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,32,1,128,1,float16,float16,8191,0.04577599962552389
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,32,1,128,1,float16,fp8,8191,0.044549331068992615
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,32,1,128,1,float16,float16,16383,0.06676266590754192
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,32,1,128,1,float16,fp8,16383,0.07173333565394084
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,32,1,128,1,float16,fp8,32767,0.1274186670780182
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,32,1,128,1,float16,float16,32767,0.10445333520571391
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,32,2,128,1,float16,float16,1,0.013370666652917862
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,32,2,128,1,float16,fp8,1,0.014965333044528961
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,32,2,128,1,float16,float16,3,0.013637332866589228
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,32,2,128,1,float16,fp8,3,0.014869333555301031
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,32,2,128,1,float16,float16,7,0.013647999614477158
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,32,2,128,1,float16,fp8,7,0.01523200049996376
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,32,2,128,1,float16,float16,15,0.01421333352724711
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,32,2,128,1,float16,float16,31,0.01423466702302297
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,32,2,128,1,float16,fp8,15,0.01640533283352852
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,32,2,128,1,float16,fp8,31,0.01993600030740102
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,32,2,128,1,float16,float16,63,0.014373333503802618
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,32,2,128,1,float16,fp8,63,0.01987733319401741
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,32,2,128,1,float16,float16,127,0.016741332908471424
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,32,2,128,1,float16,fp8,127,0.02011200040578842
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,32,2,128,1,float16,float16,255,0.012597333639860153
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,32,2,128,1,float16,fp8,255,0.008976000050703684
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,32,2,128,1,float16,float16,1023,0.019280000279347103
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,32,2,128,1,float16,float16,511,0.013418667018413544
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,32,2,128,1,float16,fp8,511,0.012181332955757776
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,32,2,128,1,float16,float16,2047,0.029253333806991577
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,32,2,128,1,float16,fp8,1023,0.01179733375708262
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,32,2,128,1,float16,fp8,2047,0.02014933278163274
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,32,2,128,1,float16,float16,4095,0.04115733255942663
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,32,2,128,1,float16,fp8,4095,0.023775999744733173
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,32,2,128,1,float16,float16,8191,0.05884799857934316
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,32,2,128,1,float16,fp8,8191,0.03748800108830134
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,32,2,128,1,float16,float16,16383,0.08732266227404277
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,32,4,128,1,float16,float16,1,0.01032533310353756
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,32,2,128,1,float16,float16,32767,0.12467199563980103
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,32,2,128,1,float16,fp8,16383,0.061893333991368614
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,32,2,128,1,float16,fp8,32767,0.09874666730562846
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,32,4,128,1,float16,fp8,1,0.006144000217318535
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,32,4,128,1,float16,float16,3,0.010506667196750641
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,32,4,128,1,float16,fp8,3,0.006271999950210254
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,32,4,128,1,float16,fp8,7,0.008682666967312494
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,32,4,128,1,float16,float16,7,0.011567999919255575
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,32,4,128,1,float16,float16,15,0.010522666076819101
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,32,4,128,1,float16,fp8,15,0.006874666859706243
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,32,4,128,1,float16,float16,31,0.01032533310353756
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,32,4,128,1,float16,fp8,31,0.006224000205596288
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,32,4,128,1,float16,float16,63,0.010431999961535135
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,32,4,128,1,float16,fp8,63,0.0063573333124319715
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,32,4,128,1,float16,float16,127,0.011424000064531961
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,32,4,128,1,float16,float16,255,0.013343999783198038
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,32,4,128,1,float16,fp8,127,0.008837333569924036
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,32,4,128,1,float16,fp8,255,0.008618666479984919
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,32,4,128,1,float16,float16,511,0.019237333287795384
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,32,4,128,1,float16,fp8,511,0.009162666896979014
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,32,4,128,1,float16,fp8,1023,0.015135999768972397
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,32,4,128,1,float16,float16,1023,0.02794133375088374
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,32,4,128,1,float16,float16,2047,0.04275199770927429
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,32,4,128,1,float16,fp8,2047,0.018672000616788864
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,32,4,128,1,float16,fp8,4095,0.028565332293510437
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,32,4,128,1,float16,float16,4095,0.06673066814740498
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,32,4,128,1,float16,fp8,8191,0.04232533276081085
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,32,4,128,1,float16,float16,8191,0.08534399668375652
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,32,4,128,1,float16,float16,16383,0.1207360029220581
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,32,4,128,1,float16,fp8,16383,0.0605973352988561
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,32,8,128,1,float16,float16,1,0.010746666540702185
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,32,4,128,1,float16,fp8,32767,0.10076266527175903
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,32,8,128,1,float16,float16,3,0.01051733394463857
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,32,8,128,1,float16,fp8,1,0.010048000141978264
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,32,8,128,1,float16,float16,7,0.010938666760921478
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,32,8,128,1,float16,fp8,3,0.0063573333124319715
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,32,4,128,1,float16,float16,32767,0.19243200620015463
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,32,8,128,1,float16,fp8,7,0.0063146669417619705
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,32,8,128,1,float16,float16,15,0.010853332777818045
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,32,8,128,1,float16,fp8,15,0.005893333504597346
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,32,8,128,1,float16,float16,31,0.010543999572594961
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,32,8,128,1,float16,float16,63,0.012202666451533636
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,32,8,128,1,float16,fp8,31,0.008623999853928884
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,32,8,128,1,float16,fp8,63,0.006085333103934924
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,32,8,128,1,float16,fp8,127,0.006666666517655055
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,32,8,128,1,float16,float16,127,0.011514666179815928
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,32,8,128,1,float16,float16,255,0.020879998803138733
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,32,8,128,1,float16,fp8,255,0.008240000034372011
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,32,8,128,1,float16,float16,511,0.028090665737787884
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,32,8,128,1,float16,fp8,511,0.014101333916187286
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,32,8,128,1,float16,fp8,1023,0.02476266771554947
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,32,8,128,1,float16,float16,1023,0.045328001181284584
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,32,8,128,1,float16,float16,2047,0.06485333542029063
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,32,8,128,1,float16,fp8,2047,0.03051200012365977
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,32,8,128,1,float16,float16,4095,0.08363733688990276
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,32,8,128,1,float16,fp8,4095,0.04423466821511587
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,32,8,128,1,float16,fp8,8191,0.06671466430028279
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,32,8,128,1,float16,float16,16383,0.1893226703008016
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,32,8,128,1,float16,float16,8191,0.11889599760373433
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,32,8,128,1,float16,fp8,16383,0.11247467001279195
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,32,8,128,1,float16,float16,32767,0.3301493326822917
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,16,1,128,1,float16,float16,1,0.013712000101804733
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,32,8,128,1,float16,fp8,32767,0.20331732432047525
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,16,1,128,1,float16,fp8,1,0.014837333311637243
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,16,1,128,1,float16,fp8,3,0.015247999380032221
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,16,1,128,1,float16,float16,3,0.013754667093356451
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,16,1,128,1,float16,float16,7,0.013653332988421122
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,16,1,128,1,float16,float16,15,0.014287999520699183
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,16,1,128,1,float16,fp8,7,0.015040000279744467
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,16,1,128,1,float16,fp8,15,0.01643199970324834
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,16,1,128,1,float16,fp8,31,0.019808000574509304
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,16,1,128,1,float16,float16,63,0.014709333578745524
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,16,1,128,1,float16,float16,31,0.0145066666106383
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,16,1,128,1,float16,fp8,63,0.019861333072185516
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,16,1,128,1,float16,fp8,127,0.0199946661790212
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,16,1,128,1,float16,float16,255,0.012805332740147909
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,16,1,128,1,float16,float16,127,0.01664000004529953
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,16,1,128,1,float16,fp8,255,0.009061333412925402
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,16,1,128,1,float16,float16,511,0.013397333522637686
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,16,1,128,1,float16,fp8,511,0.009749333063761393
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,16,1,128,1,float16,float16,1023,0.019589333484570186
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,16,1,128,1,float16,float16,2047,0.029498666524887085
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,16,1,128,1,float16,fp8,1023,0.012506666282812754
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,16,1,128,1,float16,fp8,2047,0.0201706662774086
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,16,1,128,1,float16,float16,4095,0.041237334410349526
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,16,1,128,1,float16,fp8,4095,0.023823998868465424
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,16,1,128,1,float16,float16,8191,0.06022400160630544
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,16,1,128,1,float16,fp8,8191,0.037045332292715706
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,16,1,128,1,float16,fp8,16383,0.062165334820747375
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,16,1,128,1,float16,float16,16383,0.09044266740481059
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,16,2,128,1,float16,fp8,1,0.006069333602984746
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,16,2,128,1,float16,float16,3,0.010362666721145311
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,16,2,128,1,float16,float16,1,0.011733333269755045
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,16,1,128,1,float16,fp8,32767,0.09802666306495667
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,16,1,128,1,float16,float16,32767,0.12637866536776224
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,16,2,128,1,float16,fp8,3,0.006528000036875407
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,16,2,128,1,float16,float16,7,0.010368000095089277
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,16,2,128,1,float16,float16,15,0.010266666611035665
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,16,2,128,1,float16,fp8,15,0.00754666638871034
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,16,2,128,1,float16,fp8,7,0.01190399999419848
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,16,2,128,1,float16,float16,31,0.010944000134865442
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,16,2,128,1,float16,float16,63,0.010602666685978571
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,16,2,128,1,float16,fp8,31,0.008549333239595095
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,16,2,128,1,float16,fp8,63,0.008330666770537695
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,16,2,128,1,float16,float16,127,0.010490667074918747
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,16,2,128,1,float16,fp8,127,0.006773333375652631
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,16,2,128,1,float16,float16,255,0.013280000537633896
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,16,2,128,1,float16,fp8,255,0.009237333511312803
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,16,2,128,1,float16,fp8,511,0.00933333362142245
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,16,2,128,1,float16,float16,511,0.019413333386182785
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,16,2,128,1,float16,float16,1023,0.028079998989899952
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,16,2,128,1,float16,fp8,1023,0.015135999768972397
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,16,2,128,1,float16,float16,2047,0.0435146689414978
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,16,2,128,1,float16,float16,4095,0.0668693333864212
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,16,2,128,1,float16,fp8,2047,0.01869333287080129
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,16,2,128,1,float16,fp8,4095,0.02850666642189026
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,16,2,128,1,float16,float16,8191,0.08558400472005208
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,16,2,128,1,float16,fp8,8191,0.04200533529122671
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,16,2,128,1,float16,float16,16383,0.12077866991360982
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,16,4,128,1,float16,float16,1,0.01089599976936976
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,16,2,128,1,float16,fp8,16383,0.060453335444132485
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,16,2,128,1,float16,float16,32767,0.19283199310302734
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,16,2,128,1,float16,fp8,32767,0.10028266906738281
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,16,4,128,1,float16,fp8,1,0.011941333611806234
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,16,4,128,1,float16,float16,3,0.010784000158309937
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,16,4,128,1,float16,fp8,7,0.0063786668082078295
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,16,4,128,1,float16,float16,7,0.011727999895811081
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,16,4,128,1,float16,fp8,3,0.007658666620651881
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,16,4,128,1,float16,fp8,15,0.007711999739209811
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,16,4,128,1,float16,float16,15,0.0120319997270902
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,16,4,128,1,float16,float16,31,0.010746666540702185
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,16,4,128,1,float16,fp8,31,0.011125333607196808
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,16,4,128,1,float16,float16,63,0.010645333677530289
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,16,4,128,1,float16,fp8,63,0.006229333579540253
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,16,4,128,1,float16,float16,255,0.020714666694402695
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,16,4,128,1,float16,float16,127,0.011893333246310553
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,16,4,128,1,float16,fp8,127,0.007578666632374127
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,16,4,128,1,float16,fp8,255,0.010645333677530289
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,16,4,128,1,float16,float16,511,0.028069332242012024
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,16,4,128,1,float16,fp8,511,0.014159999787807465
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,16,4,128,1,float16,float16,1023,0.04531733194986979
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,16,4,128,1,float16,fp8,1023,0.024533333877722423
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,16,4,128,1,float16,float16,2047,0.06594666838645935
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,16,4,128,1,float16,fp8,2047,0.030389333764712017
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,16,4,128,1,float16,float16,8191,0.11895466844240825
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,16,4,128,1,float16,fp8,4095,0.04432533184687296
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,16,4,128,1,float16,float16,4095,0.08356266220410664
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,16,4,128,1,float16,fp8,8191,0.06709333260854085
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,16,4,128,1,float16,float16,16383,0.19037866592407227
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,16,4,128,1,float16,fp8,16383,0.11269866426785786
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,16,4,128,1,float16,float16,32767,0.33241067330042523
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,16,4,128,1,float16,fp8,32767,0.20315200090408325
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,16,8,128,1,float16,fp8,3,0.006645333642760913
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,16,8,128,1,float16,fp8,7,0.006581333155433337
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,16,8,128,1,float16,fp8,31,0.006735999758044879
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,16,8,128,1,float16,fp8,15,0.00877333308259646
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,16,8,128,1,float16,fp8,1,0.008453333129485449
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,16,8,128,1,float16,fp8,63,0.006751999879876773
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,16,8,128,1,float16,fp8,127,0.007397333160042763
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,16,8,128,1,float16,float16,63,0.015226667126019796
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,16,8,128,1,float16,float16,1,0.015520000209410986
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,16,8,128,1,float16,float16,7,0.015541333705186844
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,16,8,128,1,float16,float16,31,0.015696000307798386
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,16,8,128,1,float16,float16,15,0.01552533358335495
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,16,8,128,1,float16,fp8,255,0.013557333499193192
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,16,8,128,1,float16,float16,511,0.04628799855709076
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,16,8,128,1,float16,float16,3,0.01524266724785169
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,16,8,128,1,float16,float16,127,0.015461333096027374
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,16,8,128,1,float16,float16,1023,0.06136533121267954
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,16,8,128,1,float16,float16,2047,0.08357866605122884
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,16,8,128,1,float16,float16,4095,0.11918399731318156
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,16,8,128,1,float16,float16,255,0.030069333811601002
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,16,8,128,1,float16,fp8,4095,0.06607466439406078
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,16,8,128,1,float16,fp8,511,0.023887999355793
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,16,8,128,1,float16,float16,8191,0.18973867098490396
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,16,8,128,1,float16,fp8,8191,0.11162133018175761
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,16,8,128,1,float16,fp8,1023,0.029365333418051403
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,16,8,128,1,float16,fp8,16383,0.2023520072301229
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,16,8,128,1,float16,float16,16383,0.33081066608428955
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,16,8,128,1,float16,fp8,2047,0.04317333300908407
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,16,8,128,1,float16,float16,32767,0.6100426514943441
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,16,8,128,1,float16,fp8,32767,0.38391466935475665
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,16,1,128,1,float16,float16,1,0.008037333066264788
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,16,1,128,1,float16,fp8,1,0.00927466650803884
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,16,1,128,1,float16,float16,3,0.008000000069538752
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,16,1,128,1,float16,fp8,3,0.00921066664159298
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,16,1,128,1,float16,float16,7,0.008143999924262365
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,16,1,128,1,float16,fp8,7,0.009119999905427298
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,16,1,128,1,float16,float16,15,0.008373333141207695
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,16,1,128,1,float16,float16,31,0.008954666554927826
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,16,1,128,1,float16,fp8,15,0.009413333609700203
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,16,1,128,1,float16,fp8,31,0.0099093330403169
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,16,1,128,1,float16,float16,63,0.008933333059151968
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,16,1,128,1,float16,fp8,63,0.01181866725285848
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,16,1,128,1,float16,fp8,255,0.01180800050497055
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,16,1,128,1,float16,float16,255,0.010319999729593595
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,16,1,128,1,float16,fp8,127,0.01180800050497055
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,16,1,128,1,float16,float16,127,0.009141333401203156
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,16,1,128,1,float16,float16,511,0.01666133354107539
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,16,1,128,1,float16,fp8,511,0.016384000579516094
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,16,1,128,1,float16,float16,1023,0.01741333305835724
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,16,1,128,1,float16,float16,2047,0.017664000391960144
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,16,1,128,1,float16,fp8,1023,0.01725333308180173
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,16,1,128,1,float16,fp8,2047,0.01746133342385292
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,16,1,128,1,float16,float16,4095,0.029674666623274486
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,16,1,128,1,float16,fp8,4095,0.02956266701221466
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,16,1,128,1,float16,float16,8191,0.01826133330663045
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,16,1,128,1,float16,fp8,8191,0.015802666544914246
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,16,1,128,1,float16,fp8,16383,0.018965333700180054
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,16,1,128,1,float16,float16,16383,0.019893333315849304
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,16,1,128,1,float16,float16,32767,0.024058667321999867
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,16,1,128,1,float16,fp8,32767,0.02422400067249934
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,16,1,128,1,float16,float16,65535,0.028965334097544353
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,16,1,128,1,float16,fp8,65535,0.030581332743167877
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,16,2,128,1,float16,float16,1,0.007914666707317034
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,16,1,128,1,float16,float16,131071,0.03993066648642222
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,16,2,128,1,float16,fp8,1,0.00895999992887179
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,16,2,128,1,float16,float16,3,0.008197333042820295
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,16,2,128,1,float16,fp8,3,0.009039999917149544
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,16,1,128,1,float16,fp8,131071,0.04070399949947993
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,16,2,128,1,float16,float16,15,0.008432000254591307
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,16,2,128,1,float16,float16,7,0.008010666817426682
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,16,2,128,1,float16,fp8,15,0.009519999846816063
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,16,2,128,1,float16,fp8,7,0.009114666531483332
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,16,2,128,1,float16,fp8,31,0.010138666878143946
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,16,2,128,1,float16,float16,31,0.00897066667675972
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,16,2,128,1,float16,float16,63,0.009141333401203156
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,16,2,128,1,float16,fp8,63,0.011733333269755045
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,16,2,128,1,float16,float16,127,0.009008000294367472
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,16,2,128,1,float16,fp8,127,0.011674666156371435
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,16,2,128,1,float16,float16,255,0.010293333480755487
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,16,2,128,1,float16,fp8,255,0.011882666498422623
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,16,2,128,1,float16,float16,1023,0.017594666530688603
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,16,2,128,1,float16,float16,511,0.016554666062196095
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,16,2,128,1,float16,fp8,1023,0.017397332936525345
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,16,2,128,1,float16,fp8,511,0.01621333385507266
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,16,2,128,1,float16,float16,2047,0.017792000124851864
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,16,2,128,1,float16,float16,4095,0.015263999501864115
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,16,2,128,1,float16,fp8,2047,0.017312000195185345
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,16,2,128,1,float16,float16,8191,0.01838933303952217
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,16,2,128,1,float16,fp8,4095,0.010608000059922537
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,16,2,128,1,float16,fp8,8191,0.012805332740147909
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,16,2,128,1,float16,float16,16383,0.020256000260512035
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,16,2,128,1,float16,fp8,16383,0.014975999792416891
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,16,2,128,1,float16,float16,32767,0.024656000236670177
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,16,2,128,1,float16,fp8,32767,0.019280000279347103
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,16,2,128,1,float16,fp8,65535,0.024266667664051056
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,16,4,128,1,float16,float16,1,0.008122666428486506
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,16,2,128,1,float16,float16,65535,0.030202666918436687
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,16,4,128,1,float16,fp8,1,0.009119999905427298
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,16,4,128,1,float16,float16,3,0.008037333066264788
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,16,2,128,1,float16,fp8,131071,0.035173334181308746
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,16,2,128,1,float16,float16,131071,0.05726400017738342
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,16,4,128,1,float16,fp8,3,0.009418666362762451
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,16,4,128,1,float16,float16,7,0.008053333188096682
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,16,4,128,1,float16,fp8,7,0.009290666629870733
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,16,4,128,1,float16,float16,15,0.008245333408315977
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,16,4,128,1,float16,float16,31,0.009109333157539368
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,16,4,128,1,float16,fp8,15,0.009541333342591921
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,16,4,128,1,float16,fp8,31,0.010026666646202406
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,16,4,128,1,float16,float16,63,0.009173333023985228
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,16,4,128,1,float16,float16,127,0.009253333633144697
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,16,4,128,1,float16,fp8,63,0.011920000116030375
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,16,4,128,1,float16,fp8,127,0.011839999506870905
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,16,4,128,1,float16,float16,255,0.010069333637754122
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,16,4,128,1,float16,fp8,255,0.011727999895811081
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,16,4,128,1,float16,fp8,511,0.016271999726692837
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,16,4,128,1,float16,float16,511,0.01632533346613248
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,16,4,128,1,float16,float16,1023,0.017317333569129307
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,16,4,128,1,float16,fp8,1023,0.01735466718673706
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,16,4,128,1,float16,float16,2047,0.015013333410024643
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,16,4,128,1,float16,fp8,2047,0.009061333412925402
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,16,4,128,1,float16,fp8,4095,0.010458666831254959
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,16,4,128,1,float16,float16,4095,0.015477333217859268
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,16,4,128,1,float16,float16,8191,0.019189332922299702
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,16,4,128,1,float16,fp8,8191,0.012506666282812754
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,16,4,128,1,float16,float16,16383,0.021114667256673176
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,16,4,128,1,float16,fp8,16383,0.015087999403476715
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,16,4,128,1,float16,float16,32767,0.034314667185147606
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,16,4,128,1,float16,fp8,32767,0.02103466788927714
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,16,4,128,1,float16,float16,65535,0.046469335754712425
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,16,4,128,1,float16,fp8,65535,0.027050666511058807
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,16,8,128,1,float16,float16,1,0.007989333321650824
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,16,8,128,1,float16,fp8,1,0.00961599995692571
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,16,8,128,1,float16,float16,3,0.008080000057816505
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,16,4,128,1,float16,fp8,131071,0.05499200026194254
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,16,4,128,1,float16,float16,131071,0.07668266693751018
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,16,8,128,1,float16,float16,7,0.008623999853928884
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,16,8,128,1,float16,fp8,3,0.009072000160813332
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,16,8,128,1,float16,fp8,7,0.009141333401203156
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,16,8,128,1,float16,float16,15,0.008277333031098047
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,16,8,128,1,float16,fp8,15,0.009279999881982803
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,16,8,128,1,float16,float16,31,0.009141333401203156
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,16,8,128,1,float16,float16,63,0.009402666861812273
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,16,8,128,1,float16,fp8,63,0.011792000383138657
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,16,8,128,1,float16,fp8,31,0.009994666402538618
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,16,8,128,1,float16,float16,127,0.009045333291093508
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,16,8,128,1,float16,float16,255,0.010293333480755487
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,16,8,128,1,float16,fp8,127,0.011823999385039011
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,16,8,128,1,float16,float16,511,0.01642666632930438
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,16,8,128,1,float16,fp8,255,0.011877333124478659
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,16,8,128,1,float16,float16,1023,0.013631999492645264
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,16,8,128,1,float16,fp8,511,0.016384000579516094
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,16,8,128,1,float16,fp8,1023,0.007930666829148928
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,16,8,128,1,float16,float16,2047,0.015210667004187902
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,16,8,128,1,float16,float16,4095,0.016197333733240765
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,16,8,128,1,float16,fp8,4095,0.010506667196750641
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,16,8,128,1,float16,fp8,8191,0.014218666901191076
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,16,8,128,1,float16,float16,8191,0.025578667720158894
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,16,8,128,1,float16,float16,16383,0.03009066730737686
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,16,8,128,1,float16,fp8,16383,0.01721599946419398
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,16,8,128,1,float16,float16,32767,0.048485333720842995
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,16,8,128,1,float16,float16,65535,0.07291199763615926
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,16,8,128,1,float16,fp8,2047,0.00890666681031386
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,16,8,128,1,float16,fp8,32767,0.03209066639343897
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,16,8,128,1,float16,fp8,65535,0.04309333364168803
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,16,8,128,1,float16,float16,131071,0.1239359974861145
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,16,8,128,1,float16,fp8,131071,0.075162669022878
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,16,1,128,1,float16,float16,1,0.008122666428486506
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,16,1,128,1,float16,fp8,1,0.009279999881982803
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,16,1,128,1,float16,float16,3,0.008346666892369589
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,16,1,128,1,float16,fp8,7,0.009450666606426239
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,16,1,128,1,float16,float16,7,0.008170666793982187
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,16,1,128,1,float16,fp8,3,0.009472000102202097
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,16,1,128,1,float16,float16,15,0.00847999999920527
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,16,1,128,1,float16,fp8,15,0.009535999968647957
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,16,1,128,1,float16,float16,31,0.00897066667675972
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,16,1,128,1,float16,fp8,31,0.010410666465759277
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,16,1,128,1,float16,float16,63,0.009472000102202097
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,16,1,128,1,float16,fp8,63,0.011898666620254517
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,16,1,128,1,float16,float16,127,0.009285333255926767
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,16,1,128,1,float16,fp8,127,0.012074666718641916
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,16,1,128,1,float16,float16,255,0.010490667074918747
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,16,1,128,1,float16,fp8,255,0.012144000579913458
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,16,1,128,1,float16,float16,1023,0.017935999979575474
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,16,1,128,1,float16,float16,511,0.01672533278663953
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,16,1,128,1,float16,fp8,511,0.016389333953460056
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,16,1,128,1,float16,fp8,1023,0.01762666677435239
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,16,1,128,1,float16,float16,2047,0.029781334102153778
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,16,1,128,1,float16,float16,4095,0.01590399940808614
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,16,1,128,1,float16,fp8,2047,0.029525332152843475
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,16,1,128,1,float16,fp8,4095,0.013173333058754602
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,16,1,128,1,float16,fp8,16383,0.019093333433071773
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,16,1,128,1,float16,float16,8191,0.019317333896954853
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,16,1,128,1,float16,fp8,8191,0.01586666703224182
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,16,1,128,1,float16,float16,16383,0.021173333128293354
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,16,1,128,1,float16,fp8,32767,0.02441066751877467
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,16,1,128,1,float16,float16,32767,0.02548266698916753
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,16,1,128,1,float16,float16,65535,0.031173333525657654
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,16,1,128,1,float16,fp8,65535,0.03105599929889043
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,16,2,128,1,float16,float16,1,0.008112000301480293
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,16,2,128,1,float16,fp8,1,0.009178666397929192
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,16,1,128,1,float16,float16,131071,0.05936000247796377
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,16,1,128,1,float16,fp8,131071,0.04571733375390371
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,16,2,128,1,float16,fp8,3,0.009125333279371262
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,16,2,128,1,float16,float16,3,0.008133333176374435
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,16,2,128,1,float16,fp8,7,0.009343999748428663
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,16,2,128,1,float16,float16,7,0.008218666538596153
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,16,2,128,1,float16,float16,31,0.009050666665037474
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,16,2,128,1,float16,float16,15,0.008357333640257517
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,16,2,128,1,float16,fp8,15,0.009413333609700203
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,16,2,128,1,float16,fp8,31,0.010384000216921171
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,16,2,128,1,float16,float16,63,0.00921066664159298
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,16,2,128,1,float16,fp8,63,0.011952000359694162
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,16,2,128,1,float16,float16,127,0.009488000224033991
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,16,2,128,1,float16,float16,255,0.010474666953086853
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,16,2,128,1,float16,fp8,127,0.011946666985750198
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,16,2,128,1,float16,fp8,255,0.011994666109482447
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,16,2,128,1,float16,float16,511,0.016575999557971954
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,16,2,128,1,float16,float16,1023,0.017664000391960144
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,16,2,128,1,float16,fp8,1023,0.01741333305835724
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,16,2,128,1,float16,fp8,511,0.016373333831628162
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,16,2,128,1,float16,float16,2047,0.01522133375207583
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,16,2,128,1,float16,fp8,2047,0.00933333362142245
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,16,2,128,1,float16,float16,4095,0.016271999726692837
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,16,2,128,1,float16,fp8,4095,0.01051733394463857
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,16,2,128,1,float16,float16,8191,0.019914666811625164
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,16,2,128,1,float16,fp8,8191,0.012901333471139273
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,16,2,128,1,float16,float16,16383,0.02186666677395503
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,16,2,128,1,float16,float16,32767,0.03535466641187668
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,16,2,128,1,float16,fp8,32767,0.021568000316619873
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,16,2,128,1,float16,fp8,16383,0.015450666348139444
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,16,2,128,1,float16,float16,65535,0.04797333478927612
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,16,2,128,1,float16,fp8,65535,0.027136000494162243
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,16,4,128,1,float16,float16,1,0.008240000034372011
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,16,2,128,1,float16,float16,131071,0.07806399961312611
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,16,4,128,1,float16,fp8,1,0.009365333244204521
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,16,4,128,1,float16,float16,3,0.008383999889095625
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,16,2,128,1,float16,fp8,131071,0.043578664461771645
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,16,4,128,1,float16,fp8,3,0.009445333232482275
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,16,4,128,1,float16,fp8,15,0.009589333087205887
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,16,4,128,1,float16,float16,7,0.008186666915814081
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,16,4,128,1,float16,float16,31,0.00914666677514712
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,16,4,128,1,float16,fp8,7,0.009301333377758661
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,16,4,128,1,float16,fp8,31,0.01009599988659223
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,16,4,128,1,float16,float16,15,0.00855466661353906
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,16,4,128,1,float16,fp8,63,0.012181332955757776
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,16,4,128,1,float16,float16,63,0.009381333366036415
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,16,4,128,1,float16,float16,127,0.009509333098928133
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,16,4,128,1,float16,float16,511,0.01661866654952367
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,16,4,128,1,float16,float16,255,0.010629333555698395
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,16,4,128,1,float16,fp8,255,0.01192533348997434
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,16,4,128,1,float16,fp8,511,0.01658133293191592
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,16,4,128,1,float16,fp8,127,0.011861333002646765
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,16,4,128,1,float16,float16,1023,0.013493333011865616
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,16,4,128,1,float16,fp8,1023,0.008005333443482717
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,16,4,128,1,float16,float16,2047,0.015626666446526844
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,16,4,128,1,float16,fp8,2047,0.009119999905427298
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,16,4,128,1,float16,float16,4095,0.01658133293191592
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,16,4,128,1,float16,fp8,8191,0.014165333161751429
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,16,4,128,1,float16,fp8,4095,0.010725333044926325
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,16,4,128,1,float16,float16,8191,0.025621332228183746
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,16,4,128,1,float16,fp8,16383,0.016832000265518825
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,16,4,128,1,float16,float16,32767,0.04948266843954722
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,16,4,128,1,float16,fp8,32767,0.031983998914559685
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,16,4,128,1,float16,float16,16383,0.030784000953038532
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,16,4,128,1,float16,fp8,65535,0.043280000487963356
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,16,4,128,1,float16,float16,65535,0.07328533132870992
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,16,4,128,1,float16,fp8,131071,0.0758186678091685
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,16,4,128,1,float16,float16,131071,0.12522133191426596
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,16,8,128,1,float16,fp8,1,0.009423999736706415
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,16,8,128,1,float16,float16,3,0.008090666805704435
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,16,8,128,1,float16,fp8,3,0.009472000102202097
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,16,8,128,1,float16,float16,1,0.008080000057816505
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,16,8,128,1,float16,float16,7,0.008207999790708223
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,16,8,128,1,float16,fp8,7,0.009594666461149851
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,16,8,128,1,float16,float16,15,0.00842666688064734
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,16,8,128,1,float16,fp8,15,0.009466666728258133
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,16,8,128,1,float16,fp8,31,0.010309333602587381
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,16,8,128,1,float16,float16,31,0.009152000149091085
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,16,8,128,1,float16,float16,63,0.009349333122372627
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,16,8,128,1,float16,float16,127,0.009530666594703993
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,16,8,128,1,float16,fp8,63,0.011978667229413986
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,16,8,128,1,float16,float16,255,0.010431999961535135
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,16,8,128,1,float16,fp8,127,0.012213333199421564
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,16,8,128,1,float16,float16,511,0.012634667257467905
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,16,8,128,1,float16,fp8,255,0.012015999605258306
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,16,8,128,1,float16,fp8,511,0.0075519997626543045
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,16,8,128,1,float16,fp8,1023,0.008165333420038223
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,16,8,128,1,float16,float16,1023,0.013568000247081121
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,16,8,128,1,float16,fp8,2047,0.010048000141978264
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,16,8,128,1,float16,float16,2047,0.020581333587567013
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,16,8,128,1,float16,float16,4095,0.023754666248957317
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,16,8,128,1,float16,fp8,4095,0.01179733375708262
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,16,8,128,1,float16,float16,16383,0.05128000179926554
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,16,8,128,1,float16,fp8,8191,0.020597333709398907
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,16,8,128,1,float16,float16,8191,0.03495999922355016
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,16,8,128,1,float16,fp8,16383,0.02588266630967458
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,16,8,128,1,float16,float16,32767,0.07836266855398814
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,16,8,128,1,float16,fp8,32767,0.04543999830881754
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,16,8,128,1,float16,float16,65535,0.12546666463216147
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,16,8,128,1,float16,fp8,65535,0.07333866755167644
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,16,1,128,1,float16,float16,1,0.010656000425418219
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,16,1,128,1,float16,float16,3,0.010741333166758219
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,16,1,128,1,float16,fp8,1,0.0069386667261521024
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,16,8,128,1,float16,float16,131071,0.20866666237513223
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,16,8,128,1,float16,fp8,131071,0.13529066244761148
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,16,1,128,1,float16,fp8,3,0.00690133310854435
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,16,1,128,1,float16,float16,7,0.010863999525705973
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,16,1,128,1,float16,fp8,7,0.007354666789372762
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,16,1,128,1,float16,float16,15,0.010389333590865135
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,16,1,128,1,float16,fp8,15,0.007749333356817563
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,16,1,128,1,float16,float16,31,0.010490667074918747
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,16,1,128,1,float16,float16,63,0.010778666784365972
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,16,1,128,1,float16,fp8,63,0.007007999966541926
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,16,1,128,1,float16,fp8,31,0.01003200002014637
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,16,1,128,1,float16,float16,127,0.01062400018175443
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,16,1,128,1,float16,fp8,127,0.008143999924262365
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,16,1,128,1,float16,fp8,255,0.009375999992092451
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,16,1,128,1,float16,float16,255,0.013471999516089758
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,16,1,128,1,float16,fp8,511,0.010965333630641302
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,16,1,128,1,float16,float16,511,0.01959466685851415
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,16,1,128,1,float16,float16,1023,0.02826133370399475
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,16,1,128,1,float16,float16,2047,0.0439573327700297
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,16,1,128,1,float16,fp8,1023,0.01947733387351036
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,16,1,128,1,float16,fp8,2047,0.027717334528764088
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,16,1,128,1,float16,float16,4095,0.06782933572928111
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,16,1,128,1,float16,fp8,4095,0.04347200194994608
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,16,1,128,1,float16,float16,8191,0.08678932984670003
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,16,1,128,1,float16,fp8,8191,0.06019733349482218
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,16,1,128,1,float16,float16,16383,0.12372266252835591
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,16,2,128,1,float16,fp8,1,0.008469333251317343
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,16,1,128,1,float16,fp8,16383,0.0881119966506958
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,16,2,128,1,float16,float16,1,0.010703999549150467
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,16,2,128,1,float16,float16,3,0.010735999792814255
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,16,2,128,1,float16,fp8,3,0.0063573333124319715
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,16,2,128,1,float16,float16,7,0.01081066702802976
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,16,2,128,1,float16,fp8,7,0.0063680000603199005
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,16,2,128,1,float16,float16,15,0.010869332899649939
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,16,2,128,1,float16,fp8,31,0.006746666505932808
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,16,2,128,1,float16,fp8,15,0.00860799973209699
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,16,2,128,1,float16,fp8,63,0.006506666541099548
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,16,2,128,1,float16,float16,63,0.01166933278242747
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,16,2,128,1,float16,float16,31,0.011999999483426413
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,16,2,128,1,float16,float16,127,0.010693332801262537
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,16,2,128,1,float16,fp8,127,0.006864000111818314
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,16,2,128,1,float16,float16,255,0.021162666380405426
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,16,2,128,1,float16,fp8,255,0.009125333279371262
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,16,2,128,1,float16,fp8,511,0.015034666905800501
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,16,2,128,1,float16,float16,511,0.028309332827727
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,16,2,128,1,float16,float16,1023,0.04560533165931702
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,16,2,128,1,float16,float16,2047,0.06533333162466685
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,16,2,128,1,float16,fp8,1023,0.02149333308140437
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,16,2,128,1,float16,fp8,2047,0.02733866622050603
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,16,2,128,1,float16,fp8,4095,0.04117333392302195
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,16,2,128,1,float16,float16,4095,0.08364267150561015
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,16,2,128,1,float16,float16,8191,0.11979200442632039
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,16,2,128,1,float16,fp8,8191,0.06226666768391927
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,16,4,128,1,float16,float16,3,0.015397333850463232
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,16,4,128,1,float16,fp8,1,0.007114666824539502
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,16,2,128,1,float16,float16,16383,0.19242133696873984
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,16,2,128,1,float16,fp8,16383,0.09929066896438599
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,16,4,128,1,float16,float16,1,0.01522133375207583
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,16,4,128,1,float16,fp8,3,0.006533333410819371
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,16,4,128,1,float16,float16,7,0.015583999454975128
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,16,4,128,1,float16,fp8,7,0.008576000109314919
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,16,4,128,1,float16,float16,15,0.01569066693385442
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,16,4,128,1,float16,fp8,15,0.006560000280539195
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,16,4,128,1,float16,float16,31,0.015615999698638916
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,16,4,128,1,float16,float16,63,0.015482666591803232
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,16,4,128,1,float16,float16,127,0.01598400001724561
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,16,4,128,1,float16,fp8,63,0.010133333504199982
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,16,4,128,1,float16,fp8,31,0.008821333448092142
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,16,4,128,1,float16,float16,255,0.030069333811601002
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,16,4,128,1,float16,fp8,127,0.00855466661353906
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,16,4,128,1,float16,fp8,255,0.013653332988421122
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,16,4,128,1,float16,fp8,511,0.023957334458827972
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,16,4,128,1,float16,float16,1023,0.061520000298817955
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,16,4,128,1,float16,float16,511,0.046037331223487854
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,16,4,128,1,float16,fp8,1023,0.02993600070476532
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,16,4,128,1,float16,fp8,2047,0.0436160018046697
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,16,4,128,1,float16,float16,2047,0.08363733688990276
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,16,4,128,1,float16,fp8,4095,0.06720000008742015
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,16,4,128,1,float16,float16,4095,0.12036800384521484
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,16,4,128,1,float16,float16,8191,0.19176000356674194
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,16,8,128,1,float16,float16,1,0.02531733363866806
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,16,4,128,1,float16,fp8,8191,0.11223999659220378
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,16,4,128,1,float16,float16,16383,0.33217066526412964
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,16,8,128,1,float16,float16,3,0.025583999852339428
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,16,8,128,1,float16,fp8,1,0.011066666493813196
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,16,4,128,1,float16,fp8,16383,0.20215467611948648
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,16,8,128,1,float16,fp8,7,0.009658666948477427
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,16,8,128,1,float16,fp8,3,0.010672000547250112
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,16,8,128,1,float16,float16,7,0.0252960001428922
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,16,8,128,1,float16,float16,15,0.025477332373460133
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,16,8,128,1,float16,float16,31,0.025392000873883564
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,16,8,128,1,float16,fp8,63,0.009530666594703993
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,16,8,128,1,float16,fp8,15,0.010661333799362183
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,16,8,128,1,float16,float16,63,0.02513599892457326
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,16,8,128,1,float16,fp8,31,0.00961599995692571
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,16,8,128,1,float16,float16,127,0.02590399980545044
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,16,8,128,1,float16,fp8,127,0.010970667004585266
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,16,8,128,1,float16,float16,255,0.049312000473340355
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,16,8,128,1,float16,fp8,255,0.02380266785621643
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,16,8,128,1,float16,float16,511,0.06458133459091187
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,16,8,128,1,float16,fp8,511,0.0296426663796107
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,16,8,128,1,float16,fp8,1023,0.04330666859944662
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,16,8,128,1,float16,fp8,2047,0.0662720004717509
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,16,8,128,1,float16,float16,1023,0.08038400113582611
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,16,8,128,1,float16,float16,2047,0.12080533305803935
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,16,8,128,1,float16,float16,4095,0.1911840041478475
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,16,8,128,1,float16,fp8,4095,0.11180266737937927
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,16,8,128,1,float16,float16,8191,0.33062400420506793
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,16,8,128,1,float16,fp8,8191,0.20272000630696616
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,16,1,128,1,float16,fp8,1,0.010250666489203772
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,16,1,128,1,float16,float16,3,0.008336000144481659
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,16,1,128,1,float16,float16,1,0.009216000015536943
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,16,8,128,1,float16,float16,16383,0.6103626489639282
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,16,8,128,1,float16,fp8,16383,0.3842933177947998
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,16,1,128,1,float16,float16,7,0.008352000266313553
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,16,1,128,1,float16,fp8,3,0.00961599995692571
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,16,1,128,1,float16,float16,15,0.008453333129485449
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,16,1,128,1,float16,fp8,15,0.00960533320903778
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,16,1,128,1,float16,fp8,7,0.010559999694426855
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,16,1,128,1,float16,fp8,63,0.012181332955757776
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,16,1,128,1,float16,fp8,31,0.010186666622757912
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,16,1,128,1,float16,float16,63,0.009381333366036415
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,16,1,128,1,float16,float16,31,0.010133333504199982
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,16,1,128,1,float16,float16,127,0.009663999701539675
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,16,1,128,1,float16,fp8,127,0.012170666207869848
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,16,1,128,1,float16,float16,511,0.017055999487638474
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,16,1,128,1,float16,fp8,511,0.016597333053747814
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,16,1,128,1,float16,float16,1023,0.029877332349618275
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,16,1,128,1,float16,float16,255,0.010506667196750641
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,16,1,128,1,float16,fp8,255,0.012154666086037954
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,16,1,128,1,float16,float16,2047,0.016783999900023144
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,16,1,128,1,float16,fp8,1023,0.029525332152843475
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,16,1,128,1,float16,fp8,2047,0.011546666423479715
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,16,1,128,1,float16,float16,4095,0.017952000101407368
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,16,1,128,1,float16,fp8,4095,0.013002666334311167
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,16,1,128,1,float16,float16,8191,0.021242665747801464
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,16,1,128,1,float16,fp8,16383,0.019215999792019527
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,16,1,128,1,float16,fp8,8191,0.016000000139077503
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,16,1,128,1,float16,float16,16383,0.023354666928450268
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,16,1,128,1,float16,float16,32767,0.03709333389997482
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,16,1,128,1,float16,fp8,32767,0.027834666272004444
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,16,1,128,1,float16,float16,65535,0.05027199784914652
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,16,1,128,1,float16,fp8,65535,0.03459733227888743
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,16,2,128,1,float16,float16,1,0.008256000156203905
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,16,2,128,1,float16,fp8,1,0.009370666618148485
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,16,1,128,1,float16,fp8,131071,0.07192533214886983
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,16,1,128,1,float16,float16,131071,0.0809333324432373
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,16,2,128,1,float16,fp8,3,0.009301333377758661
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,16,2,128,1,float16,float16,7,0.008352000266313553
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,16,2,128,1,float16,float16,3,0.009141333401203156
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,16,2,128,1,float16,fp8,7,0.009317333499590555
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,16,2,128,1,float16,float16,15,0.008453333129485449
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,16,2,128,1,float16,fp8,15,0.009583999713261923
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,16,2,128,1,float16,float16,31,0.009423999736706415
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,16,2,128,1,float16,fp8,63,0.012175999581813812
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,16,2,128,1,float16,float16,63,0.009402666861812273
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,16,2,128,1,float16,fp8,31,0.010442666709423065
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,16,2,128,1,float16,fp8,127,0.012069333344697952
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,16,2,128,1,float16,fp8,255,0.012304000556468964
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,16,2,128,1,float16,float16,127,0.010405333091815313
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,16,2,128,1,float16,float16,255,0.010399999717871347
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,16,2,128,1,float16,fp8,511,0.016677333662907284
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,16,2,128,1,float16,float16,1023,0.01331199953953425
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,16,2,128,1,float16,float16,511,0.01703466723362605
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,16,2,128,1,float16,fp8,1023,0.008517333616813024
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,16,2,128,1,float16,float16,2047,0.01718933383623759
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,16,2,128,1,float16,fp8,2047,0.009658666948477427
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,16,2,128,1,float16,fp8,8191,0.014639999717473984
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,16,2,128,1,float16,float16,8191,0.027029333015282948
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,16,2,128,1,float16,fp8,4095,0.011130666981140772
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,16,2,128,1,float16,float16,4095,0.018063999712467194
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,16,2,128,1,float16,fp8,16383,0.017802666872739792
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,16,2,128,1,float16,float16,16383,0.0323840007185936
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,16,2,128,1,float16,float16,32767,0.05129066606362661
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,16,2,128,1,float16,fp8,32767,0.025727999707063038
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,16,2,128,1,float16,fp8,65535,0.04970666766166687
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,16,2,128,1,float16,float16,65535,0.07566399872303009
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,16,4,128,1,float16,float16,1,0.008287999778985977
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,16,4,128,1,float16,fp8,1,0.009472000102202097
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,16,4,128,1,float16,float16,3,0.00816000004609426
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,16,2,128,1,float16,float16,131071,0.1272213359673818
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,16,2,128,1,float16,fp8,131071,0.06902933120727539
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,16,4,128,1,float16,fp8,3,0.009461333354314169
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,16,4,128,1,float16,float16,7,0.008442666381597519
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,16,4,128,1,float16,fp8,7,0.009472000102202097
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,16,4,128,1,float16,fp8,15,0.009733333562811216
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,16,4,128,1,float16,float16,15,0.008602666358153025
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,16,4,128,1,float16,float16,31,0.00927466650803884
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,16,4,128,1,float16,float16,63,0.00933333362142245
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,16,4,128,1,float16,fp8,31,0.010304000228643417
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,16,4,128,1,float16,fp8,63,0.012085333466529846
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,16,4,128,1,float16,float16,127,0.009573333586255709
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,16,4,128,1,float16,fp8,127,0.012170666207869848
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,16,4,128,1,float16,float16,255,0.010784000158309937
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,16,4,128,1,float16,fp8,255,0.012122667084137598
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,16,4,128,1,float16,float16,511,0.012538666526476542
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,16,4,128,1,float16,fp8,511,0.007466666400432587
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,16,4,128,1,float16,float16,1023,0.01351999988158544
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,16,4,128,1,float16,fp8,1023,0.008261333530147871
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,16,4,128,1,float16,fp8,2047,0.010405333091815313
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,16,4,128,1,float16,float16,2047,0.021695998807748158
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,16,4,128,1,float16,float16,8191,0.03591466695070267
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,16,4,128,1,float16,float16,4095,0.0249439999461174
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,16,4,128,1,float16,fp8,8191,0.02067733307679494
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,16,4,128,1,float16,fp8,4095,0.011760000139474869
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,16,4,128,1,float16,float16,16383,0.05366399884223938
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,16,4,128,1,float16,fp8,16383,0.025968000292778015
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,16,4,128,1,float16,float16,32767,0.08102933565775554
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,16,4,128,1,float16,fp8,32767,0.04540266593297323
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,16,4,128,1,float16,float16,65535,0.1267573336760203
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,16,4,128,1,float16,fp8,65535,0.07334400216738383
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,16,4,128,1,float16,float16,131071,0.21009600162506104
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,16,8,128,1,float16,float16,3,0.008250666782259941
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,16,8,128,1,float16,fp8,1,0.009514666472872099
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,16,8,128,1,float16,float16,1,0.008346666892369589
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,16,4,128,1,float16,fp8,131071,0.13593600193659464
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,16,8,128,1,float16,fp8,3,0.00956266683836778
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,16,8,128,1,float16,float16,7,0.008341333518425623
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,16,8,128,1,float16,fp8,15,0.009663999701539675
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,16,8,128,1,float16,fp8,7,0.009642666826645533
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,16,8,128,1,float16,float16,15,0.00854399986565113
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,16,8,128,1,float16,float16,63,0.009397333487868309
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,16,8,128,1,float16,float16,31,0.009173333023985228
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,16,8,128,1,float16,fp8,31,0.01033599985142549
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,16,8,128,1,float16,fp8,63,0.012053333222866058
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,16,8,128,1,float16,fp8,127,0.01209066684047381
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,16,8,128,1,float16,float16,127,0.009482666850090027
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,16,8,128,1,float16,float16,255,0.012789333860079447
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,16,8,128,1,float16,float16,511,0.013162666310866674
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,16,8,128,1,float16,fp8,255,0.007205333560705185
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,16,8,128,1,float16,fp8,511,0.007920000081261
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,16,8,128,1,float16,fp8,1023,0.009141333401203156
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,16,8,128,1,float16,float16,1023,0.019215999792019527
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,16,8,128,1,float16,float16,2047,0.028549333413441975
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,16,8,128,1,float16,fp8,2047,0.015333333363135656
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,16,8,128,1,float16,float16,4095,0.04101333270470301
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,16,8,128,1,float16,fp8,4095,0.018170667191346485
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,16,8,128,1,float16,fp8,8191,0.028346667687098186
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,16,8,128,1,float16,float16,8191,0.058687999844551086
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,16,8,128,1,float16,float16,16383,0.08662933111190796
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,16,8,128,1,float16,fp8,16383,0.045941332976023354
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,16,8,128,1,float16,float16,32767,0.1234773298104604
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,16,8,128,1,float16,fp8,32767,0.07618133227030437
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,16,8,128,1,float16,float16,65535,0.20020800828933716
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,16,1,128,1,float16,float16,1,0.01121066634853681
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,16,8,128,1,float16,fp8,65535,0.12731200456619263
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,16,1,128,1,float16,fp8,1,0.007290666922926903
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,16,8,128,1,float16,float16,131071,0.35542933146158856
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,16,8,128,1,float16,fp8,131071,0.21542400121688843
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,16,1,128,1,float16,float16,3,0.011136000355084738
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,16,1,128,1,float16,fp8,3,0.00697066696981589
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,16,1,128,1,float16,float16,7,0.011477333803971609
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,16,1,128,1,float16,fp8,7,0.007674666742483775
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,16,1,128,1,float16,float16,15,0.011349332829316458
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,16,1,128,1,float16,fp8,15,0.008367999767263731
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,16,1,128,1,float16,float16,31,0.01126933346192042
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,16,1,128,1,float16,float16,63,0.01116266722480456
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,16,1,128,1,float16,fp8,31,0.007877333089709282
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,16,1,128,1,float16,fp8,63,0.007018666714429855
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,16,1,128,1,float16,fp8,127,0.008314666648705801
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,16,1,128,1,float16,float16,127,0.011120000233252844
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,16,1,128,1,float16,float16,255,0.02162666618824005
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,16,1,128,1,float16,float16,511,0.028768000503381092
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,16,1,128,1,float16,fp8,511,0.018778666853904724
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,16,1,128,1,float16,fp8,255,0.010346666599313417
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,16,1,128,1,float16,float16,1023,0.04650133351484934
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,16,1,128,1,float16,fp8,1023,0.03282133241494497
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,16,1,128,1,float16,float16,2047,0.06653333206971486
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,16,1,128,1,float16,fp8,2047,0.043322667479515076
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,16,1,128,1,float16,float16,4095,0.08593599994977315
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,16,1,128,1,float16,fp8,4095,0.05970133344332377
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,16,2,128,1,float16,float16,1,0.015610666324694952
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,16,1,128,1,float16,float16,8191,0.12315199772516887
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,16,2,128,1,float16,fp8,1,0.006842666616042455
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,16,1,128,1,float16,fp8,8191,0.08718400200208028
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,16,2,128,1,float16,float16,3,0.016085332880417507
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,16,2,128,1,float16,fp8,3,0.006693333387374878
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,16,2,128,1,float16,float16,7,0.015802666544914246
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,16,2,128,1,float16,fp8,7,0.007087999954819679
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,16,2,128,1,float16,float16,15,0.015935999651749928
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,16,2,128,1,float16,fp8,15,0.007018666714429855
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,16,2,128,1,float16,float16,63,0.01589866727590561
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,16,2,128,1,float16,float16,31,0.01573333392540614
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,16,2,128,1,float16,fp8,31,0.007765333478649457
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,16,2,128,1,float16,fp8,63,0.007135999699433644
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,16,2,128,1,float16,fp8,127,0.008016000191370646
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,16,2,128,1,float16,float16,127,0.015647999942302704
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,16,2,128,1,float16,fp8,255,0.015141333142916361
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,16,2,128,1,float16,float16,511,0.04635733366012573
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,16,2,128,1,float16,float16,255,0.03035733352104823
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,16,2,128,1,float16,fp8,511,0.021295999487241108
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,16,2,128,1,float16,float16,1023,0.061797335743904114
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,16,2,128,1,float16,fp8,1023,0.027642667293548584
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,16,2,128,1,float16,fp8,2047,0.041050667564074196
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,16,2,128,1,float16,fp8,4095,0.061008001367251076
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,16,2,128,1,float16,float16,2047,0.08462400237719218
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,16,4,128,1,float16,float16,1,0.02565866708755493
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,16,2,128,1,float16,float16,4095,0.12035199999809265
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,16,2,128,1,float16,float16,8191,0.19274665911992392
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,16,2,128,1,float16,fp8,8191,0.09917333722114563
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,16,4,128,1,float16,fp8,1,0.009850666547815004
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,16,4,128,1,float16,fp8,3,0.00983466642598311
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,16,4,128,1,float16,float16,3,0.02553066611289978
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,16,4,128,1,float16,float16,7,0.025263999899228413
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,16,4,128,1,float16,fp8,7,0.009509333098928133
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,16,4,128,1,float16,fp8,15,0.009674666449427605
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,16,4,128,1,float16,float16,31,0.025258667767047882
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,16,4,128,1,float16,float16,15,0.02587733417749405
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,16,4,128,1,float16,fp8,31,0.009754666437705358
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,16,4,128,1,float16,float16,63,0.025221332907676697
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,16,4,128,1,float16,float16,127,0.025461333493391674
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,16,4,128,1,float16,fp8,63,0.009759999811649323
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,16,4,128,1,float16,fp8,127,0.011077333241701126
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,16,4,128,1,float16,float16,255,0.048997332652409874
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,16,4,128,1,float16,fp8,255,0.023706667125225067
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,16,4,128,1,float16,float16,511,0.06489066779613495
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,16,4,128,1,float16,float16,1023,0.08088533580303192
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,16,4,128,1,float16,fp8,511,0.030031998952229817
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,16,4,128,1,float16,fp8,1023,0.044218664367993675
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,16,4,128,1,float16,float16,2047,0.1211893359820048
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,16,4,128,1,float16,fp8,2047,0.06715733309586842
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,16,4,128,1,float16,float16,4095,0.19266132513682047
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,16,4,128,1,float16,fp8,4095,0.11219732960065205
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,16,8,128,1,float16,fp8,1,0.01659199967980385
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,16,8,128,1,float16,float16,1,0.04491733511288961
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,16,4,128,1,float16,fp8,8191,0.2032960057258606
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,16,8,128,1,float16,float16,3,0.04450133442878723
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,16,4,128,1,float16,float16,8191,0.33513601620992023
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,16,8,128,1,float16,fp8,3,0.01658133293191592
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,16,8,128,1,float16,float16,15,0.04446933170159658
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,16,8,128,1,float16,float16,7,0.04478933413823446
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,16,8,128,1,float16,fp8,7,0.016613333175579708
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,16,8,128,1,float16,float16,31,0.04474666714668274
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,16,8,128,1,float16,fp8,31,0.016597333053747814
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,16,8,128,1,float16,fp8,15,0.01658133293191592
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,16,8,128,1,float16,float16,127,0.04498666524887085
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,16,8,128,1,float16,fp8,63,0.016362667083740234
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,16,8,128,1,float16,float16,63,0.04513599971930186
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,16,8,128,1,float16,fp8,127,0.018746666610240936
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,16,8,128,1,float16,float16,255,0.051957334081331887
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,16,8,128,1,float16,fp8,255,0.025120000044504803
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,16,8,128,1,float16,float16,511,0.06887466708819072
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,16,8,128,1,float16,float16,1023,0.10316800077756245
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,16,8,128,1,float16,fp8,511,0.038922667503356934
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,16,8,128,1,float16,fp8,1023,0.06177066763242086
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,16,8,128,1,float16,fp8,2047,0.10690666238466899
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,16,8,128,1,float16,float16,4095,0.31035733222961426
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,16,8,128,1,float16,float16,2047,0.1750346620877584
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,16,8,128,1,float16,fp8,4095,0.19776533047358194
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,16,8,128,1,float16,fp8,8191,0.3797706762949626
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,16,8,128,1,float16,float16,8191,0.5823093255360922
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,256,1,16,1,128,1,float16,fp8,1,0.007871999715765318
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,256,1,16,1,128,1,float16,float16,1,0.016517333686351776
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,256,1,16,1,128,1,float16,float16,3,0.016575999557971954
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,256,1,16,1,128,1,float16,fp8,3,0.007727999861041705
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,256,1,16,1,128,1,float16,fp8,7,0.00786666696270307
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,256,1,16,1,128,1,float16,float16,15,0.016517333686351776
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,256,1,16,1,128,1,float16,float16,7,0.016805333395799
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,256,1,16,1,128,1,float16,fp8,15,0.00784533346692721
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,256,1,16,1,128,1,float16,float16,31,0.016805333395799
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,256,1,16,1,128,1,float16,fp8,63,0.00754666638871034
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,256,1,16,1,128,1,float16,float16,63,0.016176000237464905
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,256,1,16,1,128,1,float16,fp8,31,0.008832000195980072
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,256,1,16,1,128,1,float16,float16,127,0.0163680004576842
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,256,1,16,1,128,1,float16,fp8,127,0.009583999713261923
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,256,1,16,1,128,1,float16,fp8,255,0.017658667018016178
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,256,1,16,1,128,1,float16,float16,255,0.03147733211517334
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,256,1,16,1,128,1,float16,float16,511,0.04779199759165446
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,256,1,16,1,128,1,float16,fp8,511,0.03186666717131933
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,256,1,16,1,128,1,float16,float16,1023,0.06277333199977875
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,256,1,16,1,128,1,float16,float16,2047,0.08540266752243042
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,256,1,16,1,128,1,float16,fp8,2047,0.05898666878541311
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,256,1,16,1,128,1,float16,fp8,1023,0.040896000961462654
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,256,1,16,2,128,1,float16,float16,1,0.02603733291228612
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,256,1,16,2,128,1,float16,fp8,1,0.011477333803971609
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,256,1,16,2,128,1,float16,fp8,3,0.01028266673286756
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,256,1,16,2,128,1,float16,float16,7,0.026149332523345947
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,256,1,16,2,128,1,float16,fp8,7,0.010496000448862711
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,256,1,16,2,128,1,float16,float16,15,0.025994665920734406
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,256,1,16,2,128,1,float16,float16,3,0.02619733413060506
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,256,1,16,2,128,1,float16,float16,31,0.02605866640806198
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,256,1,16,2,128,1,float16,fp8,15,0.010543999572594961
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,256,1,16,2,128,1,float16,float16,63,0.025786665578683216
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,256,1,16,2,128,1,float16,fp8,127,0.012015999605258306
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,256,1,16,2,128,1,float16,float16,127,0.026352000733216602
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,256,1,16,2,128,1,float16,fp8,63,0.0103946669648091
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,256,1,16,2,128,1,float16,fp8,31,0.01156266654531161
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,256,1,16,2,128,1,float16,fp8,255,0.021717332303524017
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,256,1,16,2,128,1,float16,float16,255,0.04965866605440775
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,256,1,16,2,128,1,float16,float16,511,0.06516266862551372
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,256,1,16,2,128,1,float16,float16,1023,0.08138133088747661
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,256,1,16,2,128,1,float16,fp8,511,0.027600000301996868
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,256,1,16,2,128,1,float16,fp8,1023,0.04121066629886627
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,256,1,16,2,128,1,float16,float16,2047,0.1220693290233612
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,256,1,16,2,128,1,float16,fp8,2047,0.06006933252016703
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,256,1,16,4,128,1,float16,float16,1,0.04478933413823446
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,256,1,16,4,128,1,float16,fp8,3,0.01655999943614006
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,256,1,16,4,128,1,float16,fp8,1,0.016629333297411602
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,256,1,16,4,128,1,float16,fp8,7,0.016496000190575916
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,256,1,16,4,128,1,float16,float16,7,0.04468800127506256
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,256,1,16,4,128,1,float16,float16,3,0.04491733511288961
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,256,1,16,4,128,1,float16,float16,15,0.045093332727750145
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,256,1,16,4,128,1,float16,float16,31,0.044954667488733925
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,256,1,16,4,128,1,float16,fp8,15,0.016538667182127636
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,256,1,16,4,128,1,float16,fp8,31,0.016538667182127636
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,256,1,16,4,128,1,float16,fp8,63,0.016549333930015564
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,256,1,16,4,128,1,float16,float16,63,0.044863998889923096
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,256,1,16,4,128,1,float16,float16,127,0.045066664616266884
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,256,1,16,4,128,1,float16,fp8,127,0.019365333020687103
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,256,1,16,4,128,1,float16,float16,255,0.05271466573079427
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,256,1,16,4,128,1,float16,fp8,255,0.025653332471847534
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,256,1,16,4,128,1,float16,fp8,511,0.03957333415746689
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,256,1,16,4,128,1,float16,float16,1023,0.10426132877667744
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,256,1,16,4,128,1,float16,float16,511,0.06921066840489705
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,256,1,16,4,128,1,float16,fp8,2047,0.10785599549611409
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,256,1,16,8,128,1,float16,float16,1,0.08135466774304707
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,256,1,16,4,128,1,float16,fp8,1023,0.062421331803003945
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,256,1,16,8,128,1,float16,float16,3,0.08125866452852885
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,256,1,16,4,128,1,float16,float16,2047,0.17638399203618368
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,256,1,16,8,128,1,float16,fp8,1,0.030074665943781536
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,256,1,16,8,128,1,float16,fp8,3,0.030207999050617218
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,256,1,16,8,128,1,float16,float16,7,0.08151466647783916
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,256,1,16,8,128,1,float16,float16,15,0.08149866759777069
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,256,1,16,8,128,1,float16,fp8,7,0.02998399982849757
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,256,1,16,8,128,1,float16,float16,31,0.08133333424727122
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,256,1,16,8,128,1,float16,fp8,15,0.029978667696317036
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,256,1,16,8,128,1,float16,fp8,63,0.03001066545645396
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,256,1,16,8,128,1,float16,fp8,31,0.03019733230272929
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,256,1,16,8,128,1,float16,float16,63,0.08082666496435802
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,256,1,16,8,128,1,float16,float16,127,0.08247999846935272
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,256,1,16,8,128,1,float16,fp8,127,0.03509333233038584
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,256,1,16,8,128,1,float16,float16,255,0.09330667058626811
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,256,1,16,8,128,1,float16,fp8,511,0.07300800085067749
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,256,1,16,8,128,1,float16,fp8,255,0.050069332122802734
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,256,1,16,8,128,1,float16,float16,511,0.1251413325468699
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,256,1,16,8,128,1,float16,float16,1023,0.1938826640446981
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,256,1,16,8,128,1,float16,fp8,1023,0.11860266327857971
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,256,1,16,8,128,1,float16,float16,2047,0.3329919974009196
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,512,1,16,1,128,1,float16,float16,1,0.02719466636578242
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,512,1,16,1,128,1,float16,fp8,1,0.012863999853531519
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,256,1,16,8,128,1,float16,fp8,2047,0.20933866500854492
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,512,1,16,1,128,1,float16,float16,3,0.02720533311367035
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,512,1,16,1,128,1,float16,float16,15,0.02756800005833308
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,512,1,16,1,128,1,float16,fp8,3,0.012896000097195307
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,512,1,16,1,128,1,float16,float16,7,0.02701333413521449
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,512,1,16,1,128,1,float16,fp8,7,0.012938667088747025
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,512,1,16,1,128,1,float16,float16,31,0.027119999130566914
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,512,1,16,1,128,1,float16,fp8,15,0.012762666990359625
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,512,1,16,1,128,1,float16,fp8,63,0.012874666601419449
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,512,1,16,1,128,1,float16,fp8,31,0.012896000097195307
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,512,1,16,1,128,1,float16,float16,63,0.026890667776266735
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,512,1,16,1,128,1,float16,float16,127,0.027450665831565857
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,512,1,16,1,128,1,float16,fp8,255,0.0317493329445521
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,512,1,16,1,128,1,float16,fp8,127,0.014762666076421738
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,512,1,16,1,128,1,float16,float16,511,0.06710933148860931
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,512,1,16,1,128,1,float16,float16,255,0.05253333350022634
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,512,1,16,1,128,1,float16,fp8,511,0.04161066561937332
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,512,1,16,2,128,1,float16,fp8,1,0.014965333044528961
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,512,1,16,2,128,1,float16,fp8,3,0.014874666929244995
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,512,1,16,1,128,1,float16,float16,1023,0.08435199658075969
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,512,1,16,2,128,1,float16,float16,1,0.046367997924486794
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,512,1,16,2,128,1,float16,float16,3,0.046015997727712
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,512,1,16,1,128,1,float16,fp8,1023,0.05997333427270254
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,512,1,16,2,128,1,float16,float16,7,0.04607999821503957
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,512,1,16,2,128,1,float16,fp8,7,0.014938666174809137
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,512,1,16,2,128,1,float16,float16,15,0.04619200030962626
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,512,1,16,2,128,1,float16,fp8,15,0.014970666418472925
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,512,1,16,2,128,1,float16,fp8,127,0.017738666385412216
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,512,1,16,2,128,1,float16,float16,31,0.0461706668138504
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,512,1,16,2,128,1,float16,fp8,31,0.014858666807413101
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,512,1,16,2,128,1,float16,float16,63,0.045824001232783
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,512,1,16,2,128,1,float16,fp8,63,0.014901333798964819
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,512,1,16,2,128,1,float16,float16,127,0.0466186652580897
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,512,1,16,2,128,1,float16,float16,255,0.053946668903032936
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,512,1,16,2,128,1,float16,fp8,255,0.024495999018351238
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,512,1,16,2,128,1,float16,float16,511,0.07071466743946075
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,512,1,16,2,128,1,float16,float16,1023,0.10587199529012044
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,512,1,16,4,128,1,float16,fp8,1,0.029968000948429108
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,512,1,16,2,128,1,float16,fp8,511,0.03702399879693985
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,512,1,16,4,128,1,float16,float16,3,0.08180266618728638
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,512,1,16,4,128,1,float16,float16,1,0.08197866876920064
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,512,1,16,2,128,1,float16,fp8,1023,0.056074668963750206
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,512,1,16,4,128,1,float16,fp8,3,0.030000001192092896
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,512,1,16,4,128,1,float16,float16,7,0.08183999856313069
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,512,1,16,4,128,1,float16,fp8,7,0.030037333567937214
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,512,1,16,4,128,1,float16,float16,15,0.08184533317883809
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,512,1,16,4,128,1,float16,float16,31,0.08186133205890656
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,512,1,16,4,128,1,float16,fp8,15,0.030213333666324615
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,512,1,16,4,128,1,float16,fp8,63,0.02980799973011017
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,512,1,16,4,128,1,float16,float16,127,0.08306133250395457
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,512,1,16,4,128,1,float16,float16,63,0.08185066779454549
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,512,1,16,4,128,1,float16,fp8,31,0.02993600070476532
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,512,1,16,4,128,1,float16,fp8,127,0.03620799879233042
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,512,1,16,4,128,1,float16,fp8,255,0.051455999414126076
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,512,1,16,4,128,1,float16,float16,255,0.09504000345865886
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,512,1,16,4,128,1,float16,fp8,511,0.07446933289368947
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,512,1,16,4,128,1,float16,float16,1023,0.19617599248886108
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,512,1,16,8,128,1,float16,float16,1,0.15025066335995993
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,512,1,16,4,128,1,float16,float16,511,0.1272053321202596
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,512,1,16,8,128,1,float16,fp8,1,0.05877333382765452
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,512,1,16,8,128,1,float16,float16,3,0.15051199992497763
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,512,1,16,4,128,1,float16,fp8,1023,0.12016533811887105
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,512,1,16,8,128,1,float16,fp8,3,0.057722667853037514
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,512,1,16,8,128,1,float16,float16,7,0.15078399578730264
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,512,1,16,8,128,1,float16,fp8,7,0.05885333319505056
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,512,1,16,8,128,1,float16,float16,15,0.15074666341145834
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,512,1,16,8,128,1,float16,fp8,15,0.05916800101598104
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,512,1,16,8,128,1,float16,float16,31,0.15093333522478738
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,512,1,16,8,128,1,float16,fp8,31,0.058575997749964394
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,512,1,16,8,128,1,float16,fp8,63,0.05811200042565664
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,512,1,16,8,128,1,float16,float16,127,0.15311466654141745
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,512,1,16,8,128,1,float16,float16,255,0.1753973364830017
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,512,1,16,8,128,1,float16,fp8,127,0.07223466535409291
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,512,1,16,8,128,1,float16,float16,63,0.15237333377202353
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,512,1,16,8,128,1,float16,fp8,255,0.09361599882443745
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,512,1,16,8,128,1,float16,float16,511,0.23726399739583334
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,512,1,16,8,128,1,float16,fp8,511,0.13860799868901572
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,16,1,128,1,float16,float16,1,0.00854399986565113
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,512,1,16,8,128,1,float16,float16,1023,0.3703999916712443
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,512,1,16,8,128,1,float16,fp8,1023,0.22984000047047934
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,16,1,128,1,float16,fp8,1,0.010618666807810465
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,16,1,128,1,float16,float16,7,0.008613333106040955
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,16,1,128,1,float16,fp8,7,0.00966933307548364
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,16,1,128,1,float16,fp8,3,0.010079999764760336
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,16,1,128,1,float16,float16,15,0.008778666456540426
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,16,1,128,1,float16,fp8,15,0.009866666669646898
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,16,1,128,1,float16,float16,31,0.009381333366036415
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,16,1,128,1,float16,float16,3,0.008938666433095932
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,16,1,128,1,float16,fp8,31,0.012746666868527731
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,16,1,128,1,float16,fp8,63,0.012367999802033106
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,16,1,128,1,float16,float16,63,0.009450666606426239
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,16,1,128,1,float16,float16,255,0.010826667149861654
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,16,1,128,1,float16,fp8,127,0.012122667084137598
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,16,1,128,1,float16,float16,127,0.010501333822806677
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,16,1,128,1,float16,fp8,255,0.01232533281048139
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,16,1,128,1,float16,float16,1023,0.013631999492645264
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,16,1,128,1,float16,fp8,1023,0.01027199998497963
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,16,1,128,1,float16,fp8,511,0.02827200045188268
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,16,1,128,1,float16,float16,511,0.02826133370399475
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,16,1,128,1,float16,float16,2047,0.01775466650724411
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,16,1,128,1,float16,fp8,4095,0.013306666165590286
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,16,1,128,1,float16,fp8,2047,0.012047999848922094
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,16,1,128,1,float16,float16,4095,0.018453333526849747
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,16,1,128,1,float16,float16,8191,0.027429332335789997
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,16,1,128,1,float16,fp8,8191,0.018640000373125076
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,16,1,128,1,float16,float16,16383,0.03309333324432373
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,16,1,128,1,float16,float16,32767,0.05246399839719137
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,16,1,128,1,float16,fp8,32767,0.041562666495641075
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,16,1,128,1,float16,fp8,16383,0.022346665461858112
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,16,1,128,1,float16,float16,65535,0.07776000102361043
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,16,1,128,1,float16,float16,131071,0.12873599926630655
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,16,1,128,1,float16,fp8,65535,0.05690133571624756
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,16,1,128,1,float16,fp8,131071,0.09901866316795349
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,16,2,128,1,float16,float16,1,0.009530666594703993
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,16,2,128,1,float16,fp8,1,0.009599999835093817
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,16,2,128,1,float16,float16,3,0.00850133349498113
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,16,2,128,1,float16,fp8,7,0.009642666826645533
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,16,2,128,1,float16,fp8,15,0.009653333574533463
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,16,2,128,1,float16,fp8,3,0.009461333354314169
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,16,2,128,1,float16,float16,15,0.008863999818762144
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,16,2,128,1,float16,float16,31,0.010575999816258749
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,16,2,128,1,float16,float16,7,0.009381333366036415
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,16,2,128,1,float16,fp8,31,0.0103946669648091
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,16,2,128,1,float16,float16,63,0.009488000224033991
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,16,2,128,1,float16,fp8,63,0.012293333808581034
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,16,2,128,1,float16,float16,127,0.009701333319147428
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,16,2,128,1,float16,float16,255,0.01081066702802976
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,16,2,128,1,float16,float16,1023,0.01341333364446958
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,16,2,128,1,float16,fp8,511,0.007920000081261
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,16,2,128,1,float16,fp8,255,0.012389333297808966
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,16,2,128,1,float16,float16,511,0.012629333883523941
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,16,2,128,1,float16,fp8,127,0.012106666962305704
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,16,2,128,1,float16,fp8,1023,0.00871999996403853
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,16,2,128,1,float16,float16,2047,0.022053333620230358
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,16,2,128,1,float16,float16,4095,0.025040000677108765
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,16,2,128,1,float16,fp8,2047,0.010832000523805618
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,16,2,128,1,float16,fp8,4095,0.012479999413092932
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,16,2,128,1,float16,float16,8191,0.035674666364987694
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,16,2,128,1,float16,fp8,8191,0.017621333400408428
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,16,2,128,1,float16,fp8,16383,0.02881066749493281
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,16,2,128,1,float16,float16,32767,0.0803413341442744
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,16,2,128,1,float16,fp8,32767,0.04142933338880539
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,16,2,128,1,float16,float16,16383,0.053247998158137
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,16,2,128,1,float16,float16,65535,0.12743467092514038
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,16,4,128,1,float16,float16,1,0.008496000121037165
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,16,2,128,1,float16,fp8,65535,0.06980266670385997
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,16,4,128,1,float16,fp8,1,0.009519999846816063
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,16,4,128,1,float16,float16,3,0.008432000254591307
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,16,2,128,1,float16,float16,131071,0.2113386591275533
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,16,4,128,1,float16,float16,7,0.00855466661353906
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,16,2,128,1,float16,fp8,131071,0.11332799990971883
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,16,4,128,1,float16,fp8,7,0.009775999933481216
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,16,4,128,1,float16,float16,15,0.008842666943868002
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,16,4,128,1,float16,fp8,3,0.009493333597977957
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,16,4,128,1,float16,fp8,15,0.009632000078757605
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,16,4,128,1,float16,fp8,31,0.010480000327030817
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,16,4,128,1,float16,float16,31,0.009472000102202097
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,16,4,128,1,float16,float16,63,0.010645333677530289
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,16,4,128,1,float16,float16,127,0.009642666826645533
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,16,4,128,1,float16,fp8,63,0.012330666184425354
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,16,4,128,1,float16,fp8,127,0.012367999802033106
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,16,4,128,1,float16,fp8,255,0.007829333345095316
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,16,4,128,1,float16,float16,511,0.013077333569526672
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,16,4,128,1,float16,fp8,511,0.007642666498819987
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,16,4,128,1,float16,fp8,1023,0.00966933307548364
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,16,4,128,1,float16,float16,1023,0.019141333798567455
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,16,4,128,1,float16,float16,255,0.012351999680201212
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,16,4,128,1,float16,fp8,2047,0.015205333630243937
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,16,4,128,1,float16,float16,2047,0.02886933336655299
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,16,4,128,1,float16,float16,4095,0.04025600105524063
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,16,4,128,1,float16,fp8,8191,0.028778667251269024
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,16,4,128,1,float16,fp8,4095,0.01811733345190684
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,16,4,128,1,float16,float16,8191,0.0584853341182073
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,16,4,128,1,float16,float16,16383,0.08736532926559448
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,16,4,128,1,float16,float16,32767,0.12298666437466939
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,16,4,128,1,float16,fp8,32767,0.07689600189526875
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,16,4,128,1,float16,float16,65535,0.1997493306795756
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,16,4,128,1,float16,fp8,65535,0.12763733665148416
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,16,4,128,1,float16,fp8,16383,0.0462773342927297
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,16,8,128,1,float16,float16,1,0.011061333119869232
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,16,4,128,1,float16,float16,131071,0.3574560085932414
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,16,8,128,1,float16,fp8,1,0.005984000240763028
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,16,4,128,1,float16,fp8,131071,0.21542932589848837
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,16,8,128,1,float16,fp8,3,0.0059199997534354525
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,16,8,128,1,float16,float16,7,0.010954666882753372
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,16,8,128,1,float16,float16,3,0.011231999844312668
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,16,8,128,1,float16,float16,15,0.011258666714032492
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,16,8,128,1,float16,fp8,15,0.006117333347598712
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,16,8,128,1,float16,float16,31,0.01003200002014637
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,16,8,128,1,float16,fp8,63,0.006288000072042148
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,16,8,128,1,float16,fp8,31,0.005914666379491488
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,16,8,128,1,float16,float16,63,0.009930666536092758
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,16,8,128,1,float16,fp8,7,0.007807999849319458
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,16,8,128,1,float16,float16,127,0.011317333827416102
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,16,8,128,1,float16,float16,255,0.013631999492645264
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,16,8,128,1,float16,fp8,127,0.008597333605090777
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,16,8,128,1,float16,float16,511,0.01940800001223882
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,16,8,128,1,float16,fp8,255,0.007386666412154834
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,16,8,128,1,float16,fp8,511,0.008474666625261307
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,16,8,128,1,float16,float16,1023,0.027744000156720478
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,16,8,128,1,float16,float16,2047,0.04274133344491323
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,16,8,128,1,float16,fp8,4095,0.03203733265399933
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,16,8,128,1,float16,fp8,2047,0.020869334538777668
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,16,8,128,1,float16,fp8,1023,0.015082667271296183
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,16,8,128,1,float16,float16,4095,0.06711466610431671
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,16,8,128,1,float16,float16,8191,0.08531733353932698
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,16,8,128,1,float16,fp8,8191,0.045279999574025474
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,16,8,128,1,float16,float16,16383,0.11961600184440613
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,16,8,128,1,float16,float16,32767,0.1894879937171936
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,16,8,128,1,float16,fp8,16383,0.06820799907048543
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,16,8,128,1,float16,fp8,32767,0.11372799674669902
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,16,8,128,1,float16,float16,131071,0.6142773230870565
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,16,8,128,1,float16,float16,65535,0.3309653401374817
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,16,8,128,1,float16,fp8,65535,0.2039626638094584
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1024,1,16,1,128,1,float16,float16,1,0.0483893354733785
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,16,8,128,1,float16,fp8,131071,0.38574934005737305
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1024,1,16,1,128,1,float16,fp8,1,0.022442666192849476
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1024,1,16,1,128,1,float16,float16,3,0.048207998275756836
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1024,1,16,1,128,1,float16,fp8,3,0.02254933367172877
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1024,1,16,1,128,1,float16,float16,7,0.0481279989083608
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1024,1,16,1,128,1,float16,fp8,15,0.022485333184401195
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1024,1,16,1,128,1,float16,float16,15,0.04842133323351542
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1024,1,16,1,128,1,float16,float16,31,0.04805333415667216
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1024,1,16,1,128,1,float16,float16,63,0.04773333172003428
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1024,1,16,1,128,1,float16,fp8,31,0.022554665803909302
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1024,1,16,1,128,1,float16,fp8,7,0.02250666668017705
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1024,1,16,1,128,1,float16,fp8,63,0.022389332453409832
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1024,1,16,1,128,1,float16,float16,127,0.048783997694651283
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1024,1,16,1,128,1,float16,fp8,127,0.02644266684850057
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1024,1,16,1,128,1,float16,fp8,255,0.037802666425704956
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1024,1,16,1,128,1,float16,float16,511,0.07409599920113881
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1024,1,16,1,128,1,float16,fp8,511,0.05522666871547699
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1024,1,16,1,128,1,float16,float16,255,0.05665599803129832
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1024,1,16,2,128,1,float16,float16,1,0.08339732885360718
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1024,1,16,2,128,1,float16,fp8,1,0.026416001220544178
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1024,1,16,2,128,1,float16,float16,3,0.08374399940172832
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1024,1,16,2,128,1,float16,fp8,3,0.026181332767009735
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1024,1,16,2,128,1,float16,float16,7,0.08355200290679932
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1024,1,16,2,128,1,float16,fp8,7,0.02609066665172577
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1024,1,16,2,128,1,float16,float16,15,0.08363733688990276
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1024,1,16,2,128,1,float16,fp8,15,0.026191999514897663
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1024,1,16,2,128,1,float16,float16,63,0.08435733119646709
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1024,1,16,2,128,1,float16,fp8,63,0.025973332424958546
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1024,1,16,2,128,1,float16,float16,31,0.08348266283671062
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1024,1,16,2,128,1,float16,fp8,31,0.026554666459560394
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1024,1,16,2,128,1,float16,float16,127,0.08654933174451192
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1024,1,16,2,128,1,float16,fp8,127,0.03381866713364919
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1024,1,16,2,128,1,float16,float16,255,0.0974826713403066
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1024,1,16,2,128,1,float16,fp8,255,0.04682666560014089
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1024,1,16,4,128,1,float16,fp8,3,0.061493332187334694
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1024,1,16,4,128,1,float16,float16,1,0.15125866731007895
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1024,1,16,2,128,1,float16,float16,511,0.13052800297737122
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1024,1,16,2,128,1,float16,fp8,511,0.06596800188223521
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1024,1,16,4,128,1,float16,float16,7,0.15172266960144043
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1024,1,16,4,128,1,float16,fp8,1,0.06261866788069408
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1024,1,16,4,128,1,float16,float16,3,0.15134933590888977
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1024,1,16,4,128,1,float16,float16,15,0.15171733498573303
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1024,1,16,4,128,1,float16,fp8,7,0.06117866436640421
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1024,1,16,4,128,1,float16,fp8,31,0.06170133252938589
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1024,1,16,4,128,1,float16,float16,63,0.15451199809710184
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1024,1,16,4,128,1,float16,fp8,15,0.06311466793219249
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1024,1,16,4,128,1,float16,float16,127,0.1556426684061686
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1024,1,16,4,128,1,float16,float16,31,0.1534293293952942
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1024,1,16,4,128,1,float16,fp8,63,0.06301866471767426
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1024,1,16,4,128,1,float16,fp8,127,0.07464533547560374
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1024,1,16,4,128,1,float16,float16,255,0.17869333426157633
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1024,1,16,4,128,1,float16,fp8,255,0.09657599528630574
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1024,1,16,4,128,1,float16,float16,511,0.24153600136439005
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1024,1,16,8,128,1,float16,float16,1,0.2906773289044698
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1024,1,16,8,128,1,float16,fp8,1,0.12343466281890869
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1024,1,16,8,128,1,float16,fp8,3,0.12301333745320638
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1024,1,16,8,128,1,float16,float16,3,0.29150400559107464
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1024,1,16,4,128,1,float16,fp8,511,0.14215466380119324
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1024,1,16,8,128,1,float16,float16,7,0.29258666435877484
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1024,1,16,8,128,1,float16,fp8,7,0.12307733297348022
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1024,1,16,8,128,1,float16,float16,15,0.2939466635386149
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1024,1,16,8,128,1,float16,fp8,15,0.12321066856384277
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1024,1,16,8,128,1,float16,fp8,31,0.12331733107566833
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1024,1,16,8,128,1,float16,float16,127,0.29792000850041706
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1024,1,16,8,128,1,float16,fp8,127,0.1369706690311432
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1024,1,16,8,128,1,float16,float16,31,0.2964373429616292
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1024,1,16,8,128,1,float16,float16,63,0.2964906692504883
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1024,1,16,8,128,1,float16,fp8,63,0.123471995194753
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1024,1,16,8,128,1,float16,float16,255,0.3402880032857259
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1024,1,16,8,128,1,float16,float16,511,0.46194132169087726
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1024,1,16,8,128,1,float16,fp8,255,0.17973866065343222
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1024,1,16,8,128,1,float16,fp8,511,0.2690666715304057
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2048,1,16,1,128,1,float16,float16,1,0.08779733379681905
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2048,1,16,1,128,1,float16,float16,7,0.08801066875457764
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2048,1,16,1,128,1,float16,fp8,1,0.04331733286380768
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2048,1,16,1,128,1,float16,float16,3,0.08842133482297261
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2048,1,16,1,128,1,float16,fp8,3,0.04374399781227112
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2048,1,16,1,128,1,float16,fp8,7,0.043285335103670754
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2048,1,16,1,128,1,float16,fp8,15,0.042591998974482216
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2048,1,16,1,128,1,float16,float16,31,0.08898133039474487
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2048,1,16,1,128,1,float16,float16,15,0.08823999762535095
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2048,1,16,1,128,1,float16,fp8,31,0.04615999758243561
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2048,1,16,1,128,1,float16,fp8,63,0.044405331214269005
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2048,1,16,1,128,1,float16,float16,63,0.08910399675369263
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2048,1,16,1,128,1,float16,fp8,127,0.062362665931383766
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2048,1,16,1,128,1,float16,float16,255,0.10328533252080281
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2048,1,16,1,128,1,float16,float16,127,0.09013866384824117
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2048,1,16,1,128,1,float16,fp8,255,0.07786133388678233
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2048,1,16,2,128,1,float16,fp8,1,0.05498133103052775
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2048,1,16,2,128,1,float16,float16,1,0.1560426652431488
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2048,1,16,2,128,1,float16,float16,3,0.15593600273132324
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2048,1,16,2,128,1,float16,float16,7,0.15556266903877258
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2048,1,16,2,128,1,float16,fp8,7,0.05579733351866404
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2048,1,16,2,128,1,float16,fp8,3,0.0558240016301473
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2048,1,16,2,128,1,float16,float16,15,0.15686933199564615
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2048,1,16,2,128,1,float16,float16,31,0.15904532869656882
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2048,1,16,2,128,1,float16,fp8,15,0.0544106662273407
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2048,1,16,2,128,1,float16,fp8,31,0.05605333546797434
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2048,1,16,2,128,1,float16,fp8,63,0.054666668176651
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2048,1,16,2,128,1,float16,float16,63,0.16010666886965433
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2048,1,16,2,128,1,float16,float16,127,0.16221333543459573
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2048,1,16,2,128,1,float16,fp8,127,0.06559466818968455
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2048,1,16,4,128,1,float16,float16,1,0.2943519949913025
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2048,1,16,4,128,1,float16,fp8,1,0.12481600046157837
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2048,1,16,2,128,1,float16,float16,255,0.18459200859069824
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2048,1,16,2,128,1,float16,fp8,255,0.08408000071843465
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2048,1,16,4,128,1,float16,float16,3,0.2934346596399943
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2048,1,16,4,128,1,float16,float16,7,0.29603199164072674
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2048,1,16,4,128,1,float16,fp8,7,0.1251200040181478
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2048,1,16,4,128,1,float16,fp8,3,0.1251146694024404
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2048,1,16,4,128,1,float16,float16,15,0.29791466395060223
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2048,1,16,4,128,1,float16,fp8,15,0.12524799505869547
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2048,1,16,4,128,1,float16,float16,31,0.3023573358853658
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2048,1,16,4,128,1,float16,fp8,31,0.12497599919637044
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2048,1,16,4,128,1,float16,float16,63,0.30160532395044964
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2048,1,16,4,128,1,float16,fp8,63,0.12584533294041952
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2048,1,16,4,128,1,float16,float16,127,0.30082132418950397
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2048,1,16,8,128,1,float16,float16,1,0.5732266505559286
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2048,1,16,8,128,1,float16,fp8,1,0.23996800184249878
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2048,1,16,4,128,1,float16,fp8,127,0.14265599846839905
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2048,1,16,4,128,1,float16,float16,255,0.34643201033274335
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2048,1,16,8,128,1,float16,float16,3,0.5722346703211466
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2048,1,16,8,128,1,float16,fp8,3,0.23960000276565552
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2048,1,16,4,128,1,float16,fp8,255,0.1860426664352417
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2048,1,16,8,128,1,float16,float16,7,0.5767199993133545
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2048,1,16,8,128,1,float16,fp8,7,0.2400426665941874
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2048,1,16,8,128,1,float16,float16,15,0.5835200150807699
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2048,1,16,8,128,1,float16,fp8,15,0.2400533358256022
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2048,1,16,8,128,1,float16,fp8,31,0.23984533548355103
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2048,1,16,8,128,1,float16,float16,63,0.5837226708730062
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2048,1,16,8,128,1,float16,fp8,63,0.24059200286865234
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2048,1,16,8,128,1,float16,float16,31,0.5861920118331909
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2048,1,16,8,128,1,float16,float16,127,0.5824373165766398
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,16,1,128,1,float16,float16,1,0.014058666924635569
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,16,1,128,1,float16,fp8,1,0.009749333063761393
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,16,1,128,1,float16,float16,3,0.013855999956528345
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2048,1,16,8,128,1,float16,fp8,127,0.2669066588083903
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,16,1,128,1,float16,float16,7,0.014053333550691605
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,16,1,128,1,float16,fp8,7,0.008965333302815756
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,16,1,128,1,float16,fp8,3,0.00996800015370051
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2048,1,16,8,128,1,float16,float16,255,0.6685333251953125
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,16,1,128,1,float16,float16,15,0.014325333138306936
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2048,1,16,8,128,1,float16,fp8,255,0.35226134459177655
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,16,1,128,1,float16,fp8,15,0.009808000177145004
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,16,1,128,1,float16,float16,31,0.01570133368174235
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,16,1,128,1,float16,float16,63,0.01591466615597407
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,16,1,128,1,float16,fp8,31,0.011786667009194693
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,16,1,128,1,float16,float16,127,0.016149333367745083
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,16,1,128,1,float16,fp8,127,0.011776000261306763
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,16,1,128,1,float16,float16,255,0.018559999763965607
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,16,1,128,1,float16,fp8,63,0.011610666910807291
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,16,1,128,1,float16,fp8,511,0.009461333354314169
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,16,1,128,1,float16,fp8,255,0.013594667116800943
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,16,1,128,1,float16,float16,511,0.012858666479587555
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,16,1,128,1,float16,float16,1023,0.01350933313369751
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,16,1,128,1,float16,fp8,1023,0.010357333347201347
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,16,1,128,1,float16,float16,2047,0.02235200007756551
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,16,1,128,1,float16,fp8,4095,0.015402667224407196
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,16,1,128,1,float16,fp8,2047,0.013280000537633896
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,16,1,128,1,float16,float16,4095,0.02553066611289978
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,16,1,128,1,float16,float16,8191,0.03651199986537298
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,16,1,128,1,float16,fp8,8191,0.02646933247645696
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,16,1,128,1,float16,float16,16383,0.054197331269582115
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,16,1,128,1,float16,fp8,16383,0.03350399931271871
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,16,1,128,1,float16,float16,32767,0.08246399958928426
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,16,1,128,1,float16,fp8,32767,0.05986666679382324
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,16,1,128,1,float16,float16,65535,0.12893333037694296
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,16,2,128,1,float16,fp8,1,0.008832000195980072
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,16,2,128,1,float16,float16,1,0.013786666095256805
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,16,2,128,1,float16,float16,3,0.013909333695967993
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,16,2,128,1,float16,fp8,3,0.009029333169261614
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,16,2,128,1,float16,fp8,7,0.009050666665037474
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,16,2,128,1,float16,float16,7,0.014101333916187286
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,16,1,128,1,float16,fp8,65535,0.0960586667060852
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,16,2,128,1,float16,fp8,15,0.009749333063761393
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,16,2,128,1,float16,float16,31,0.015706667055686314
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,16,2,128,1,float16,float16,15,0.014192000031471252
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,16,2,128,1,float16,fp8,31,0.011706666400035223
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,16,2,128,1,float16,float16,63,0.01613333324591319
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,16,2,128,1,float16,fp8,63,0.011834666132926941
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,16,2,128,1,float16,float16,127,0.01616000011563301
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,16,2,128,1,float16,float16,255,0.012618667135636011
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,16,2,128,1,float16,fp8,127,0.011920000116030375
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,16,2,128,1,float16,fp8,255,0.007696000238259633
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,16,2,128,1,float16,fp8,511,0.0081386665503184
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,16,2,128,1,float16,float16,511,0.012949333836634954
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,16,2,128,1,float16,float16,1023,0.019226666539907455
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,16,2,128,1,float16,fp8,1023,0.009824000298976898
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,16,2,128,1,float16,float16,2047,0.02886933336655299
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,16,2,128,1,float16,fp8,2047,0.012560000022252401
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,16,2,128,1,float16,fp8,4095,0.0195573332409064
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,16,2,128,1,float16,float16,8191,0.05899199843406677
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,16,2,128,1,float16,float16,4095,0.040735999743143715
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,16,2,128,1,float16,fp8,8191,0.02606400102376938
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,16,2,128,1,float16,float16,16383,0.08686400453249614
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,16,2,128,1,float16,fp8,16383,0.04390933116277059
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,16,2,128,1,float16,float16,32767,0.12325333555539449
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,16,2,128,1,float16,fp8,32767,0.06563200056552887
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,16,2,128,1,float16,float16,65535,0.2003306746482849
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,16,4,128,1,float16,float16,3,0.01002133327225844
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,16,4,128,1,float16,fp8,1,0.0058666666348775225
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,16,4,128,1,float16,fp8,3,0.007114666824539502
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,16,4,128,1,float16,float16,1,0.010938666760921478
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,16,2,128,1,float16,fp8,65535,0.10950932900110881
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,16,4,128,1,float16,float16,7,0.010170666500926018
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,16,4,128,1,float16,fp8,7,0.006186666587988536
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,16,4,128,1,float16,fp8,15,0.005850666513045629
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,16,4,128,1,float16,float16,15,0.010138666878143946
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,16,4,128,1,float16,fp8,31,0.006010666489601135
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,16,4,128,1,float16,float16,31,0.010304000228643417
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,16,4,128,1,float16,fp8,63,0.005882666756709416
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,16,4,128,1,float16,float16,63,0.010863999525705973
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,16,4,128,1,float16,float16,127,0.010362666721145311
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,16,4,128,1,float16,fp8,127,0.006309333567818006
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,16,4,128,1,float16,float16,255,0.013381333400805792
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,16,4,128,1,float16,float16,511,0.01937599976857503
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,16,4,128,1,float16,fp8,255,0.00772266648709774
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,16,4,128,1,float16,fp8,511,0.008442666381597519
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,16,4,128,1,float16,float16,2047,0.04278933505217234
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,16,4,128,1,float16,float16,1023,0.02794666588306427
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,16,4,128,1,float16,fp8,2047,0.020981334149837494
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,16,4,128,1,float16,float16,4095,0.0666133314371109
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,16,4,128,1,float16,fp8,1023,0.015200000256299973
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,16,4,128,1,float16,fp8,4095,0.032058666149775185
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,16,4,128,1,float16,float16,8191,0.08525333801905315
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,16,4,128,1,float16,fp8,8191,0.045797333121299744
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,16,4,128,1,float16,float16,16383,0.11998933553695679
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,16,4,128,1,float16,fp8,16383,0.06841066479682922
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,16,4,128,1,float16,float16,32767,0.1916960080464681
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,16,4,128,1,float16,fp8,32767,0.11374933520952861
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,16,8,128,1,float16,float16,1,0.010677333921194077
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,16,4,128,1,float16,fp8,65535,0.20409599939982095
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,16,4,128,1,float16,float16,65535,0.3329919974009196
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,16,8,128,1,float16,float16,3,0.010458666831254959
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,16,8,128,1,float16,float16,7,0.010469333579142889
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,16,8,128,1,float16,fp8,3,0.00589866687854131
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,16,8,128,1,float16,fp8,1,0.006106666599710782
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,16,8,128,1,float16,fp8,7,0.00600533311565717
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,16,8,128,1,float16,float16,15,0.010453333457310995
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,16,8,128,1,float16,fp8,15,0.006277333324154218
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,16,8,128,1,float16,float16,31,0.010597333312034607
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,16,8,128,1,float16,float16,63,0.010351999973257383
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,16,8,128,1,float16,fp8,31,0.006490666419267654
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,16,8,128,1,float16,fp8,63,0.005999999741713206
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,16,8,128,1,float16,fp8,127,0.0064213331788778305
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,16,8,128,1,float16,fp8,255,0.00810666692753633
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,16,8,128,1,float16,float16,255,0.0206986665725708
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,16,8,128,1,float16,float16,127,0.010629333555698395
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,16,8,128,1,float16,float16,511,0.02808533360560735
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,16,8,128,1,float16,fp8,511,0.014240000396966934
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,16,8,128,1,float16,float16,1023,0.04543999830881754
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,16,8,128,1,float16,fp8,2047,0.0301706666747729
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,16,8,128,1,float16,fp8,1023,0.024351999163627625
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,16,8,128,1,float16,float16,2047,0.06505066653092702
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,16,8,128,1,float16,float16,4095,0.08374399940172832
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,16,8,128,1,float16,fp8,4095,0.04365866879622141
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,16,8,128,1,float16,fp8,8191,0.06673599779605865
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,16,8,128,1,float16,float16,8191,0.11843732992808025
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,16,8,128,1,float16,float16,16383,0.18889067570368448
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,16,8,128,1,float16,fp8,16383,0.11213866869608562
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,16,8,128,1,float16,float16,32767,0.32923734188079834
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,16,8,128,1,float16,fp8,32767,0.20275733868281046
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,16,8,128,1,float16,float16,65535,0.6117706696192423
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,8,1,128,1,float16,float16,1,0.013973332941532135
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,16,8,128,1,float16,fp8,65535,0.38412264982859295
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,8,1,128,1,float16,float16,3,0.014106666048367819
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,8,1,128,1,float16,fp8,1,0.009039999917149544
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,8,1,128,1,float16,fp8,3,0.009109333157539368
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,8,1,128,1,float16,float16,7,0.013882666826248169
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,8,1,128,1,float16,fp8,7,0.008938666433095932
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,8,1,128,1,float16,float16,15,0.0145066666106383
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,8,1,128,1,float16,fp8,15,0.009935999910036722
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,8,1,128,1,float16,fp8,63,0.011861333002646765
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,8,1,128,1,float16,float16,31,0.015728000551462173
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,8,1,128,1,float16,float16,63,0.016016000260909397
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,8,1,128,1,float16,fp8,31,0.011648000528415045
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,8,1,128,1,float16,float16,127,0.01611199975013733
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,8,1,128,1,float16,fp8,127,0.011706666400035223
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,8,1,128,1,float16,float16,511,0.013248000293970108
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,8,1,128,1,float16,fp8,511,0.008101333553592363
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,8,1,128,1,float16,float16,255,0.012410666793584824
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,8,1,128,1,float16,fp8,255,0.007903999959429106
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,8,1,128,1,float16,float16,1023,0.018986667195955913
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,8,1,128,1,float16,fp8,1023,0.009888000165422758
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,8,1,128,1,float16,float16,2047,0.02906133234500885
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,8,1,128,1,float16,float16,4095,0.041109333435694374
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,8,1,128,1,float16,fp8,2047,0.012309333930412928
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,8,1,128,1,float16,fp8,4095,0.019445333629846573
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,8,1,128,1,float16,fp8,8191,0.02589866767326991
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,8,1,128,1,float16,float16,8191,0.059861332178115845
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,8,1,128,1,float16,float16,16383,0.0885813335577647
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,8,1,128,1,float16,fp8,16383,0.04363200068473816
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,8,2,128,1,float16,float16,1,0.010405333091815313
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,8,1,128,1,float16,float16,32767,0.12502933541933695
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,8,1,128,1,float16,fp8,32767,0.06538133323192596
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,8,2,128,1,float16,fp8,1,0.006117333347598712
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,8,2,128,1,float16,float16,3,0.010058666889866194
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,8,2,128,1,float16,fp8,3,0.005882666756709416
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,8,1,128,1,float16,fp8,65535,0.10946666200955708
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,8,1,128,1,float16,float16,65535,0.20124799013137817
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,8,2,128,1,float16,float16,7,0.010234666367371878
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,8,2,128,1,float16,fp8,7,0.0058399997651577
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,8,2,128,1,float16,float16,15,0.010026666646202406
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,8,2,128,1,float16,fp8,15,0.006010666489601135
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,8,2,128,1,float16,float16,31,0.009962666779756546
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,8,2,128,1,float16,fp8,31,0.005834666391213735
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,8,2,128,1,float16,float16,63,0.010106666634480158
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,8,2,128,1,float16,fp8,63,0.0058026667684316635
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,8,2,128,1,float16,float16,127,0.010298666854699453
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,8,2,128,1,float16,float16,255,0.013487999637921652
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,8,2,128,1,float16,fp8,127,0.006229333579540253
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,8,2,128,1,float16,fp8,255,0.007541333635648091
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,8,2,128,1,float16,float16,511,0.019253333409627277
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,8,2,128,1,float16,fp8,511,0.008496000121037165
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,8,2,128,1,float16,float16,1023,0.02769600103298823
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,8,2,128,1,float16,fp8,1023,0.01504533365368843
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,8,2,128,1,float16,float16,2047,0.0430026650428772
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,8,2,128,1,float16,fp8,2047,0.020634666085243225
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,8,2,128,1,float16,float16,4095,0.06714666883150737
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,8,2,128,1,float16,fp8,4095,0.03223466624816259
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,8,2,128,1,float16,float16,8191,0.08518399794896443
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,8,2,128,1,float16,float16,16383,0.12026666601498921
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,8,2,128,1,float16,fp8,8191,0.0458186666170756
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,8,2,128,1,float16,fp8,16383,0.06853333115577698
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,8,2,128,1,float16,float16,32767,0.19192532698313394
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,8,2,128,1,float16,fp8,32767,0.11391466856002808
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,8,4,128,1,float16,float16,1,0.0106133334338665
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,8,4,128,1,float16,fp8,1,0.006453333422541618
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,8,4,128,1,float16,float16,3,0.010384000216921171
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,8,2,128,1,float16,fp8,65535,0.2048693299293518
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,8,2,128,1,float16,float16,65535,0.3339466651280721
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,8,4,128,1,float16,fp8,3,0.006048000107208888
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,8,4,128,1,float16,float16,7,0.010485333700974783
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,8,4,128,1,float16,float16,15,0.010762666662534079
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,8,4,128,1,float16,fp8,7,0.006325333068768184
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,8,4,128,1,float16,fp8,15,0.006544000158707301
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,8,4,128,1,float16,float16,31,0.010565333068370819
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,8,4,128,1,float16,float16,63,0.01028266673286756
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,8,4,128,1,float16,fp8,31,0.006010666489601135
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,8,4,128,1,float16,fp8,63,0.005914666379491488
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,8,4,128,1,float16,float16,127,0.010826667149861654
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,8,4,128,1,float16,fp8,255,0.00854399986565113
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,8,4,128,1,float16,fp8,127,0.006549333532651265
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,8,4,128,1,float16,float16,255,0.02065066620707512
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,8,4,128,1,float16,float16,511,0.027952000498771667
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,8,4,128,1,float16,fp8,511,0.014117332796255747
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,8,4,128,1,float16,float16,1023,0.0455626646677653
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,8,4,128,1,float16,fp8,1023,0.02430933217207591
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,8,4,128,1,float16,float16,2047,0.06603200236956279
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,8,4,128,1,float16,fp8,2047,0.030234667162100475
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,8,4,128,1,float16,float16,4095,0.08378133177757263
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,8,4,128,1,float16,fp8,4095,0.04353600243727366
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,8,4,128,1,float16,float16,8191,0.11972799897193909
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,8,4,128,1,float16,fp8,8191,0.06674133241176605
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,8,4,128,1,float16,float16,16383,0.18997865915298462
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,8,4,128,1,float16,fp8,16383,0.11201066772143047
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,8,4,128,1,float16,float16,32767,0.3328373432159424
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,8,4,128,1,float16,fp8,32767,0.20260266462961832
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,8,1,128,1,float16,fp8,1,0.009066666786869368
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,8,4,128,1,float16,float16,65535,0.6152746677398682
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,8,4,128,1,float16,fp8,65535,0.3838080167770386
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,8,1,128,1,float16,float16,1,0.008005333443482717
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,8,1,128,1,float16,float16,3,0.008010666817426682
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,8,1,128,1,float16,fp8,3,0.00916800027092298
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,8,1,128,1,float16,float16,7,0.00814933329820633
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,8,1,128,1,float16,fp8,7,0.009429333110650381
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,8,1,128,1,float16,float16,15,0.00843733362853527
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,8,1,128,1,float16,float16,31,0.008997333546479544
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,8,1,128,1,float16,fp8,31,0.00985599992175897
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,8,1,128,1,float16,fp8,15,0.009173333023985228
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,8,1,128,1,float16,float16,63,0.008933333059151968
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,8,1,128,1,float16,fp8,63,0.011823999385039011
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,8,1,128,1,float16,float16,127,0.009056000038981438
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,8,1,128,1,float16,fp8,127,0.011968000481526056
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,8,1,128,1,float16,float16,255,0.010128000130256018
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,8,1,128,1,float16,float16,511,0.01621866722901662
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,8,1,128,1,float16,fp8,255,0.011802667131026586
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,8,1,128,1,float16,fp8,1023,0.01717866708834966
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,8,1,128,1,float16,float16,1023,0.017301333447297413
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,8,1,128,1,float16,fp8,511,0.016143999993801117
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,8,1,128,1,float16,float16,2047,0.017349333812793095
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,8,1,128,1,float16,fp8,2047,0.017466666797796886
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,8,1,128,1,float16,float16,4095,0.017632000148296356
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,8,1,128,1,float16,fp8,4095,0.017477333545684814
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,8,1,128,1,float16,fp8,8191,0.012639999389648438
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,8,1,128,1,float16,float16,8191,0.018197332819302876
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,8,1,128,1,float16,float16,16383,0.019567999988794327
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,8,1,128,1,float16,float16,32767,0.023872000475724537
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,8,1,128,1,float16,fp8,16383,0.014938666174809137
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,8,1,128,1,float16,fp8,32767,0.01930133377512296
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,8,1,128,1,float16,float16,65535,0.0286613330245018
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,8,1,128,1,float16,fp8,65535,0.02387733260790507
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,8,1,128,1,float16,float16,131071,0.03979733337958654
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,8,2,128,1,float16,float16,1,0.007877333089709282
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,8,2,128,1,float16,fp8,1,0.009119999905427298
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,8,2,128,1,float16,fp8,3,0.009050666665037474
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,8,1,128,1,float16,fp8,131071,0.03163733333349228
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,8,2,128,1,float16,float16,3,0.00810666692753633
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,8,2,128,1,float16,float16,7,0.008522666369875273
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,8,2,128,1,float16,float16,15,0.008101333553592363
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,8,2,128,1,float16,fp8,7,0.009056000038981438
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,8,2,128,1,float16,fp8,31,0.01002133327225844
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,8,2,128,1,float16,float16,31,0.009045333291093508
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,8,2,128,1,float16,fp8,15,0.009136000027259191
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,8,2,128,1,float16,float16,63,0.00921066664159298
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,8,2,128,1,float16,fp8,63,0.011711999773979187
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,8,2,128,1,float16,float16,127,0.00898133342464765
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,8,2,128,1,float16,fp8,127,0.011792000383138657
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,8,2,128,1,float16,fp8,255,0.011909333368142446
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,8,2,128,1,float16,float16,511,0.016373333831628162
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,8,2,128,1,float16,float16,255,0.01020800011853377
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,8,2,128,1,float16,fp8,511,0.01598400001724561
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,8,2,128,1,float16,float16,1023,0.017530667285124462
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,8,2,128,1,float16,float16,2047,0.01730666682124138
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,8,2,128,1,float16,fp8,1023,0.01703466723362605
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,8,2,128,1,float16,fp8,2047,0.017152000218629837
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,8,2,128,1,float16,float16,4095,0.015029333531856537
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,8,2,128,1,float16,fp8,4095,0.01044800008336703
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,8,2,128,1,float16,fp8,8191,0.012330666184425354
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,8,2,128,1,float16,float16,8191,0.018160000443458557
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,8,2,128,1,float16,float16,16383,0.020165332903464634
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,8,2,128,1,float16,fp8,16383,0.014869333555301031
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,8,2,128,1,float16,float16,32767,0.024192000428835552
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,8,2,128,1,float16,fp8,32767,0.01903466631968816
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,8,2,128,1,float16,float16,65535,0.030623999734719593
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,8,2,128,1,float16,fp8,65535,0.02409599969784419
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,8,4,128,1,float16,float16,1,0.00784533346692721
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,8,4,128,1,float16,fp8,1,0.009056000038981438
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,8,2,128,1,float16,float16,131071,0.05663999915122986
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,8,2,128,1,float16,fp8,131071,0.03612266729275385
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,8,4,128,1,float16,float16,3,0.007967999825874964
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,8,4,128,1,float16,fp8,7,0.009130666653315226
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,8,4,128,1,float16,float16,7,0.008746666833758354
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,8,4,128,1,float16,fp8,3,0.009050666665037474
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,8,4,128,1,float16,float16,15,0.00820266641676426
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,8,4,128,1,float16,fp8,15,0.009178666397929192
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,8,4,128,1,float16,float16,31,0.00892800030608972
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,8,4,128,1,float16,fp8,31,0.009808000177145004
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,8,4,128,1,float16,float16,63,0.009189333145817121
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,8,4,128,1,float16,float16,127,0.009285333255926767
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,8,4,128,1,float16,fp8,63,0.011866666376590729
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,8,4,128,1,float16,fp8,127,0.01191466674208641
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,8,4,128,1,float16,float16,255,0.0100853331387043
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,8,4,128,1,float16,fp8,255,0.01191466674208641
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,8,4,128,1,float16,fp8,511,0.016000000139077503
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,8,4,128,1,float16,float16,511,0.0161013330022494
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,8,4,128,1,float16,float16,1023,0.017263999829689663
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,8,4,128,1,float16,fp8,1023,0.017344000438849132
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,8,4,128,1,float16,float16,2047,0.014981333166360855
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,8,4,128,1,float16,float16,4095,0.015706667055686314
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,8,4,128,1,float16,fp8,4095,0.010197333370645842
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,8,4,128,1,float16,fp8,2047,0.009130666653315226
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,8,4,128,1,float16,float16,8191,0.019226666539907455
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,8,4,128,1,float16,fp8,8191,0.012538666526476542
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,8,4,128,1,float16,fp8,16383,0.015098666151364645
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,8,4,128,1,float16,float16,16383,0.021557333568731945
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,8,4,128,1,float16,float16,32767,0.03497066597143809
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,8,4,128,1,float16,fp8,32767,0.021856000026067097
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,8,4,128,1,float16,float16,65535,0.04638933142026266
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,8,4,128,1,float16,fp8,65535,0.02737066646416982
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,8,4,128,1,float16,fp8,131071,0.05477866530418396
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,8,4,128,1,float16,float16,131071,0.0777706652879715
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,8,1,128,1,float16,float16,1,0.008645333349704742
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,8,1,128,1,float16,fp8,1,0.009136000027259191
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,8,1,128,1,float16,fp8,3,0.009183999771873156
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,8,1,128,1,float16,float16,3,0.00816000004609426
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,8,1,128,1,float16,float16,7,0.008176000167926153
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,8,1,128,1,float16,fp8,7,0.009125333279371262
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,8,1,128,1,float16,float16,15,0.008207999790708223
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,8,1,128,1,float16,float16,31,0.00916800027092298
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,8,1,128,1,float16,fp8,15,0.009296000003814697
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,8,1,128,1,float16,fp8,31,0.009930666536092758
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,8,1,128,1,float16,float16,127,0.009093333035707474
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,8,1,128,1,float16,float16,63,0.00922133338948091
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,8,1,128,1,float16,fp8,63,0.011920000116030375
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,8,1,128,1,float16,float16,255,0.010138666878143946
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,8,1,128,1,float16,fp8,127,0.011866666376590729
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,8,1,128,1,float16,fp8,255,0.011941333611806234
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,8,1,128,1,float16,float16,511,0.01639466608564059
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,8,1,128,1,float16,float16,1023,0.017445333302021027
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,8,1,128,1,float16,fp8,511,0.01623999948302905
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,8,1,128,1,float16,fp8,1023,0.01747200017174085
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,8,1,128,1,float16,fp8,2047,0.01736533393462499
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,8,1,128,1,float16,float16,2047,0.017973333597183228
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,8,1,128,1,float16,fp8,4095,0.010570666442314783
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,8,1,128,1,float16,float16,4095,0.015861333658297855
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,8,1,128,1,float16,float16,8191,0.01924266666173935
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,8,1,128,1,float16,fp8,8191,0.012789333860079447
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,8,1,128,1,float16,fp8,16383,0.015168000012636185
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,8,1,128,1,float16,float16,16383,0.020848001043001812
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,8,1,128,1,float16,float16,32767,0.02492266645034154
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,8,1,128,1,float16,fp8,32767,0.01931200052301089
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,8,1,128,1,float16,float16,65535,0.030917334059874218
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,8,1,128,1,float16,fp8,65535,0.02422933280467987
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,8,2,128,1,float16,float16,1,0.008047999814152718
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,8,2,128,1,float16,fp8,1,0.009050666665037474
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,8,1,128,1,float16,float16,131071,0.0583840012550354
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,8,1,128,1,float16,fp8,131071,0.03562133262554804
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,8,2,128,1,float16,float16,3,0.007930666829148928
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,8,2,128,1,float16,float16,7,0.008250666782259941
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,8,2,128,1,float16,fp8,3,0.009322666873534521
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,8,2,128,1,float16,fp8,7,0.009061333412925402
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,8,2,128,1,float16,float16,15,0.008336000144481659
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,8,2,128,1,float16,fp8,31,0.00984533317387104
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,8,2,128,1,float16,fp8,15,0.009359999870260557
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,8,2,128,1,float16,float16,31,0.008869333192706108
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,8,2,128,1,float16,fp8,63,0.011936000237862269
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,8,2,128,1,float16,float16,63,0.009098666409651438
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,8,2,128,1,float16,float16,127,0.009328000247478485
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,8,2,128,1,float16,fp8,127,0.011765333513418833
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,8,2,128,1,float16,float16,255,0.010389333590865135
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,8,2,128,1,float16,fp8,255,0.011866666376590729
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,8,2,128,1,float16,float16,1023,0.01764800027012825
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,8,2,128,1,float16,float16,511,0.016480000068744022
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,8,2,128,1,float16,fp8,511,0.016197333733240765
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,8,2,128,1,float16,fp8,1023,0.017194667210181553
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,8,2,128,1,float16,fp8,2047,0.009061333412925402
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,8,2,128,1,float16,float16,2047,0.015040000279744467
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,8,2,128,1,float16,float16,4095,0.016330666840076447
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,8,2,128,1,float16,float16,8191,0.019914666811625164
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,8,2,128,1,float16,fp8,4095,0.010197333370645842
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,8,2,128,1,float16,float16,16383,0.022074667116006214
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,8,2,128,1,float16,fp8,16383,0.015072000523408255
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,8,2,128,1,float16,fp8,8191,0.012576000144084295
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,8,2,128,1,float16,float16,32767,0.03522133330504099
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,8,2,128,1,float16,fp8,32767,0.021130666136741638
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,8,2,128,1,float16,float16,65535,0.04789866507053375
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,8,4,128,1,float16,fp8,1,0.009216000015536943
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,8,2,128,1,float16,fp8,65535,0.02679466704527537
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,8,4,128,1,float16,float16,1,0.007877333089709282
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,8,2,128,1,float16,float16,131071,0.07792533437410991
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,8,4,128,1,float16,float16,3,0.008010666817426682
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,8,4,128,1,float16,fp8,3,0.009205333267649015
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,8,4,128,1,float16,float16,7,0.008127999802430471
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,8,2,128,1,float16,fp8,131071,0.05525333185990652
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,8,4,128,1,float16,float16,15,0.008485333373149237
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,8,4,128,1,float16,fp8,15,0.009301333377758661
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,8,4,128,1,float16,fp8,7,0.009136000027259191
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,8,4,128,1,float16,float16,31,0.009248000259200731
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,8,4,128,1,float16,fp8,63,0.011957333733638128
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,8,4,128,1,float16,fp8,31,0.010101333260536194
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,8,4,128,1,float16,float16,63,0.009242666885256767
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,8,4,128,1,float16,float16,127,0.009039999917149544
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,8,4,128,1,float16,fp8,127,0.01192533348997434
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,8,4,128,1,float16,fp8,255,0.012005332857370377
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,8,4,128,1,float16,float16,255,0.010159999753038088
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,8,4,128,1,float16,float16,511,0.01647466669480006
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,8,4,128,1,float16,fp8,511,0.016634666671355564
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,8,4,128,1,float16,float16,1023,0.013338666409254074
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,8,4,128,1,float16,fp8,1023,0.008192000289758047
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,8,4,128,1,float16,float16,2047,0.015802666544914246
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,8,4,128,1,float16,fp8,2047,0.009056000038981438
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,8,4,128,1,float16,float16,8191,0.025983999172846477
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,8,4,128,1,float16,fp8,4095,0.010373333469033241
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,8,4,128,1,float16,fp8,8191,0.014192000031471252
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,8,4,128,1,float16,float16,4095,0.01724799970785777
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,8,4,128,1,float16,fp8,16383,0.0169813334941864
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,8,4,128,1,float16,float16,16383,0.031104000906149547
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,8,4,128,1,float16,float16,32767,0.050714666644732155
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,8,4,128,1,float16,fp8,32767,0.031930667658646904
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,8,4,128,1,float16,float16,65535,0.07412266731262207
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,8,1,128,1,float16,float16,1,0.010490667074918747
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,8,4,128,1,float16,fp8,65535,0.04307200014591217
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,8,4,128,1,float16,float16,131071,0.12589866916338602
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,8,1,128,1,float16,fp8,1,0.005930666501323382
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,8,1,128,1,float16,float16,3,0.010410666465759277
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,8,4,128,1,float16,fp8,131071,0.07600000003973643
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,8,1,128,1,float16,fp8,3,0.006384000182151794
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,8,1,128,1,float16,float16,7,0.010378666842977205
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,8,1,128,1,float16,fp8,7,0.006501333167155583
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,8,1,128,1,float16,fp8,15,0.006458666796485583
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,8,1,128,1,float16,float16,15,0.01022933361430963
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,8,1,128,1,float16,float16,31,0.010239999741315842
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,8,1,128,1,float16,fp8,31,0.006186666587988536
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,8,1,128,1,float16,float16,63,0.010384000216921171
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,8,1,128,1,float16,fp8,63,0.006565333033601443
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,8,1,128,1,float16,float16,127,0.010415999839703241
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,8,1,128,1,float16,float16,255,0.013674666484196981
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,8,1,128,1,float16,fp8,127,0.008421333506703377
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,8,1,128,1,float16,float16,511,0.019466667125622433
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,8,1,128,1,float16,fp8,511,0.009194666519761086
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,8,1,128,1,float16,fp8,255,0.008890666688481966
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,8,1,128,1,float16,float16,1023,0.02788266787926356
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,8,1,128,1,float16,fp8,1023,0.015253332753976187
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,8,1,128,1,float16,float16,2047,0.043040002385775246
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,8,1,128,1,float16,fp8,2047,0.018624000251293182
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,8,1,128,1,float16,float16,4095,0.06729599833488464
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,8,1,128,1,float16,fp8,4095,0.028362666567166645
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,8,1,128,1,float16,float16,8191,0.08588799834251404
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,8,1,128,1,float16,fp8,8191,0.04197333256403605
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,8,1,128,1,float16,fp8,16383,0.060405333836873375
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,8,1,128,1,float16,float16,16383,0.12295466661453247
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,8,2,128,1,float16,float16,1,0.010735999792814255
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,8,2,128,1,float16,fp8,1,0.006090666477878888
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,8,2,128,1,float16,float16,3,0.010778666784365972
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,8,1,128,1,float16,float16,32767,0.19461333751678467
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,8,1,128,1,float16,fp8,32767,0.10029866298039754
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,8,2,128,1,float16,float16,7,0.010768000036478043
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,8,2,128,1,float16,fp8,3,0.007813333223263422
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,8,2,128,1,float16,fp8,15,0.00600533311565717
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,8,2,128,1,float16,float16,31,0.010816000401973724
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,8,2,128,1,float16,fp8,7,0.010133333504199982
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,8,2,128,1,float16,float16,15,0.011882666498422623
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,8,2,128,1,float16,fp8,31,0.00754666638871034
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,8,2,128,1,float16,float16,63,0.010832000523805618
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,8,2,128,1,float16,fp8,63,0.0064319999267657595
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,8,2,128,1,float16,fp8,127,0.006560000280539195
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,8,2,128,1,float16,float16,255,0.02082666630546252
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,8,2,128,1,float16,float16,127,0.010549332946538925
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,8,2,128,1,float16,fp8,255,0.009253333633144697
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,8,2,128,1,float16,float16,511,0.027952000498771667
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,8,2,128,1,float16,fp8,511,0.0143306665122509
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,8,2,128,1,float16,float16,1023,0.04561600089073181
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,8,2,128,1,float16,fp8,1023,0.024501333634058636
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,8,2,128,1,float16,float16,2047,0.06533866624037425
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,8,2,128,1,float16,fp8,2047,0.03046933313210805
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,8,2,128,1,float16,float16,4095,0.08429333567619324
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,8,2,128,1,float16,fp8,4095,0.04460800190766653
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,8,2,128,1,float16,float16,16383,0.19172267119089761
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,8,2,128,1,float16,fp8,8191,0.06717333197593689
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,8,2,128,1,float16,float16,8191,0.11979200442632039
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,8,2,128,1,float16,fp8,16383,0.1120746632417043
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,8,4,128,1,float16,float16,1,0.01540800059835116
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,8,2,128,1,float16,float16,32767,0.3335040012995402
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,8,4,128,1,float16,float16,3,0.01568000018596649
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,8,2,128,1,float16,fp8,32767,0.20314133167266846
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,8,4,128,1,float16,fp8,1,0.0064853330453236895
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,8,4,128,1,float16,fp8,3,0.008634666601816813
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,8,4,128,1,float16,float16,7,0.01575999955336253
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,8,4,128,1,float16,fp8,7,0.014570667097965876
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,8,4,128,1,float16,float16,15,0.015168000012636185
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,8,4,128,1,float16,fp8,15,0.00696000022192796
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,8,4,128,1,float16,float16,31,0.01569066693385442
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,8,4,128,1,float16,fp8,31,0.006538666784763336
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,8,4,128,1,float16,float16,63,0.015461333096027374
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,8,4,128,1,float16,fp8,63,0.00814933329820633
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,8,4,128,1,float16,fp8,127,0.007461333026488622
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,8,4,128,1,float16,float16,127,0.01591466615597407
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,8,4,128,1,float16,fp8,255,0.013477332890033722
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,8,4,128,1,float16,float16,255,0.029978667696317036
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,8,4,128,1,float16,float16,511,0.04620266457398733
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,8,4,128,1,float16,fp8,511,0.02370133250951767
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,8,4,128,1,float16,float16,1023,0.06225066880385081
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,8,4,128,1,float16,float16,2047,0.08347200353940327
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,8,4,128,1,float16,fp8,1023,0.029680001238981884
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,8,4,128,1,float16,float16,4095,0.11990933616956075
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,8,4,128,1,float16,fp8,2047,0.043706665436426796
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,8,4,128,1,float16,fp8,4095,0.06631466746330261
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,8,4,128,1,float16,float16,8191,0.19106133778889975
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,8,4,128,1,float16,fp8,8191,0.11135466893513997
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,8,4,128,1,float16,float16,16383,0.3327146569887797
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,8,1,128,1,float16,float16,1,0.008272000278035799
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,8,4,128,1,float16,fp8,16383,0.20252267519632974
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,8,1,128,1,float16,fp8,1,0.009413333609700203
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,8,1,128,1,float16,float16,3,0.008453333129485449
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,8,1,128,1,float16,fp8,3,0.009279999881982803
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,8,4,128,1,float16,fp8,32767,0.38408533732096356
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,8,1,128,1,float16,float16,7,0.008223999912540117
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,8,4,128,1,float16,float16,32767,0.6136906544367472
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,8,1,128,1,float16,fp8,7,0.009461333354314169
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,8,1,128,1,float16,float16,15,0.008613333106040955
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,8,1,128,1,float16,fp8,15,0.009450666606426239
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,8,1,128,1,float16,fp8,31,0.010064000263810158
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,8,1,128,1,float16,float16,31,0.009205333267649015
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,8,1,128,1,float16,fp8,63,0.011962667107582092
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,8,1,128,1,float16,float16,63,0.009173333023985228
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,8,1,128,1,float16,float16,127,0.010437333335479101
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,8,1,128,1,float16,float16,255,0.010431999961535135
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,8,1,128,1,float16,fp8,127,0.012138667205969492
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,8,1,128,1,float16,fp8,255,0.012149333953857422
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,8,1,128,1,float16,float16,511,0.01642666632930438
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,8,1,128,1,float16,float16,1023,0.017642666896184284
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,8,1,128,1,float16,fp8,511,0.01657066618402799
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,8,1,128,1,float16,fp8,1023,0.017525333911180496
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,8,1,128,1,float16,fp8,2047,0.009530666594703993
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,8,1,128,1,float16,float16,4095,0.017957333475351334
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,8,1,128,1,float16,float16,2047,0.016597333053747814
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,8,1,128,1,float16,fp8,8191,0.013104000439246496
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,8,1,128,1,float16,float16,8191,0.02126399924357732
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,8,1,128,1,float16,fp8,4095,0.010698666175206503
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,8,1,128,1,float16,float16,16383,0.023152001202106476
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,8,1,128,1,float16,fp8,16383,0.015311999867359797
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,8,1,128,1,float16,float16,32767,0.03669333209594091
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,8,1,128,1,float16,fp8,32767,0.021482666333516438
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,8,1,128,1,float16,float16,65535,0.04985066751639048
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,8,1,128,1,float16,fp8,65535,0.027263998985290527
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,8,2,128,1,float16,float16,1,0.008074666683872541
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,8,2,128,1,float16,fp8,1,0.009258666386206945
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,8,1,128,1,float16,float16,131071,0.08027733365694682
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,8,1,128,1,float16,fp8,131071,0.043738668163617454
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,8,2,128,1,float16,float16,3,0.009119999905427298
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,8,2,128,1,float16,fp8,3,0.009530666594703993
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,8,2,128,1,float16,fp8,7,0.00933333362142245
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,8,2,128,1,float16,float16,15,0.008416000132759413
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,8,2,128,1,float16,float16,7,0.009242666885256767
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,8,2,128,1,float16,fp8,15,0.009589333087205887
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,8,2,128,1,float16,float16,31,0.009082666908701261
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,8,2,128,1,float16,float16,63,0.009381333366036415
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,8,2,128,1,float16,fp8,31,0.010277333358923594
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,8,2,128,1,float16,fp8,63,0.012335999558369318
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,8,2,128,1,float16,float16,127,0.009429333110650381
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,8,2,128,1,float16,float16,255,0.010362666721145311
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,8,2,128,1,float16,fp8,127,0.012063999970753988
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,8,2,128,1,float16,fp8,255,0.012117333710193634
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,8,2,128,1,float16,fp8,511,0.016629333297411602
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,8,2,128,1,float16,float16,511,0.016490666816631954
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,8,2,128,1,float16,float16,1023,0.013466666142145792
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,8,2,128,1,float16,fp8,1023,0.008330666770537695
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,8,2,128,1,float16,float16,2047,0.016976000120242436
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,8,2,128,1,float16,fp8,2047,0.00915733352303505
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,8,2,128,1,float16,float16,4095,0.017978666971127193
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,8,2,128,1,float16,fp8,4095,0.01055466632048289
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,8,2,128,1,float16,float16,8191,0.02720000098148982
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,8,2,128,1,float16,fp8,8191,0.014149333039919535
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,8,2,128,1,float16,float16,16383,0.0322080006202062
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,8,2,128,1,float16,float16,32767,0.05161066850026449
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,8,2,128,1,float16,fp8,16383,0.017024000485738117
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,8,2,128,1,float16,fp8,32767,0.031717332700888314
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,8,2,128,1,float16,float16,65535,0.07597866654396057
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,8,2,128,1,float16,fp8,65535,0.04308799902598063
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,8,2,128,1,float16,fp8,131071,0.0754613329966863
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,8,2,128,1,float16,float16,131071,0.12761066357294717
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,8,4,128,1,float16,float16,1,0.008325333396593729
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,8,4,128,1,float16,fp8,1,0.009269333134094873
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,8,4,128,1,float16,float16,3,0.008469333251317343
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,8,4,128,1,float16,fp8,3,0.009525333220760027
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,8,4,128,1,float16,float16,7,0.008383999889095625
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,8,4,128,1,float16,fp8,7,0.009375999992092451
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,8,4,128,1,float16,fp8,31,0.010138666878143946
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,8,4,128,1,float16,float16,15,0.008661333471536636
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,8,4,128,1,float16,fp8,15,0.009663999701539675
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,8,4,128,1,float16,float16,31,0.009253333633144697
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,8,4,128,1,float16,float16,63,0.009349333122372627
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,8,4,128,1,float16,fp8,63,0.012037333101034164
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,8,4,128,1,float16,float16,127,0.009509333098928133
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,8,4,128,1,float16,fp8,127,0.012005332857370377
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,8,4,128,1,float16,float16,255,0.010496000448862711
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,8,4,128,1,float16,fp8,255,0.012106666962305704
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,8,4,128,1,float16,float16,511,0.012714666624863943
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,8,4,128,1,float16,float16,2047,0.021615999440352123
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,8,4,128,1,float16,fp8,1023,0.008143999924262365
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,8,4,128,1,float16,fp8,511,0.007536000261704127
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,8,4,128,1,float16,float16,1023,0.013461332768201828
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,8,4,128,1,float16,float16,4095,0.02532266577084859
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,8,4,128,1,float16,fp8,2047,0.010079999764760336
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,8,4,128,1,float16,fp8,4095,0.011786667009194693
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,8,4,128,1,float16,float16,8191,0.036490666369597115
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,8,4,128,1,float16,float16,16383,0.05399466554323832
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,8,4,128,1,float16,fp8,8191,0.020400000115235645
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,8,4,128,1,float16,fp8,16383,0.025807999074459076
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,8,4,128,1,float16,float16,32767,0.08111466467380524
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,8,4,128,1,float16,fp8,32767,0.04550399879614512
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,8,4,128,1,float16,fp8,65535,0.07297066847483318
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,8,4,128,1,float16,float16,65535,0.12738666931788126
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,8,4,128,1,float16,float16,131071,0.21061333020528158
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,8,1,128,1,float16,float16,1,0.010746666540702185
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,8,1,128,1,float16,fp8,1,0.007071999832987785
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,8,1,128,1,float16,float16,3,0.010922666639089584
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,8,4,128,1,float16,fp8,131071,0.13487466176350912
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,8,1,128,1,float16,fp8,3,0.006495999793211619
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,8,1,128,1,float16,float16,7,0.010778666784365972
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,8,1,128,1,float16,fp8,7,0.0069386667261521024
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,8,1,128,1,float16,float16,15,0.011098666737476984
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,8,1,128,1,float16,fp8,15,0.0063680000603199005
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,8,1,128,1,float16,float16,31,0.010879999647537867
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,8,1,128,1,float16,float16,63,0.010805333654085795
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,8,1,128,1,float16,fp8,31,0.007157333195209503
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,8,1,128,1,float16,fp8,63,0.00884799969693025
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,8,1,128,1,float16,float16,127,0.010960000256697336
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,8,1,128,1,float16,fp8,127,0.0069866664707660675
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,8,1,128,1,float16,float16,255,0.021002667645613354
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,8,1,128,1,float16,fp8,255,0.00897066667675972
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,8,1,128,1,float16,float16,511,0.028202667832374573
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,8,1,128,1,float16,float16,1023,0.04561600089073181
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,8,1,128,1,float16,fp8,511,0.015002666662136713
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,8,1,128,1,float16,fp8,1023,0.02162666618824005
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,8,1,128,1,float16,float16,2047,0.06549333532651265
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,8,1,128,1,float16,fp8,2047,0.027632000545660656
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,8,1,128,1,float16,float16,4095,0.08499733606974284
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,8,1,128,1,float16,fp8,4095,0.04073066761096319
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,8,1,128,1,float16,float16,8191,0.12116799751917522
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,8,1,128,1,float16,fp8,8191,0.06277333199977875
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,8,2,128,1,float16,float16,1,0.01540800059835116
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,8,2,128,1,float16,fp8,1,0.006442666674653689
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,8,2,128,1,float16,float16,3,0.015439999600251516
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,8,2,128,1,float16,fp8,3,0.00655466690659523
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,8,1,128,1,float16,float16,16383,0.19393599033355713
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,8,2,128,1,float16,float16,7,0.015333333363135656
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,8,1,128,1,float16,fp8,16383,0.10008533795674641
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,8,2,128,1,float16,fp8,7,0.006720000257094701
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,8,2,128,1,float16,float16,15,0.015285332997639975
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,8,2,128,1,float16,fp8,15,0.006544000158707301
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,8,2,128,1,float16,float16,31,0.01562133307258288
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,8,2,128,1,float16,fp8,31,0.006757333253820737
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,8,2,128,1,float16,fp8,63,0.0064213331788778305
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,8,2,128,1,float16,float16,127,0.015882667154073715
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,8,2,128,1,float16,fp8,127,0.006693333387374878
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,8,2,128,1,float16,float16,511,0.045797333121299744
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,8,2,128,1,float16,float16,63,0.015311999867359797
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,8,2,128,1,float16,float16,255,0.02995733420054118
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,8,2,128,1,float16,fp8,255,0.013674666484196981
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,8,2,128,1,float16,fp8,511,0.02425066630045573
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,8,2,128,1,float16,float16,1023,0.06106133262316386
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,8,2,128,1,float16,fp8,2047,0.044256001710891724
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,8,2,128,1,float16,float16,2047,0.08366933465003967
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,8,2,128,1,float16,fp8,1023,0.03012266755104065
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,8,2,128,1,float16,float16,4095,0.11990400155385335
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,8,2,128,1,float16,fp8,4095,0.06658666829268138
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,8,2,128,1,float16,float16,8191,0.19285867611567178
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,8,2,128,1,float16,fp8,8191,0.11230933666229248
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,8,4,128,1,float16,float16,1,0.025146665672461193
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,8,4,128,1,float16,fp8,1,0.00955200009047985
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,8,4,128,1,float16,float16,3,0.025434667865435284
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,8,2,128,1,float16,float16,16383,0.3354986508687337
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,8,2,128,1,float16,fp8,16383,0.20249066750208536
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,8,4,128,1,float16,fp8,3,0.010938666760921478
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,8,4,128,1,float16,fp8,7,0.009749333063761393
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,8,4,128,1,float16,float16,7,0.025349333882331848
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,8,4,128,1,float16,float16,15,0.02510933329661687
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,8,4,128,1,float16,fp8,15,0.00972800018886725
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,8,4,128,1,float16,float16,31,0.025077333052953083
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,8,4,128,1,float16,fp8,31,0.009712000067035357
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,8,4,128,1,float16,float16,63,0.025125332176685333
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,8,4,128,1,float16,fp8,63,0.015504000087579092
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,8,4,128,1,float16,float16,127,0.025461333493391674
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,8,4,128,1,float16,float16,255,0.049365331729253135
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,8,4,128,1,float16,fp8,127,0.012128000458081564
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,8,4,128,1,float16,fp8,255,0.023647998770078022
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,8,4,128,1,float16,float16,1023,0.08036800225575765
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,8,4,128,1,float16,fp8,511,0.029552000264326733
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,8,4,128,1,float16,float16,511,0.0641546646753947
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,8,4,128,1,float16,fp8,1023,0.04382933179537455
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,8,4,128,1,float16,float16,2047,0.12083199620246887
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,8,4,128,1,float16,fp8,2047,0.06633600095907848
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,8,4,128,1,float16,fp8,4095,0.11219732960065205
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,8,4,128,1,float16,float16,4095,0.19210133949915567
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,8,4,128,1,float16,float16,8191,0.33340267340342206
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,8,4,128,1,float16,fp8,8191,0.2015893260637919
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,256,1,8,1,128,1,float16,float16,1,0.015935999651749928
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,256,1,8,1,128,1,float16,fp8,1,0.006858666737874349
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,256,1,8,1,128,1,float16,float16,3,0.015919999529918034
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,256,1,8,1,128,1,float16,float16,7,0.016010666886965435
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,256,1,8,1,128,1,float16,fp8,3,0.008080000057816505
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,8,4,128,1,float16,fp8,16383,0.3843146562576294
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,256,1,8,1,128,1,float16,float16,15,0.01586666703224182
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,256,1,8,1,128,1,float16,fp8,7,0.006618666773041089
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,256,1,8,1,128,1,float16,fp8,15,0.006911999856432279
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,256,1,8,1,128,1,float16,fp8,31,0.007141333073377609
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,8,4,128,1,float16,float16,16383,0.6158080101013184
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,256,1,8,1,128,1,float16,float16,31,0.01616000011563301
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,256,1,8,1,128,1,float16,float16,63,0.016106666376193363
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,256,1,8,1,128,1,float16,float16,127,0.01598400001724561
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,256,1,8,1,128,1,float16,fp8,127,0.007424000029762586
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,256,1,8,1,128,1,float16,fp8,63,0.00855466661353906
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,256,1,8,1,128,1,float16,float16,255,0.030437332888444264
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,256,1,8,1,128,1,float16,fp8,255,0.015237333873907724
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,256,1,8,1,128,1,float16,fp8,511,0.02178666740655899
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,256,1,8,1,128,1,float16,float16,511,0.046480000019073486
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,256,1,8,1,128,1,float16,fp8,1023,0.02755733331044515
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,256,1,8,1,128,1,float16,float16,2047,0.08428266644477844
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,256,1,8,1,128,1,float16,float16,1023,0.06205866734186808
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,256,1,8,2,128,1,float16,fp8,1,0.009695999945203463
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,256,1,8,2,128,1,float16,float16,1,0.025568000972270966
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,256,1,8,1,128,1,float16,fp8,2047,0.04106666644414266
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,256,1,8,1,128,1,float16,float16,4095,0.1221440037091573
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,256,1,8,1,128,1,float16,fp8,4095,0.06162666777769724
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,256,1,8,2,128,1,float16,float16,3,0.025392000873883564
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,256,1,8,2,128,1,float16,fp8,3,0.00973866693675518
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,256,1,8,2,128,1,float16,fp8,7,0.009568000212311745
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,256,1,8,2,128,1,float16,float16,7,0.02565866708755493
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,256,1,8,2,128,1,float16,float16,15,0.02568000058333079
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,256,1,8,2,128,1,float16,float16,31,0.0252960001428922
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,256,1,8,2,128,1,float16,fp8,15,0.009610666582981745
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,256,1,8,2,128,1,float16,float16,63,0.025205334027608235
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,256,1,8,2,128,1,float16,fp8,31,0.01080000028014183
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,256,1,8,2,128,1,float16,float16,127,0.025685332715511322
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,256,1,8,2,128,1,float16,fp8,127,0.01080000028014183
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,256,1,8,2,128,1,float16,fp8,63,0.00956266683836778
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,256,1,8,2,128,1,float16,float16,255,0.048991998036702476
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,256,1,8,2,128,1,float16,float16,511,0.06486933430035909
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,256,1,8,2,128,1,float16,fp8,255,0.023978665471076965
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,256,1,8,2,128,1,float16,fp8,511,0.03012799968322118
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,256,1,8,2,128,1,float16,fp8,1023,0.04419200122356415
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,256,1,8,2,128,1,float16,float16,2047,0.12213333447774251
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,256,1,8,2,128,1,float16,float16,1023,0.08108266691366832
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,256,1,8,2,128,1,float16,fp8,2047,0.06695466736952464
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,256,1,8,4,128,1,float16,float16,1,0.04446400205294291
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,256,1,8,2,128,1,float16,fp8,4095,0.11225600043932597
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,256,1,8,2,128,1,float16,float16,4095,0.19380799929300943
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,256,1,8,4,128,1,float16,fp8,1,0.016549333930015564
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,256,1,8,4,128,1,float16,float16,3,0.044341335693995156
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,256,1,8,4,128,1,float16,fp8,3,0.016629333297411602
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,256,1,8,4,128,1,float16,fp8,7,0.016447999825080235
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,256,1,8,4,128,1,float16,float16,15,0.04437333345413208
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,256,1,8,4,128,1,float16,float16,7,0.044735997915267944
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,256,1,8,4,128,1,float16,fp8,15,0.016517333686351776
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,256,1,8,4,128,1,float16,float16,31,0.04494399825731913
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,256,1,8,4,128,1,float16,fp8,31,0.016597333053747814
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,256,1,8,4,128,1,float16,float16,63,0.044266665975252785
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,256,1,8,4,128,1,float16,fp8,63,0.01637866720557213
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,256,1,8,4,128,1,float16,float16,127,0.04473066826661428
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,256,1,8,4,128,1,float16,fp8,127,0.018805333723624546
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,256,1,8,4,128,1,float16,fp8,255,0.025050667424996693
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,256,1,8,4,128,1,float16,float16,255,0.05230933427810669
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,256,1,8,4,128,1,float16,float16,511,0.06966400146484375
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,256,1,8,4,128,1,float16,fp8,511,0.03872533390919367
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,256,1,8,4,128,1,float16,float16,1023,0.10342933734258015
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,256,1,8,4,128,1,float16,fp8,1023,0.0617439995209376
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,256,1,8,4,128,1,float16,float16,2047,0.17675199111302695
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,256,1,8,4,128,1,float16,fp8,2047,0.10726933677991231
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,512,1,8,1,128,1,float16,float16,1,0.026357332865397137
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,256,1,8,4,128,1,float16,float16,4095,0.31540799140930176
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,512,1,8,1,128,1,float16,fp8,1,0.01044800008336703
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,512,1,8,1,128,1,float16,float16,3,0.025941332181294758
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,256,1,8,4,128,1,float16,fp8,4095,0.19789334138234457
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,512,1,8,1,128,1,float16,fp8,3,0.010591999938090643
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,512,1,8,1,128,1,float16,float16,7,0.025909334421157837
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,512,1,8,1,128,1,float16,fp8,7,0.010330666477481524
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,512,1,8,1,128,1,float16,float16,15,0.026122666895389557
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,512,1,8,1,128,1,float16,fp8,15,0.010437333335479101
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,512,1,8,1,128,1,float16,float16,31,0.0260959987839063
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,512,1,8,1,128,1,float16,fp8,31,0.010512000570694605
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,512,1,8,1,128,1,float16,fp8,63,0.010490667074918747
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,512,1,8,1,128,1,float16,float16,63,0.025936000049114227
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,512,1,8,1,128,1,float16,float16,127,0.0260959987839063
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,512,1,8,1,128,1,float16,fp8,127,0.012042666474978128
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,512,1,8,1,128,1,float16,float16,255,0.049839998284975685
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,512,1,8,1,128,1,float16,fp8,255,0.021669333179791767
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,512,1,8,1,128,1,float16,float16,511,0.06531733274459839
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,512,1,8,1,128,1,float16,fp8,511,0.028336000939210255
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,512,1,8,1,128,1,float16,float16,1023,0.08214933176835378
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,512,1,8,2,128,1,float16,fp8,1,0.01657066618402799
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,512,1,8,2,128,1,float16,float16,1,0.0447626660267512
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,512,1,8,1,128,1,float16,fp8,1023,0.041296000281969704
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,512,1,8,2,128,1,float16,float16,3,0.04474666714668274
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,512,1,8,1,128,1,float16,float16,2047,0.12341333429018657
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,512,1,8,1,128,1,float16,fp8,2047,0.06061866879463196
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,512,1,8,2,128,1,float16,fp8,3,0.01642666632930438
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,512,1,8,2,128,1,float16,float16,7,0.044997334480285645
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,512,1,8,2,128,1,float16,fp8,7,0.016549333930015564
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,512,1,8,2,128,1,float16,float16,15,0.04470400015513102
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,512,1,8,2,128,1,float16,float16,31,0.044810667634010315
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,512,1,8,2,128,1,float16,fp8,31,0.01664000004529953
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,512,1,8,2,128,1,float16,float16,63,0.04460800190766653
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,512,1,8,2,128,1,float16,fp8,15,0.016602666427691776
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,512,1,8,2,128,1,float16,fp8,63,0.016415999581416447
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,512,1,8,2,128,1,float16,float16,127,0.04525866607824961
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,512,1,8,2,128,1,float16,fp8,127,0.01911466692884763
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,512,1,8,2,128,1,float16,float16,255,0.05283733208974203
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,512,1,8,2,128,1,float16,fp8,255,0.025616000096003216
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,512,1,8,2,128,1,float16,float16,511,0.07044266661008199
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,512,1,8,2,128,1,float16,fp8,511,0.039834665755430855
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,512,1,8,2,128,1,float16,float16,1023,0.10418132940928142
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,512,1,8,2,128,1,float16,fp8,2047,0.10782933235168457
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,512,1,8,2,128,1,float16,fp8,1023,0.06261866788069408
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,512,1,8,2,128,1,float16,float16,2047,0.1766080061594645
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,512,1,8,4,128,1,float16,float16,1,0.08116266628106435
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,512,1,8,4,128,1,float16,fp8,1,0.030181333422660828
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,512,1,8,4,128,1,float16,fp8,3,0.03012799968322118
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,512,1,8,4,128,1,float16,float16,3,0.0815413345893224
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,512,1,8,4,128,1,float16,fp8,7,0.030154667794704437
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,512,1,8,4,128,1,float16,fp8,15,0.030239999294281006
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,512,1,8,4,128,1,float16,float16,7,0.0816480020682017
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,512,1,8,4,128,1,float16,float16,31,0.08121066788832347
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,512,1,8,4,128,1,float16,fp8,31,0.03019733230272929
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,512,1,8,4,128,1,float16,float16,15,0.08142399787902832
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,512,1,8,4,128,1,float16,float16,63,0.08079466720422109
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,512,1,8,4,128,1,float16,fp8,63,0.02997333308060964
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,512,1,8,4,128,1,float16,float16,127,0.08272533118724823
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,512,1,8,4,128,1,float16,fp8,127,0.035258665680885315
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,512,1,8,4,128,1,float16,fp8,255,0.05027733246485392
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,512,1,8,4,128,1,float16,float16,511,0.1265120009581248
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,512,1,8,4,128,1,float16,float16,255,0.09423466523488362
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,512,1,8,4,128,1,float16,fp8,511,0.07296533385912578
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,512,1,8,4,128,1,float16,float16,1023,0.19509865840276083
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,8,1,128,1,float16,float16,1,0.008352000266313553
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,512,1,8,4,128,1,float16,float16,2047,0.33477866649627686
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,8,1,128,1,float16,fp8,1,0.009418666362762451
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,512,1,8,4,128,1,float16,fp8,1023,0.11890133221944173
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,512,1,8,4,128,1,float16,fp8,2047,0.20936532815297446
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,8,1,128,1,float16,float16,7,0.008261333530147871
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,8,1,128,1,float16,float16,3,0.00943999985853831
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,8,1,128,1,float16,fp8,3,0.010490667074918747
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,8,1,128,1,float16,fp8,7,0.009445333232482275
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,8,1,128,1,float16,fp8,15,0.009637333452701569
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,8,1,128,1,float16,float16,15,0.008538666491707167
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,8,1,128,1,float16,fp8,31,0.01027199998497963
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,8,1,128,1,float16,float16,63,0.009477333476146063
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,8,1,128,1,float16,float16,31,0.009898666913310686
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,8,1,128,1,float16,fp8,63,0.012186666329701742
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,8,1,128,1,float16,float16,127,0.010773333410422007
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,8,1,128,1,float16,fp8,127,0.012181332955757776
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,8,1,128,1,float16,fp8,255,0.0122079998254776
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,8,1,128,1,float16,float16,255,0.010496000448862711
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,8,1,128,1,float16,fp8,511,0.016783999900023144
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,8,1,128,1,float16,float16,511,0.016805333395799
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,8,1,128,1,float16,float16,1023,0.013381333400805792
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,8,1,128,1,float16,fp8,1023,0.008458666503429413
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,8,1,128,1,float16,float16,2047,0.016895999511082966
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,8,1,128,1,float16,float16,4095,0.01807466646035512
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,8,1,128,1,float16,fp8,2047,0.009568000212311745
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,8,1,128,1,float16,fp8,4095,0.010794666906197866
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,8,1,128,1,float16,float16,8191,0.0273333340883255
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,8,1,128,1,float16,fp8,8191,0.014912000546852747
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,8,1,128,1,float16,fp8,16383,0.017136000096797943
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,8,1,128,1,float16,float16,16383,0.03245333333810171
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,8,1,128,1,float16,fp8,32767,0.02629333237806956
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,8,1,128,1,float16,float16,32767,0.05189866820971171
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,8,1,128,1,float16,float16,65535,0.07666666805744171
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,8,1,128,1,float16,fp8,65535,0.04994133114814758
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,8,2,128,1,float16,float16,1,0.008506666868925095
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,8,1,128,1,float16,float16,131071,0.12732266386349997
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,8,1,128,1,float16,fp8,131071,0.06831466654936473
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,8,2,128,1,float16,float16,3,0.008256000156203905
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,8,2,128,1,float16,fp8,1,0.010389333590865135
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,8,2,128,1,float16,float16,7,0.008256000156203905
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,8,2,128,1,float16,fp8,3,0.010608000059922537
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,8,2,128,1,float16,float16,15,0.008661333471536636
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,8,2,128,1,float16,fp8,7,0.009392000113924345
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,8,2,128,1,float16,fp8,15,0.009599999835093817
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,8,2,128,1,float16,fp8,31,0.010437333335479101
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,8,2,128,1,float16,float16,31,0.009322666873534521
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,8,2,128,1,float16,fp8,63,0.012202666451533636
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,8,2,128,1,float16,float16,63,0.01022933361430963
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,8,2,128,1,float16,fp8,127,0.012170666207869848
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,8,2,128,1,float16,fp8,255,0.012154666086037954
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,8,2,128,1,float16,float16,127,0.010341333225369453
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,8,2,128,1,float16,float16,255,0.010527999450763067
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,8,2,128,1,float16,float16,511,0.012970666090647379
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,8,2,128,1,float16,fp8,511,0.007525333513816197
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,8,2,128,1,float16,float16,1023,0.013290667285521826
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,8,2,128,1,float16,float16,2047,0.021882665654023487
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,8,2,128,1,float16,fp8,2047,0.01027199998497963
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,8,2,128,1,float16,float16,8191,0.036544000109036766
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,8,2,128,1,float16,fp8,1023,0.009072000160813332
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,8,2,128,1,float16,fp8,4095,0.011765333513418833
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,8,2,128,1,float16,float16,4095,0.025279998779296875
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,8,2,128,1,float16,fp8,8191,0.020831999679406483
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,8,2,128,1,float16,float16,16383,0.05358933409055074
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,8,2,128,1,float16,fp8,16383,0.026165333886941273
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,8,2,128,1,float16,float16,32767,0.08025066554546356
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,8,2,128,1,float16,fp8,32767,0.04542933404445648
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,8,2,128,1,float16,fp8,65535,0.0728959987560908
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,8,2,128,1,float16,float16,65535,0.12566399574279785
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,8,4,128,1,float16,float16,1,0.008410666758815447
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,8,2,128,1,float16,fp8,131071,0.13520532846450806
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,8,4,128,1,float16,fp8,1,0.009648000200589498
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,8,4,128,1,float16,fp8,3,0.00943999985853831
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,8,4,128,1,float16,float16,3,0.008197333042820295
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,8,2,128,1,float16,float16,131071,0.2102880080540975
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,8,4,128,1,float16,float16,7,0.008336000144481659
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,8,4,128,1,float16,fp8,7,0.010330666477481524
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,8,4,128,1,float16,fp8,31,0.010405333091815313
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,8,4,128,1,float16,fp8,15,0.009749333063761393
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,8,4,128,1,float16,float16,15,0.008810666700204214
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,8,4,128,1,float16,float16,31,0.009242666885256767
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,8,4,128,1,float16,float16,63,0.009583999713261923
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,8,4,128,1,float16,float16,127,0.009466666728258133
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,8,4,128,1,float16,fp8,63,0.012154666086037954
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,8,4,128,1,float16,float16,255,0.012469333906968435
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,8,4,128,1,float16,fp8,255,0.007274666801095009
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,8,4,128,1,float16,fp8,127,0.012122667084137598
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,8,4,128,1,float16,fp8,511,0.008330666770537695
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,8,4,128,1,float16,float16,1023,0.019424000134070713
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,8,4,128,1,float16,float16,511,0.013050666699806849
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,8,4,128,1,float16,fp8,2047,0.01516266663869222
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,8,4,128,1,float16,fp8,1023,0.008986666798591614
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,8,4,128,1,float16,float16,2047,0.02899733434120814
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,8,4,128,1,float16,fp8,8191,0.028405333558718365
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,8,4,128,1,float16,fp8,4095,0.017871999492247898
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,8,4,128,1,float16,float16,8191,0.05894933144251505
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,8,4,128,1,float16,float16,4095,0.04082666585842768
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,8,4,128,1,float16,float16,16383,0.08678399523099263
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,8,4,128,1,float16,fp8,16383,0.045653333266576133
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,8,4,128,1,float16,float16,32767,0.124208003282547
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,8,4,128,1,float16,fp8,32767,0.07663999994595845
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,8,4,128,1,float16,float16,65535,0.20015466213226318
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,8,4,128,1,float16,fp8,65535,0.1267733375231425
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1024,1,8,1,128,1,float16,float16,1,0.04632533093293508
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,8,4,128,1,float16,float16,131071,0.3569120168685913
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,8,4,128,1,float16,fp8,131071,0.21413866678873697
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1024,1,8,1,128,1,float16,fp8,1,0.014959999670584997
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1024,1,8,1,128,1,float16,float16,3,0.04620266457398733
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1024,1,8,1,128,1,float16,float16,7,0.04613866905371348
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1024,1,8,1,128,1,float16,fp8,7,0.014970666418472925
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1024,1,8,1,128,1,float16,fp8,3,0.01504533365368843
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1024,1,8,1,128,1,float16,float16,15,0.04610666632652283
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1024,1,8,1,128,1,float16,fp8,15,0.014981333166360855
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1024,1,8,1,128,1,float16,float16,31,0.046015997727712
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1024,1,8,1,128,1,float16,fp8,31,0.014997333288192749
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1024,1,8,1,128,1,float16,float16,63,0.04568533102671305
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1024,1,8,1,128,1,float16,fp8,63,0.014885333677132925
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1024,1,8,1,128,1,float16,float16,127,0.04637866715590159
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1024,1,8,1,128,1,float16,fp8,127,0.017850667238235474
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1024,1,8,1,128,1,float16,float16,255,0.054272000988324486
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1024,1,8,1,128,1,float16,fp8,255,0.024527999262015026
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1024,1,8,1,128,1,float16,fp8,511,0.036917333801587425
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1024,1,8,1,128,1,float16,float16,511,0.07130133112271626
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1024,1,8,2,128,1,float16,float16,1,0.0816480020682017
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1024,1,8,2,128,1,float16,fp8,1,0.030031998952229817
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1024,1,8,1,128,1,float16,float16,1023,0.10586133599281311
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1024,1,8,2,128,1,float16,fp8,3,0.03012799968322118
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1024,1,8,2,128,1,float16,float16,3,0.08172266681989034
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1024,1,8,2,128,1,float16,float16,7,0.08180800080299377
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1024,1,8,1,128,1,float16,fp8,1023,0.05611200133959452
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1024,1,8,2,128,1,float16,fp8,7,0.030063999195893604
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1024,1,8,2,128,1,float16,float16,15,0.08142933249473572
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1024,1,8,2,128,1,float16,fp8,15,0.02991466720898946
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1024,1,8,2,128,1,float16,float16,31,0.08162133395671844
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1024,1,8,2,128,1,float16,fp8,31,0.030218665798505146
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1024,1,8,2,128,1,float16,float16,63,0.08182399968306224
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1024,1,8,2,128,1,float16,fp8,63,0.02994133283694585
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1024,1,8,2,128,1,float16,fp8,127,0.03618666778008143
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1024,1,8,2,128,1,float16,float16,127,0.08343999584515889
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1024,1,8,2,128,1,float16,float16,255,0.09499733646710713
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1024,1,8,2,128,1,float16,fp8,255,0.051541333397229515
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1024,1,8,2,128,1,float16,float16,511,0.12735999623934427
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1024,1,8,2,128,1,float16,fp8,511,0.07461333274841309
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1024,1,8,4,128,1,float16,float16,1,0.1504693329334259
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1024,1,8,2,128,1,float16,float16,1023,0.1959679921468099
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1024,1,8,2,128,1,float16,fp8,1023,0.12026133139928182
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1024,1,8,4,128,1,float16,fp8,3,0.05989866455396017
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1024,1,8,4,128,1,float16,float16,3,0.15005333224932352
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1024,1,8,4,128,1,float16,fp8,1,0.05930666625499725
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1024,1,8,4,128,1,float16,float16,7,0.15083733201026917
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1024,1,8,4,128,1,float16,float16,15,0.1509173313776652
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1024,1,8,4,128,1,float16,fp8,7,0.05902933577696482
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1024,1,8,4,128,1,float16,float16,31,0.1520746648311615
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1024,1,8,4,128,1,float16,fp8,15,0.060506666700045265
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1024,1,8,4,128,1,float16,fp8,31,0.060175999999046326
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1024,1,8,4,128,1,float16,float16,63,0.15339199701944986
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1024,1,8,4,128,1,float16,fp8,63,0.05891199906667074
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1024,1,8,4,128,1,float16,fp8,127,0.07297599812348683
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1024,1,8,4,128,1,float16,float16,127,0.15430399775505066
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1024,1,8,4,128,1,float16,float16,255,0.17769600947697958
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1024,1,8,4,128,1,float16,fp8,255,0.09404266873995464
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1024,1,8,4,128,1,float16,float16,511,0.24023999770482382
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1024,1,8,4,128,1,float16,fp8,511,0.1394773324330648
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2048,1,8,1,128,1,float16,float16,1,0.08349333206812541
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1024,1,8,4,128,1,float16,float16,1023,0.37401068210601807
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2048,1,8,1,128,1,float16,fp8,1,0.026000000536441803
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1024,1,8,4,128,1,float16,fp8,1023,0.23069866498311362
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2048,1,8,1,128,1,float16,float16,3,0.08365333080291748
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2048,1,8,1,128,1,float16,float16,7,0.08358933528264363
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2048,1,8,1,128,1,float16,fp8,3,0.026159999271233875
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2048,1,8,1,128,1,float16,fp8,7,0.02605333427588145
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2048,1,8,1,128,1,float16,float16,15,0.08354133367538452
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2048,1,8,1,128,1,float16,fp8,15,0.026127999027570088
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2048,1,8,1,128,1,float16,float16,31,0.08352532982826233
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2048,1,8,1,128,1,float16,fp8,31,0.02645866572856903
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2048,1,8,1,128,1,float16,float16,63,0.08392533659934998
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2048,1,8,1,128,1,float16,fp8,63,0.02626666675011317
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2048,1,8,1,128,1,float16,fp8,127,0.033701332906881966
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2048,1,8,1,128,1,float16,float16,127,0.08609066406885783
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2048,1,8,2,128,1,float16,float16,1,0.15109333395957947
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2048,1,8,1,128,1,float16,float16,255,0.09830400347709656
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2048,1,8,2,128,1,float16,fp8,1,0.06384533147017162
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2048,1,8,1,128,1,float16,fp8,255,0.04659200211366018
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2048,1,8,2,128,1,float16,float16,3,0.15100799997647604
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2048,1,8,1,128,1,float16,fp8,511,0.06568533182144165
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2048,1,8,1,128,1,float16,float16,511,0.130431999762853
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2048,1,8,2,128,1,float16,float16,7,0.15122666954994202
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2048,1,8,2,128,1,float16,fp8,7,0.06452266871929169
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2048,1,8,2,128,1,float16,fp8,3,0.061706667145093284
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2048,1,8,2,128,1,float16,float16,15,0.15135467052459717
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2048,1,8,2,128,1,float16,float16,31,0.1532693306605021
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2048,1,8,2,128,1,float16,fp8,15,0.06390400230884552
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2048,1,8,2,128,1,float16,fp8,31,0.06390933195749919
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2048,1,8,2,128,1,float16,float16,63,0.15502933661142984
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2048,1,8,2,128,1,float16,fp8,63,0.06172800064086914
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2048,1,8,2,128,1,float16,float16,127,0.1567200024922689
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2048,1,8,2,128,1,float16,float16,255,0.1792693336804708
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2048,1,8,2,128,1,float16,fp8,127,0.07506666580835979
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2048,1,8,2,128,1,float16,fp8,255,0.09700266520182292
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2048,1,8,4,128,1,float16,float16,1,0.2906240026156108
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2048,1,8,4,128,1,float16,fp8,1,0.12446932991345723
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2048,1,8,4,128,1,float16,float16,3,0.29079999526341754
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2048,1,8,2,128,1,float16,float16,511,0.2430773377418518
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2048,1,8,4,128,1,float16,fp8,3,0.1243946651617686
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2048,1,8,2,128,1,float16,fp8,511,0.14205867052078247
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2048,1,8,4,128,1,float16,fp8,7,0.12424000104268391
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2048,1,8,4,128,1,float16,float16,15,0.29333333174387616
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2048,1,8,4,128,1,float16,float16,7,0.2919466694196065
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2048,1,8,4,128,1,float16,float16,31,0.2972693244616191
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2048,1,8,4,128,1,float16,fp8,15,0.12386666735013326
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2048,1,8,4,128,1,float16,float16,63,0.2972960074742635
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2048,1,8,4,128,1,float16,fp8,63,0.12492799758911133
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2048,1,8,4,128,1,float16,float16,127,0.2972053289413452
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2048,1,8,4,128,1,float16,fp8,31,0.12428266803423564
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2048,1,8,4,128,1,float16,fp8,127,0.13883733749389648
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,8,1,128,1,float16,fp8,1,0.009637333452701569
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,8,1,128,1,float16,float16,1,0.008303999900817871
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2048,1,8,4,128,1,float16,fp8,255,0.18084800243377686
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,8,1,128,1,float16,fp8,3,0.009455999980370203
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,8,1,128,1,float16,float16,3,0.009450666606426239
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,8,1,128,1,float16,float16,7,0.00855466661353906
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2048,1,8,4,128,1,float16,float16,255,0.34494932492574054
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,8,1,128,1,float16,fp8,7,0.009599999835093817
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,8,1,128,1,float16,float16,15,0.00867733359336853
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2048,1,8,4,128,1,float16,fp8,511,0.2706986665725708
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,8,1,128,1,float16,fp8,15,0.00955200009047985
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,8,1,128,1,float16,fp8,31,0.010437333335479101
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,8,1,128,1,float16,fp8,63,0.012250666817029318
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,8,1,128,1,float16,float16,31,0.01020800011853377
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,8,1,128,1,float16,float16,63,0.010591999938090643
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,8,1,128,1,float16,float16,127,0.00979200005531311
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,8,1,128,1,float16,fp8,255,0.012383999923865
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,8,1,128,1,float16,fp8,127,0.01246400053302447
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2048,1,8,4,128,1,float16,float16,511,0.46666133403778076
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,8,1,128,1,float16,float16,255,0.010847999403874079
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,8,1,128,1,float16,float16,511,0.012805332740147909
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,8,1,128,1,float16,float16,2047,0.021850667893886566
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,8,1,128,1,float16,fp8,1023,0.008826666822036108
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,8,1,128,1,float16,fp8,511,0.008837333569924036
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,8,1,128,1,float16,float16,1023,0.013237333546082178
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,8,1,128,1,float16,fp8,2047,0.010949333508809408
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,8,1,128,1,float16,fp8,4095,0.012682666381200155
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,8,1,128,1,float16,float16,4095,0.025424001117547352
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,8,1,128,1,float16,float16,8191,0.036570665736993156
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,8,1,128,1,float16,fp8,8191,0.01681600014368693
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,8,1,128,1,float16,float16,16383,0.053472002347310386
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,8,1,128,1,float16,float16,32767,0.08156266808509827
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,8,1,128,1,float16,fp8,32767,0.04195199906826019
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,8,1,128,1,float16,fp8,16383,0.0278613343834877
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,8,1,128,1,float16,float16,65535,0.12805333733558655
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,8,1,128,1,float16,fp8,65535,0.06920533378918965
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,8,2,128,1,float16,float16,1,0.008863999818762144
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,8,2,128,1,float16,fp8,1,0.009712000067035357
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,8,2,128,1,float16,float16,3,0.008517333616813024
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,8,2,128,1,float16,float16,7,0.008463999877373377
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,8,1,128,1,float16,float16,131071,0.21100266774495444
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,8,1,128,1,float16,fp8,131071,0.11412800351778667
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,8,2,128,1,float16,fp8,3,0.010389333590865135
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,8,2,128,1,float16,float16,15,0.008943999807039896
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,8,2,128,1,float16,fp8,7,0.009690666571259499
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,8,2,128,1,float16,fp8,15,0.00980266680320104
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,8,2,128,1,float16,float16,31,0.009541333342591921
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,8,2,128,1,float16,fp8,31,0.010490667074918747
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,8,2,128,1,float16,fp8,63,0.012240000069141388
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,8,2,128,1,float16,fp8,127,0.012383999923865
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,8,2,128,1,float16,fp8,255,0.007541333635648091
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,8,2,128,1,float16,float16,63,0.00956266683836778
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,8,2,128,1,float16,float16,127,0.01044800008336703
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,8,2,128,1,float16,float16,255,0.012597333639860153
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,8,2,128,1,float16,fp8,511,0.007727999861041705
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,8,2,128,1,float16,float16,511,0.013077333569526672
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,8,2,128,1,float16,float16,1023,0.019199999670187633
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,8,2,128,1,float16,float16,2047,0.029103999336560566
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,8,2,128,1,float16,fp8,1023,0.009269333134094873
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,8,2,128,1,float16,fp8,2047,0.01525866612792015
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,8,2,128,1,float16,fp8,4095,0.0179626668492953
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,8,2,128,1,float16,float16,4095,0.041002665956815086
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,8,2,128,1,float16,fp8,8191,0.028768000503381092
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,8,2,128,1,float16,float16,8191,0.058378666639328
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,8,2,128,1,float16,fp8,16383,0.04609066744645437
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,8,2,128,1,float16,float16,16383,0.0867733359336853
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,8,2,128,1,float16,float16,32767,0.12355200449625652
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,8,2,128,1,float16,fp8,32767,0.07691200077533722
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,8,2,128,1,float16,float16,65535,0.19989866018295288
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,8,4,128,1,float16,float16,1,0.01020800011853377
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,8,4,128,1,float16,fp8,1,0.006064000229040782
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,8,2,128,1,float16,fp8,65535,0.12785599629084268
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,8,4,128,1,float16,float16,3,0.010175999874869982
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,8,2,128,1,float16,float16,131071,0.35711467266082764
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,8,2,128,1,float16,fp8,131071,0.21572800477345785
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,8,4,128,1,float16,fp8,3,0.005914666379491488
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,8,4,128,1,float16,float16,7,0.0102613332370917
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,8,4,128,1,float16,float16,15,0.01028266673286756
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,8,4,128,1,float16,fp8,7,0.006031999985376994
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,8,4,128,1,float16,fp8,15,0.0063146669417619705
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,8,4,128,1,float16,fp8,31,0.006293333445986112
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,8,4,128,1,float16,float16,31,0.01055466632048289
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,8,4,128,1,float16,float16,63,0.010277333358923594
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,8,4,128,1,float16,fp8,63,0.0058666666348775225
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,8,4,128,1,float16,float16,127,0.010314666976531347
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,8,4,128,1,float16,fp8,127,0.006533333410819371
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,8,4,128,1,float16,float16,255,0.013584000368913015
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,8,4,128,1,float16,fp8,255,0.008192000289758047
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,8,4,128,1,float16,float16,511,0.01926933353145917
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,8,4,128,1,float16,float16,2047,0.04351999859015147
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,8,4,128,1,float16,float16,1023,0.02790933350721995
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,8,4,128,1,float16,fp8,1023,0.015008000036080679
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,8,4,128,1,float16,fp8,2047,0.020655999581019085
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,8,4,128,1,float16,fp8,511,0.009381333366036415
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,8,4,128,1,float16,float16,4095,0.06761066615581512
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,8,4,128,1,float16,fp8,4095,0.03201066702604294
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,8,4,128,1,float16,fp8,8191,0.04568533102671305
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,8,4,128,1,float16,float16,8191,0.08522133032480876
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,8,4,128,1,float16,fp8,16383,0.0682239979505539
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,8,4,128,1,float16,float16,16383,0.12079999844233195
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,8,4,128,1,float16,float16,32767,0.19107200702031454
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,8,4,128,1,float16,fp8,32767,0.11366400122642517
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,8,4,128,1,float16,float16,65535,0.3333280086517334
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,8,4,128,1,float16,fp8,65535,0.20414400100708008
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,4,1,128,1,float16,float16,3,0.008527999743819237
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,4,1,128,1,float16,float16,1,0.008410666758815447
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,4,1,128,1,float16,fp8,1,0.009541333342591921
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,8,4,128,1,float16,float16,131071,0.6211520036061605
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,4,1,128,1,float16,float16,7,0.008634666601816813
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,8,4,128,1,float16,fp8,131071,0.38558932145436603
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,4,1,128,1,float16,fp8,3,0.009717333440979322
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,4,1,128,1,float16,fp8,7,0.009695999945203463
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,4,1,128,1,float16,float16,15,0.008693333094318708
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,4,1,128,1,float16,fp8,15,0.00983466642598311
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,4,1,128,1,float16,float16,31,0.009397333487868309
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,4,1,128,1,float16,float16,63,0.009519999846816063
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,4,1,128,1,float16,fp8,31,0.010618666807810465
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,4,1,128,1,float16,fp8,63,0.01246400053302447
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,4,1,128,1,float16,float16,127,0.009541333342591921
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,4,1,128,1,float16,fp8,127,0.012554666648308435
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,4,1,128,1,float16,float16,255,0.012693333129088083
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,4,1,128,1,float16,fp8,255,0.00721066693464915
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,4,1,128,1,float16,float16,511,0.013045333325862885
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,4,1,128,1,float16,float16,1023,0.019280000279347103
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,4,1,128,1,float16,fp8,511,0.007690666864315669
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,4,1,128,1,float16,fp8,1023,0.009258666386206945
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,4,1,128,1,float16,fp8,2047,0.01534933348496755
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,4,1,128,1,float16,float16,2047,0.029125332832336426
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,4,1,128,1,float16,float16,4095,0.04072533299525579
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,4,1,128,1,float16,fp8,4095,0.0180479995906353
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,4,1,128,1,float16,float16,8191,0.05958400170008341
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,4,1,128,1,float16,fp8,8191,0.028597332537174225
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,4,1,128,1,float16,fp8,16383,0.04645333190759023
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,4,1,128,1,float16,float16,16383,0.0879253347714742
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,4,1,128,1,float16,float16,32767,0.1252959966659546
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,4,1,128,1,float16,fp8,32767,0.07705600063006084
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,4,1,128,1,float16,float16,65535,0.20057600736618042
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,4,2,128,1,float16,float16,1,0.010175999874869982
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,4,2,128,1,float16,fp8,1,0.005893333504597346
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,4,2,128,1,float16,float16,3,0.010165333126982054
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,4,1,128,1,float16,fp8,65535,0.12689600388209024
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,4,2,128,1,float16,fp8,3,0.005914666379491488
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,4,1,128,1,float16,float16,131071,0.3595946629842122
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,4,2,128,1,float16,float16,7,0.010346666599313417
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,4,2,128,1,float16,fp8,7,0.006144000217318535
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,4,1,128,1,float16,fp8,131071,0.21708800395329794
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,4,2,128,1,float16,fp8,15,0.006415999804933866
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,4,2,128,1,float16,float16,15,0.009973333527644476
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,4,2,128,1,float16,float16,31,0.01009599988659223
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,4,2,128,1,float16,fp8,31,0.006144000217318535
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,4,2,128,1,float16,float16,63,0.010053333515922228
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,4,2,128,1,float16,fp8,63,0.006181333214044571
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,4,2,128,1,float16,float16,127,0.010474666953086853
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,4,2,128,1,float16,float16,255,0.013845333208640417
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,4,2,128,1,float16,fp8,127,0.007167999943097432
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,4,2,128,1,float16,fp8,255,0.007391999786098798
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,4,2,128,1,float16,float16,511,0.019141333798567455
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,4,2,128,1,float16,float16,1023,0.027786667148272198
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,4,2,128,1,float16,fp8,511,0.008341333518425623
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,4,2,128,1,float16,fp8,1023,0.01509333277742068
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,4,2,128,1,float16,fp8,2047,0.02083733429511388
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,4,2,128,1,float16,float16,4095,0.06745066742102306
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,4,2,128,1,float16,float16,2047,0.04334933559099833
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,4,2,128,1,float16,fp8,4095,0.03195200115442276
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,4,2,128,1,float16,float16,8191,0.08559466401735942
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,4,2,128,1,float16,fp8,8191,0.04514666895071665
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,4,2,128,1,float16,float16,16383,0.12082667152086894
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,4,2,128,1,float16,fp8,16383,0.06819733480612437
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,4,2,128,1,float16,float16,32767,0.19233065843582153
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,4,2,128,1,float16,fp8,32767,0.1136853297551473
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,4,2,128,1,float16,fp8,65535,0.20446399847666422
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,4,2,128,1,float16,float16,65535,0.3336160182952881
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,4,1,128,1,float16,float16,1,0.007829333345095316
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,4,1,128,1,float16,fp8,1,0.008885333314538002
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,4,1,128,1,float16,float16,3,0.008074666683872541
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,4,1,128,1,float16,fp8,3,0.009045333291093508
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,4,1,128,1,float16,float16,7,0.00785600021481514
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,4,1,128,1,float16,fp8,7,0.009061333412925402
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,4,2,128,1,float16,float16,131071,0.6179680029551188
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,32,1,4,2,128,1,float16,fp8,131071,0.38605864842732746
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,4,1,128,1,float16,float16,31,0.009045333291093508
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,4,1,128,1,float16,fp8,15,0.009119999905427298
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,4,1,128,1,float16,float16,15,0.008229333286484083
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,4,1,128,1,float16,fp8,31,0.010064000263810158
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,4,1,128,1,float16,float16,63,0.008943999807039896
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,4,1,128,1,float16,fp8,127,0.011754666765530905
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,4,1,128,1,float16,fp8,63,0.011776000261306763
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,4,1,128,1,float16,float16,127,0.009008000294367472
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,4,1,128,1,float16,float16,255,0.010181333248813948
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,4,1,128,1,float16,fp8,255,0.011781333635250727
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,4,1,128,1,float16,float16,511,0.016149333367745083
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,4,1,128,1,float16,fp8,511,0.016085332880417507
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,4,1,128,1,float16,float16,1023,0.01716800034046173
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,4,1,128,1,float16,fp8,1023,0.01721599946419398
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,4,1,128,1,float16,float16,2047,0.01746133342385292
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,4,1,128,1,float16,fp8,2047,0.017093333105246227
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,4,1,128,1,float16,float16,4095,0.017375999440749485
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,4,1,128,1,float16,fp8,4095,0.017279999951521557
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,4,1,128,1,float16,float16,8191,0.017984000345071156
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,4,1,128,1,float16,fp8,8191,0.012223999947309494
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,4,1,128,1,float16,fp8,16383,0.015002666662136713
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,4,1,128,1,float16,float16,16383,0.019797333826621372
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,4,1,128,1,float16,float16,32767,0.02372266600529353
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,4,1,128,1,float16,fp8,32767,0.01876266673207283
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,4,1,128,1,float16,float16,65535,0.02882133424282074
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,4,2,128,1,float16,float16,1,0.008058666562040647
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,4,1,128,1,float16,fp8,65535,0.023893333971500397
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,4,1,128,1,float16,fp8,131071,0.031717332700888314
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,4,1,128,1,float16,float16,131071,0.03965333352486292
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,4,2,128,1,float16,fp8,1,0.00902399979531765
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,4,2,128,1,float16,float16,3,0.008021333565314611
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,4,2,128,1,float16,fp8,3,0.008901333436369896
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,4,2,128,1,float16,float16,7,0.007962666451931
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,4,2,128,1,float16,fp8,7,0.00927466650803884
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,4,2,128,1,float16,float16,15,0.008309333274761835
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,4,2,128,1,float16,fp8,15,0.009232000137368837
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,4,2,128,1,float16,float16,31,0.009002666920423508
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,4,2,128,1,float16,fp8,31,0.009946666657924652
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,4,2,128,1,float16,float16,63,0.009093333035707474
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,4,2,128,1,float16,fp8,63,0.01173866664369901
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,4,2,128,1,float16,fp8,127,0.01191466674208641
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,4,2,128,1,float16,float16,127,0.009178666397929192
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,4,2,128,1,float16,float16,255,0.010154666379094124
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,4,2,128,1,float16,fp8,255,0.01191466674208641
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,4,2,128,1,float16,float16,1023,0.017530667285124462
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,4,2,128,1,float16,float16,2047,0.017360000560681026
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,4,2,128,1,float16,float16,511,0.016271999726692837
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,4,2,128,1,float16,fp8,511,0.015919999529918034
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,4,2,128,1,float16,fp8,1023,0.01701333373785019
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,4,2,128,1,float16,float16,4095,0.01526933287580808
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,4,2,128,1,float16,fp8,2047,0.017162666966517765
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,4,2,128,1,float16,fp8,8191,0.012304000556468964
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,4,2,128,1,float16,fp8,4095,0.010453333457310995
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,4,2,128,1,float16,float16,8191,0.01894933357834816
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,4,2,128,1,float16,float16,16383,0.020138667275508244
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,4,2,128,1,float16,float16,32767,0.025093334416548412
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,4,2,128,1,float16,fp8,16383,0.01479999969402949
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,4,2,128,1,float16,fp8,32767,0.019061333189407986
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,4,2,128,1,float16,float16,65535,0.0306986669699351
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,4,2,128,1,float16,fp8,65535,0.024165332317352295
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,4,1,128,1,float16,fp8,1,0.00898133342464765
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,4,1,128,1,float16,float16,1,0.00786666696270307
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,4,2,128,1,float16,float16,131071,0.05760533114274343
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,4,1,128,1,float16,float16,3,0.008058666562040647
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1,1,4,2,128,1,float16,fp8,131071,0.03554133325815201
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,4,1,128,1,float16,fp8,3,0.00903466654320558
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,4,1,128,1,float16,float16,7,0.007957333077987036
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,4,1,128,1,float16,fp8,7,0.008938666433095932
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,4,1,128,1,float16,float16,31,0.008954666554927826
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,4,1,128,1,float16,float16,15,0.008277333031098047
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,4,1,128,1,float16,fp8,15,0.009103999783595404
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,4,1,128,1,float16,fp8,31,0.009701333319147428
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,4,1,128,1,float16,float16,63,0.009141333401203156
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,4,1,128,1,float16,fp8,127,0.011770666887362799
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,4,1,128,1,float16,float16,127,0.009061333412925402
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,4,1,128,1,float16,fp8,63,0.011711999773979187
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,4,1,128,1,float16,float16,255,0.010405333091815313
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,4,1,128,1,float16,fp8,255,0.011781333635250727
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,4,1,128,1,float16,float16,511,0.016197333733240765
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,4,1,128,1,float16,fp8,511,0.01602666700879733
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,4,1,128,1,float16,float16,1023,0.0173333336909612
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,4,1,128,1,float16,fp8,1023,0.01735466718673706
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,4,1,128,1,float16,float16,4095,0.016016000260909397
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,4,1,128,1,float16,fp8,2047,0.017125333348910015
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,4,1,128,1,float16,float16,2047,0.017658667018016178
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,4,1,128,1,float16,fp8,4095,0.010191999996701876
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,4,1,128,1,float16,float16,8191,0.01915733392039935
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,4,1,128,1,float16,fp8,8191,0.012330666184425354
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,4,1,128,1,float16,fp8,16383,0.014912000546852747
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,4,1,128,1,float16,float16,16383,0.020645332833131153
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,4,1,128,1,float16,float16,32767,0.02513066679239273
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,4,1,128,1,float16,fp8,32767,0.01897066707412402
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,4,2,128,1,float16,float16,1,0.00795199970404307
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,4,1,128,1,float16,float16,65535,0.030879999200503033
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,4,2,128,1,float16,fp8,1,0.009253333633144697
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,4,1,128,1,float16,fp8,65535,0.02405333270629247
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,4,1,128,1,float16,fp8,131071,0.0351946676770846
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,4,1,128,1,float16,float16,131071,0.05832533538341522
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,4,2,128,1,float16,float16,3,0.008410666758815447
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,4,2,128,1,float16,float16,7,0.008090666805704435
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,4,2,128,1,float16,fp8,3,0.00898133342464765
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,4,2,128,1,float16,fp8,7,0.009066666786869368
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,4,2,128,1,float16,float16,15,0.008373333141207695
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,4,2,128,1,float16,fp8,15,0.009343999748428663
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,4,2,128,1,float16,float16,31,0.00878399983048439
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,4,2,128,1,float16,fp8,63,0.011765333513418833
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,4,2,128,1,float16,float16,63,0.009178666397929192
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,4,2,128,1,float16,fp8,31,0.01003200002014637
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,4,2,128,1,float16,float16,127,0.009328000247478485
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,4,2,128,1,float16,fp8,127,0.011685332904259363
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,4,2,128,1,float16,fp8,255,0.011936000237862269
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,4,2,128,1,float16,fp8,511,0.016197333733240765
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,4,2,128,1,float16,float16,511,0.016208000481128693
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,4,2,128,1,float16,float16,255,0.010399999717871347
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,4,2,128,1,float16,float16,1023,0.017781333376963932
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,4,2,128,1,float16,fp8,1023,0.017429333180189133
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,4,2,128,1,float16,float16,2047,0.015599999576807022
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,4,2,128,1,float16,fp8,4095,0.01020800011853377
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,4,2,128,1,float16,float16,4095,0.016176000237464905
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,4,2,128,1,float16,fp8,2047,0.009008000294367472
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,4,2,128,1,float16,fp8,8191,0.012586666891972223
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,4,2,128,1,float16,float16,8191,0.01988799994190534
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,4,2,128,1,float16,float16,16383,0.02186666677395503
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,4,2,128,1,float16,fp8,16383,0.015077333897352219
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,4,2,128,1,float16,float16,32767,0.0351946676770846
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,4,2,128,1,float16,fp8,32767,0.02165866643190384
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,4,2,128,1,float16,float16,65535,0.048538664976755776
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,4,2,128,1,float16,fp8,65535,0.026714667677879333
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,4,1,128,1,float16,float16,1,0.010064000263810158
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,4,1,128,1,float16,fp8,1,0.005754666402935982
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,4,2,128,1,float16,float16,131071,0.07886399825414021
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2,1,4,2,128,1,float16,fp8,131071,0.05522666871547699
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,4,1,128,1,float16,fp8,3,0.006415999804933866
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,4,1,128,1,float16,float16,3,0.010165333126982054
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,4,1,128,1,float16,float16,7,0.010026666646202406
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,4,1,128,1,float16,float16,15,0.01027199998497963
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,4,1,128,1,float16,fp8,7,0.00595199999709924
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,4,1,128,1,float16,fp8,15,0.00589866687854131
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,4,1,128,1,float16,float16,63,0.010159999753038088
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,4,1,128,1,float16,float16,31,0.010384000216921171
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,4,1,128,1,float16,fp8,31,0.006282666698098183
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,4,1,128,1,float16,fp8,63,0.006095999851822853
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,4,1,128,1,float16,float16,127,0.010138666878143946
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,4,1,128,1,float16,fp8,127,0.00655466690659523
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,4,1,128,1,float16,float16,511,0.01929066702723503
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,4,1,128,1,float16,float16,255,0.013370666652917862
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,4,1,128,1,float16,fp8,255,0.007424000029762586
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,4,1,128,1,float16,fp8,511,0.008527999743819237
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,4,1,128,1,float16,float16,1023,0.027722666660944622
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,4,1,128,1,float16,fp8,1023,0.015146666516860327
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,4,1,128,1,float16,float16,2047,0.0429066667954127
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,4,1,128,1,float16,float16,4095,0.06797333558400472
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,4,1,128,1,float16,fp8,2047,0.020975999534130096
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,4,1,128,1,float16,fp8,4095,0.03226666649182638
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,4,1,128,1,float16,fp8,8191,0.04612799982229868
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,4,1,128,1,float16,float16,8191,0.08551466464996338
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,4,1,128,1,float16,float16,16383,0.12191999951998393
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,4,1,128,1,float16,fp8,16383,0.06860800087451935
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,4,1,128,1,float16,fp8,32767,0.11388267079989116
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,4,2,128,1,float16,fp8,1,0.00596266674498717
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,4,1,128,1,float16,float16,32767,0.19397334257761636
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,4,2,128,1,float16,float16,1,0.010458666831254959
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,4,2,128,1,float16,float16,3,0.010405333091815313
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,4,2,128,1,float16,fp8,3,0.006074666976928711
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,4,1,128,1,float16,float16,65535,0.3375306526819865
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,4,1,128,1,float16,fp8,65535,0.20442134141921997
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,4,2,128,1,float16,float16,7,0.010559999694426855
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,4,2,128,1,float16,float16,15,0.010693332801262537
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,4,2,128,1,float16,fp8,7,0.005882666756709416
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,4,2,128,1,float16,fp8,15,0.006186666587988536
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,4,2,128,1,float16,float16,31,0.010357333347201347
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,4,2,128,1,float16,fp8,31,0.006218666831652324
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,4,2,128,1,float16,fp8,63,0.006197333335876465
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,4,2,128,1,float16,float16,63,0.010698666175206503
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,4,2,128,1,float16,float16,255,0.02111999938885371
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,4,2,128,1,float16,float16,127,0.010608000059922537
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,4,2,128,1,float16,fp8,127,0.006319999694824219
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,4,2,128,1,float16,fp8,255,0.008303999900817871
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,4,2,128,1,float16,fp8,511,0.014074667046467463
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,4,2,128,1,float16,float16,511,0.0283146674434344
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,4,2,128,1,float16,float16,1023,0.045647998650868736
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,4,2,128,1,float16,fp8,1023,0.024282666544119518
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,4,2,128,1,float16,fp8,2047,0.03018666555484136
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,4,2,128,1,float16,float16,2047,0.06620266536871593
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,4,2,128,1,float16,fp8,4095,0.04407466451327006
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,4,2,128,1,float16,float16,4095,0.08404800295829773
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,4,2,128,1,float16,float16,8191,0.11938666303952535
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,4,2,128,1,float16,fp8,8191,0.0666240006685257
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,4,2,128,1,float16,float16,16383,0.1920213301976522
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,4,2,128,1,float16,fp8,16383,0.11221333344777425
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,4,1,128,1,float16,float16,1,0.007978666573762894
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,4,2,128,1,float16,float16,32767,0.3327680031458537
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,4,2,128,1,float16,fp8,32767,0.20257065693537393
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,4,1,128,1,float16,fp8,1,0.009130666653315226
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,4,1,128,1,float16,float16,3,0.008229333286484083
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,4,1,128,1,float16,fp8,3,0.009301333377758661
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,4,1,128,1,float16,float16,7,0.00808533343176047
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,4,2,128,1,float16,fp8,65535,0.38435200850168866
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,64,1,4,2,128,1,float16,float16,65535,0.6173813343048096
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,4,1,128,1,float16,float16,31,0.00897066667675972
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,4,1,128,1,float16,fp8,7,0.009343999748428663
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,4,1,128,1,float16,float16,15,0.008261333530147871
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,4,1,128,1,float16,fp8,15,0.009232000137368837
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,4,1,128,1,float16,fp8,31,0.0100853331387043
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,4,1,128,1,float16,float16,63,0.009061333412925402
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,4,1,128,1,float16,fp8,63,0.011920000116030375
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,4,1,128,1,float16,float16,127,0.009359999870260557
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,4,1,128,1,float16,fp8,127,0.011877333124478659
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,4,1,128,1,float16,float16,255,0.010277333358923594
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,4,1,128,1,float16,float16,511,0.016410666207472484
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,4,1,128,1,float16,fp8,511,0.016389333953460056
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,4,1,128,1,float16,float16,1023,0.017594666530688603
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,4,1,128,1,float16,fp8,255,0.011936000237862269
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,4,1,128,1,float16,fp8,1023,0.01735466718673706
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,4,1,128,1,float16,float16,2047,0.0170666662355264
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,4,1,128,1,float16,fp8,2047,0.009029333169261614
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,4,1,128,1,float16,float16,4095,0.017717332889636356
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,4,1,128,1,float16,fp8,8191,0.012416000167528788
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,4,1,128,1,float16,float16,8191,0.02093333254257838
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,4,1,128,1,float16,fp8,4095,0.010165333126982054
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,4,1,128,1,float16,float16,16383,0.023002666731675465
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,4,1,128,1,float16,fp8,16383,0.015130666395028433
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,4,1,128,1,float16,fp8,32767,0.021151999632517498
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,4,1,128,1,float16,float16,32767,0.036544000109036766
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,4,1,128,1,float16,float16,65535,0.04997866849104563
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,4,1,128,1,float16,fp8,65535,0.02701333413521449
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,4,2,128,1,float16,float16,1,0.007920000081261
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,4,1,128,1,float16,float16,131071,0.08038400113582611
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,4,1,128,1,float16,fp8,131071,0.05500266452630361
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,4,2,128,1,float16,fp8,1,0.009130666653315226
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,4,2,128,1,float16,float16,3,0.008176000167926153
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,4,2,128,1,float16,float16,7,0.008037333066264788
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,4,2,128,1,float16,fp8,3,0.009237333511312803
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,4,2,128,1,float16,fp8,7,0.00919999989370505
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,4,2,128,1,float16,float16,15,0.008320000022649765
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,4,2,128,1,float16,fp8,15,0.009472000102202097
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,4,2,128,1,float16,float16,31,0.00895999992887179
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,4,2,128,1,float16,fp8,31,0.010159999753038088
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,4,2,128,1,float16,float16,63,0.009189333145817121
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,4,2,128,1,float16,fp8,63,0.011834666132926941
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,4,2,128,1,float16,float16,127,0.009743999689817429
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,4,2,128,1,float16,fp8,127,0.011909333368142446
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,4,2,128,1,float16,float16,255,0.010234666367371878
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,4,2,128,1,float16,fp8,255,0.011978667229413986
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,4,2,128,1,float16,float16,1023,0.013370666652917862
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,4,2,128,1,float16,fp8,1023,0.008080000057816505
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,4,2,128,1,float16,float16,511,0.01658133293191592
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,4,2,128,1,float16,fp8,511,0.016442666451136272
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,4,2,128,1,float16,float16,4095,0.018277333428462345
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,4,2,128,1,float16,fp8,2047,0.009189333145817121
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,4,2,128,1,float16,fp8,4095,0.010586666564146677
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,4,2,128,1,float16,float16,2047,0.01720533271630605
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,4,2,128,1,float16,float16,8191,0.02771199991305669
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,4,2,128,1,float16,fp8,8191,0.014106666048367819
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,4,2,128,1,float16,float16,16383,0.03241066634654999
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,4,2,128,1,float16,fp8,16383,0.016879999389251072
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,4,2,128,1,float16,float16,32767,0.052015999952952065
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,4,2,128,1,float16,fp8,32767,0.03207999964555105
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,4,2,128,1,float16,float16,65535,0.07611200213432312
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,4,2,128,1,float16,fp8,65535,0.04308266441027323
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,4,1,128,1,float16,float16,1,0.010602666685978571
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,4,1,128,1,float16,fp8,1,0.006106666599710782
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,4,2,128,1,float16,float16,131071,0.12867732842763266
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,4,1,4,2,128,1,float16,fp8,131071,0.07558933397134145
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,4,1,128,1,float16,float16,3,0.01073066641887029
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,4,1,128,1,float16,fp8,3,0.006197333335876465
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,4,1,128,1,float16,float16,7,0.010725333044926325
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,4,1,128,1,float16,fp8,7,0.006213333457708359
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,4,1,128,1,float16,float16,15,0.010661333799362183
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,4,1,128,1,float16,float16,31,0.010901333143313726
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,4,1,128,1,float16,fp8,15,0.006074666976928711
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,4,1,128,1,float16,fp8,63,0.009066666786869368
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,4,1,128,1,float16,fp8,31,0.0063733334342638654
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,4,1,128,1,float16,float16,63,0.01062400018175443
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,4,1,128,1,float16,float16,127,0.011786667009194693
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,4,1,128,1,float16,fp8,127,0.006682666639486949
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,4,1,128,1,float16,fp8,255,0.008181333541870117
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,4,1,128,1,float16,float16,255,0.02060266708334287
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,4,1,128,1,float16,fp8,511,0.014159999787807465
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,4,1,128,1,float16,float16,511,0.02811199923356374
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,4,1,128,1,float16,float16,1023,0.04568533102671305
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,4,1,128,1,float16,fp8,1023,0.024533333877722423
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,4,1,128,1,float16,fp8,2047,0.030378667016824085
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,4,1,128,1,float16,float16,2047,0.06625600159168243
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,4,1,128,1,float16,float16,4095,0.08468266328175862
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,4,1,128,1,float16,fp8,4095,0.044400001565615334
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,4,1,128,1,float16,float16,8191,0.12065066893895467
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,4,1,128,1,float16,fp8,8191,0.06717866659164429
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,4,1,128,1,float16,fp8,16383,0.11251733700434367
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,4,2,128,1,float16,float16,1,0.015365333606799444
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,4,1,128,1,float16,float16,16383,0.19361066818237305
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,4,2,128,1,float16,fp8,1,0.006549333532651265
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,4,2,128,1,float16,float16,3,0.015562667200962702
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,4,2,128,1,float16,fp8,3,0.009994666402538618
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,4,2,128,1,float16,float16,7,0.01570133368174235
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,4,2,128,1,float16,fp8,7,0.006469333544373512
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,4,2,128,1,float16,float16,15,0.015317333241303762
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,4,2,128,1,float16,fp8,15,0.0064213331788778305
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,4,2,128,1,float16,float16,31,0.015333333363135656
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,4,1,128,1,float16,float16,32767,0.3377600113550822
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,4,2,128,1,float16,float16,63,0.015589332828919092
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,4,1,128,1,float16,fp8,32767,0.2029973268508911
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,4,2,128,1,float16,fp8,31,0.006549333532651265
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,4,2,128,1,float16,fp8,63,0.006495999793211619
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,4,2,128,1,float16,float16,127,0.015594666202863058
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,4,2,128,1,float16,fp8,127,0.007146666447321574
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,4,2,128,1,float16,float16,255,0.030133334298928578
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,4,2,128,1,float16,fp8,255,0.013674666484196981
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,4,2,128,1,float16,fp8,511,0.023717333873112995
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,4,2,128,1,float16,float16,511,0.04640000065167745
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,4,2,128,1,float16,float16,2047,0.08437866965929668
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,4,2,128,1,float16,fp8,1023,0.02976000060637792
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,4,2,128,1,float16,fp8,2047,0.04334400097529093
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,4,2,128,1,float16,float16,1023,0.06171200176080068
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,4,2,128,1,float16,float16,4095,0.12016000350316365
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,4,2,128,1,float16,fp8,4095,0.06658666829268138
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,4,2,128,1,float16,float16,8191,0.19242133696873984
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,256,1,4,1,128,1,float16,fp8,1,0.006805333619316419
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,256,1,4,1,128,1,float16,float16,1,0.015637333194414776
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,4,2,128,1,float16,fp8,8191,0.11193600296974182
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,4,2,128,1,float16,float16,16383,0.33579198519388836
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,4,2,128,1,float16,fp8,16383,0.20282133420308432
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,256,1,4,1,128,1,float16,float16,3,0.015615999698638916
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,256,1,4,1,128,1,float16,fp8,3,0.006890666360656421
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,256,1,4,1,128,1,float16,float16,7,0.015573333948850632
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,4,2,128,1,float16,float16,32767,0.6204906702041626
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,256,1,4,1,128,1,float16,fp8,7,0.0074879998962084455
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,256,1,4,1,128,1,float16,float16,15,0.01580799991885821
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,256,1,4,1,128,1,float16,fp8,31,0.006575999781489372
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,256,1,4,1,128,1,float16,float16,31,0.015583999454975128
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,128,1,4,2,128,1,float16,fp8,32767,0.3842293421427409
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,256,1,4,1,128,1,float16,float16,63,0.01552533358335495
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,256,1,4,1,128,1,float16,fp8,15,0.007466666400432587
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,256,1,4,1,128,1,float16,fp8,127,0.007205333560705185
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,256,1,4,1,128,1,float16,fp8,63,0.006405333057045937
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,256,1,4,1,128,1,float16,float16,127,0.015706667055686314
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,256,1,4,1,128,1,float16,float16,255,0.03001066545645396
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,256,1,4,1,128,1,float16,fp8,255,0.013514666507641474
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,256,1,4,1,128,1,float16,float16,511,0.04609066744645437
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,256,1,4,1,128,1,float16,fp8,511,0.024218666056791942
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,256,1,4,1,128,1,float16,fp8,1023,0.03010133405526479
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,256,1,4,1,128,1,float16,float16,2047,0.08441600203514099
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,256,1,4,1,128,1,float16,float16,1023,0.06165333092212677
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,256,1,4,1,128,1,float16,float16,4095,0.12138133247693379
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,256,1,4,1,128,1,float16,fp8,2047,0.04422399898370107
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,256,1,4,1,128,1,float16,fp8,4095,0.06676800052324931
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,256,1,4,2,128,1,float16,float16,1,0.025413334369659424
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,256,1,4,2,128,1,float16,fp8,1,0.009557333464423815
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,256,1,4,2,128,1,float16,fp8,3,0.009797333429257074
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,256,1,4,1,128,1,float16,float16,8191,0.19334399700164795
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,256,1,4,2,128,1,float16,float16,3,0.025061334172884624
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,256,1,4,2,128,1,float16,fp8,7,0.009610666582981745
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,256,1,4,2,128,1,float16,float16,7,0.0252960001428922
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,256,1,4,1,128,1,float16,fp8,8191,0.11214933792750041
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,256,1,4,2,128,1,float16,float16,15,0.025536000728607178
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,256,1,4,2,128,1,float16,fp8,15,0.009685333197315535
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,256,1,4,2,128,1,float16,float16,31,0.02571733295917511
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,256,1,4,2,128,1,float16,fp8,31,0.010677333921194077
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,256,1,4,2,128,1,float16,float16,63,0.02517866591612498
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,256,1,4,2,128,1,float16,fp8,63,0.009674666449427605
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,256,1,4,2,128,1,float16,float16,127,0.025759999950726826
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,256,1,4,2,128,1,float16,fp8,127,0.010949333508809408
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,256,1,4,2,128,1,float16,float16,511,0.06487466891606648
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,256,1,4,2,128,1,float16,float16,255,0.04901866614818573
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,256,1,4,2,128,1,float16,fp8,255,0.02351466566324234
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,256,1,4,2,128,1,float16,fp8,1023,0.043605332573254905
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,256,1,4,2,128,1,float16,float16,2047,0.12167466680208842
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,256,1,4,2,128,1,float16,fp8,511,0.029663999875386555
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,256,1,4,2,128,1,float16,fp8,2047,0.06650666892528534
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,256,1,4,2,128,1,float16,float16,1023,0.08078933258851369
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,256,1,4,2,128,1,float16,float16,4095,0.19299199183781943
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,256,1,4,2,128,1,float16,fp8,4095,0.11185066898663838
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,256,1,4,2,128,1,float16,float16,8191,0.33712534109751385
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,512,1,4,1,128,1,float16,float16,3,0.025487999121348064
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,512,1,4,1,128,1,float16,float16,1,0.025557334224383037
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,512,1,4,1,128,1,float16,float16,7,0.0258240004380544
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,512,1,4,1,128,1,float16,fp8,1,0.010149333626031876
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,512,1,4,1,128,1,float16,fp8,3,0.010602666685978571
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,512,1,4,1,128,1,float16,fp8,7,0.009695999945203463
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,256,1,4,2,128,1,float16,fp8,8191,0.20286933581034342
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,512,1,4,1,128,1,float16,float16,15,0.025514667232831318
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,512,1,4,1,128,1,float16,fp8,31,0.009706666693091393
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,512,1,4,1,128,1,float16,float16,31,0.025648000339667004
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,512,1,4,1,128,1,float16,fp8,15,0.009583999713261923
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,512,1,4,1,128,1,float16,float16,127,0.025733334322770435
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,512,1,4,1,128,1,float16,float16,63,0.025061334172884624
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,512,1,4,1,128,1,float16,fp8,63,0.010746666540702185
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,512,1,4,1,128,1,float16,fp8,127,0.011034666250149408
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,512,1,4,1,128,1,float16,float16,255,0.049413333336512245
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,512,1,4,1,128,1,float16,fp8,255,0.023936000963052113
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,512,1,4,1,128,1,float16,float16,511,0.06483200192451477
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,512,1,4,1,128,1,float16,fp8,511,0.030074665943781536
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,512,1,4,1,128,1,float16,float16,1023,0.08130666613578796
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,512,1,4,1,128,1,float16,fp8,1023,0.044266665975252785
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,512,1,4,1,128,1,float16,fp8,2047,0.06762666503588359
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,512,1,4,1,128,1,float16,float16,2047,0.12243733803431193
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,512,1,4,2,128,1,float16,fp8,1,0.016480000068744022
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,512,1,4,2,128,1,float16,float16,1,0.04484266539414724
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,512,1,4,2,128,1,float16,float16,3,0.04483200112978617
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,512,1,4,1,128,1,float16,float16,4095,0.19434666633605957
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,512,1,4,1,128,1,float16,fp8,4095,0.1127359966437022
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,512,1,4,2,128,1,float16,float16,7,0.044677332043647766
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,512,1,4,2,128,1,float16,fp8,7,0.016682667036851246
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,512,1,4,2,128,1,float16,fp8,3,0.016597333053747814
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,512,1,4,2,128,1,float16,float16,15,0.04438399771849314
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,512,1,4,2,128,1,float16,fp8,15,0.01672533278663953
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,512,1,4,2,128,1,float16,fp8,31,0.016677333662907284
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,512,1,4,2,128,1,float16,float16,63,0.045040001471837364
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,512,1,4,2,128,1,float16,float16,31,0.045050665736198425
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,512,1,4,2,128,1,float16,fp8,63,0.016447999825080235
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,512,1,4,2,128,1,float16,float16,127,0.045040001471837364
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,512,1,4,2,128,1,float16,fp8,127,0.01887999971707662
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,512,1,4,2,128,1,float16,float16,255,0.052383999029795326
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,512,1,4,2,128,1,float16,fp8,255,0.025242666403452556
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,512,1,4,2,128,1,float16,float16,511,0.06947200000286102
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,512,1,4,2,128,1,float16,fp8,511,0.03923733284076055
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,512,1,4,2,128,1,float16,fp8,1023,0.06214933097362518
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,512,1,4,2,128,1,float16,float16,2047,0.17662400007247925
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,4,1,128,1,float16,float16,1,0.008378666515151659
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,4,1,128,1,float16,fp8,1,0.009338666374484697
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,512,1,4,2,128,1,float16,float16,1023,0.10352533062299092
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,4,1,128,1,float16,float16,3,0.00810666692753633
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,512,1,4,2,128,1,float16,fp8,2047,0.10761066277821858
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,4,1,128,1,float16,fp8,3,0.009408000235756239
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,4,1,128,1,float16,float16,7,0.008314666648705801
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,4,1,128,1,float16,fp8,7,0.009296000003814697
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,4,1,128,1,float16,float16,15,0.008389333263039589
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,4,1,128,1,float16,float16,31,0.010026666646202406
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,4,1,128,1,float16,float16,63,0.009338666374484697
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,4,1,128,1,float16,fp8,63,0.011962667107582092
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,4,1,128,1,float16,fp8,31,0.010026666646202406
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,512,1,4,2,128,1,float16,float16,4095,0.31383466720581055
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,512,1,4,2,128,1,float16,fp8,4095,0.19860267639160156
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,4,1,128,1,float16,float16,127,0.009397333487868309
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,4,1,128,1,float16,fp8,15,0.010527999450763067
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,4,1,128,1,float16,float16,255,0.010490667074918747
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,4,1,128,1,float16,fp8,255,0.012223999947309494
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,4,1,128,1,float16,fp8,127,0.012037333101034164
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,4,1,128,1,float16,float16,511,0.016506666938463848
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,4,1,128,1,float16,fp8,1023,0.007936000203092894
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,4,1,128,1,float16,float16,1023,0.013525333255529404
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,4,1,128,1,float16,fp8,511,0.01642666632930438
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,4,1,128,1,float16,fp8,2047,0.009178666397929192
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,4,1,128,1,float16,float16,4095,0.018474667022625606
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,4,1,128,1,float16,fp8,8191,0.014080000420411428
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,4,1,128,1,float16,fp8,4095,0.010735999792814255
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,4,1,128,1,float16,float16,8191,0.027119999130566914
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,4,1,128,1,float16,float16,2047,0.017194667210181553
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,4,1,128,1,float16,float16,16383,0.03233066697915395
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,4,1,128,1,float16,fp8,32767,0.031983998914559685
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,4,1,128,1,float16,fp8,16383,0.016682667036851246
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,4,1,128,1,float16,float16,32767,0.05208000044027964
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,4,1,128,1,float16,float16,65535,0.07620800038178761
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,4,2,128,1,float16,float16,1,0.009253333633144697
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,4,2,128,1,float16,fp8,1,0.009450666606426239
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,4,1,128,1,float16,float16,131071,0.12736533085505167
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,4,1,128,1,float16,fp8,65535,0.043141335248947144
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,4,2,128,1,float16,fp8,3,0.009568000212311745
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,4,2,128,1,float16,float16,3,0.00916800027092298
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,4,2,128,1,float16,float16,7,0.008240000034372011
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,4,2,128,1,float16,float16,15,0.008559999987483025
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,4,2,128,1,float16,fp8,15,0.00949866697192192
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,4,2,128,1,float16,fp8,7,0.009296000003814697
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,4,2,128,1,float16,float16,31,0.010133333504199982
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,4,2,128,1,float16,fp8,31,0.010309333602587381
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,4,1,128,1,float16,fp8,131071,0.0755573312441508
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,4,2,128,1,float16,fp8,63,0.01192533348997434
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,4,2,128,1,float16,float16,63,0.009258666386206945
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,4,2,128,1,float16,float16,127,0.009535999968647957
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,4,2,128,1,float16,fp8,127,0.011978667229413986
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,4,2,128,1,float16,float16,255,0.010421333213647207
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,4,2,128,1,float16,float16,511,0.012853333105643591
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,4,2,128,1,float16,float16,1023,0.013381333400805792
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,4,2,128,1,float16,fp8,255,0.012165332833925882
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,4,2,128,1,float16,float16,2047,0.02204799900452296
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,4,2,128,1,float16,fp8,1023,0.008261333530147871
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,4,2,128,1,float16,float16,4095,0.025194667279720306
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,4,2,128,1,float16,fp8,2047,0.010026666646202406
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,4,2,128,1,float16,fp8,511,0.008517333616813024
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,4,2,128,1,float16,fp8,4095,0.011823999385039011
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,4,2,128,1,float16,float16,8191,0.03591466695070267
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,4,2,128,1,float16,float16,16383,0.05373866856098175
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,4,2,128,1,float16,fp8,8191,0.0204373337328434
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,4,2,128,1,float16,fp8,16383,0.025792000194390614
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,4,2,128,1,float16,float16,32767,0.08148266871770223
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,4,2,128,1,float16,fp8,32767,0.045093332727750145
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,4,2,128,1,float16,fp8,65535,0.07315200070540111
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1024,1,4,1,128,1,float16,float16,1,0.044581333796183266
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,4,2,128,1,float16,float16,131071,0.2107200026512146
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,4,2,128,1,float16,fp8,131071,0.1345919966697693
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1024,1,4,1,128,1,float16,fp8,1,0.01647466669480006
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,8,1,4,2,128,1,float16,float16,65535,0.12734400232632956
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1024,1,4,1,128,1,float16,float16,3,0.04483200112978617
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1024,1,4,1,128,1,float16,fp8,3,0.016522667060295742
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1024,1,4,1,128,1,float16,float16,7,0.044821331898371376
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1024,1,4,1,128,1,float16,fp8,7,0.016645333419243496
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1024,1,4,1,128,1,float16,float16,15,0.04459733267625173
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1024,1,4,1,128,1,float16,fp8,15,0.016565332810084026
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1024,1,4,1,128,1,float16,float16,31,0.04458666841189066
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1024,1,4,1,128,1,float16,fp8,63,0.016384000579516094
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1024,1,4,1,128,1,float16,float16,63,0.044677332043647766
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1024,1,4,1,128,1,float16,float16,127,0.045082668463389076
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1024,1,4,1,128,1,float16,fp8,127,0.01911466692884763
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1024,1,4,1,128,1,float16,fp8,31,0.016602666427691776
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1024,1,4,1,128,1,float16,float16,255,0.052943999568621315
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1024,1,4,1,128,1,float16,fp8,255,0.02605333427588145
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1024,1,4,1,128,1,float16,float16,511,0.07031466563542683
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1024,1,4,2,128,1,float16,float16,1,0.08149866759777069
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1024,1,4,1,128,1,float16,fp8,1023,0.06256533165772755
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1024,1,4,2,128,1,float16,float16,3,0.08158400158087413
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1024,1,4,2,128,1,float16,fp8,1,0.03014400104681651
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1024,1,4,1,128,1,float16,float16,1023,0.10474133491516113
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1024,1,4,2,128,1,float16,fp8,3,0.030095999439557392
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1024,1,4,1,128,1,float16,fp8,511,0.03957333415746689
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1024,1,4,1,128,1,float16,float16,2047,0.17710399627685547
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1024,1,4,2,128,1,float16,float16,7,0.08146666487058003
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1024,1,4,2,128,1,float16,fp8,7,0.030026666820049286
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1024,1,4,2,128,1,float16,float16,15,0.08156799773375194
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1024,1,4,2,128,1,float16,float16,31,0.08109866579373677
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1024,1,4,2,128,1,float16,fp8,15,0.0305226668715477
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1024,1,4,2,128,1,float16,fp8,63,0.03013866643110911
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1024,1,4,2,128,1,float16,float16,127,0.08323733508586884
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1024,1,4,2,128,1,float16,fp8,31,0.03014400104681651
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1024,1,4,2,128,1,float16,float16,63,0.08064533273379008
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1024,1,4,1,128,1,float16,fp8,2047,0.10814932982126872
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1024,1,4,2,128,1,float16,float16,255,0.0953493316968282
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1024,1,4,2,128,1,float16,fp8,127,0.03502399971087774
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1024,1,4,2,128,1,float16,fp8,255,0.05029866596062978
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1024,1,4,2,128,1,float16,float16,511,0.12733866771062216
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1024,1,4,2,128,1,float16,fp8,511,0.073594664533933
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1024,1,4,2,128,1,float16,float16,1023,0.1946186621983846
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1024,1,4,2,128,1,float16,fp8,1023,0.11914133032162984
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2048,1,4,1,128,1,float16,float16,3,0.08179733157157898
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2048,1,4,1,128,1,float16,float16,1,0.08169599870840709
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2048,1,4,1,128,1,float16,fp8,3,0.03025600065787633
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2048,1,4,1,128,1,float16,fp8,1,0.030181333422660828
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1024,1,4,2,128,1,float16,fp8,2047,0.2097653349240621
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,1024,1,4,2,128,1,float16,float16,2047,0.33538134892781574
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2048,1,4,1,128,1,float16,float16,15,0.08195200065771739
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2048,1,4,1,128,1,float16,float16,31,0.08147199948628743
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2048,1,4,1,128,1,float16,fp8,31,0.03018666555484136
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2048,1,4,1,128,1,float16,float16,7,0.08167466521263123
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2048,1,4,1,128,1,float16,fp8,15,0.02995733420054118
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2048,1,4,1,128,1,float16,fp8,7,0.030037333567937214
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2048,1,4,1,128,1,float16,float16,63,0.08133333424727122
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2048,1,4,1,128,1,float16,fp8,63,0.03002133220434189
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2048,1,4,1,128,1,float16,float16,127,0.08452799916267395
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2048,1,4,1,128,1,float16,fp8,127,0.037445334096749626
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2048,1,4,1,128,1,float16,float16,255,0.09593066573143005
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2048,1,4,2,128,1,float16,float16,1,0.15103999773661295
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2048,1,4,2,128,1,float16,fp8,1,0.061424002051353455
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2048,1,4,2,128,1,float16,float16,3,0.1508693297704061
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2048,1,4,1,128,1,float16,fp8,255,0.05145066479841868
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2048,1,4,1,128,1,float16,float16,511,0.12784000237782797
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2048,1,4,2,128,1,float16,fp8,3,0.05955199897289276
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2048,1,4,1,128,1,float16,fp8,511,0.07441066702206929
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2048,1,4,2,128,1,float16,float16,7,0.15075733264287314
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2048,1,4,2,128,1,float16,fp8,7,0.06061866879463196
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2048,1,4,2,128,1,float16,float16,15,0.1504746675491333
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2048,1,4,1,128,1,float16,float16,1023,0.19646932681401572
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2048,1,4,2,128,1,float16,float16,31,0.15196800231933594
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2048,1,4,2,128,1,float16,fp8,31,0.06112533311049143
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2048,1,4,2,128,1,float16,float16,63,0.1537920037905375
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2048,1,4,2,128,1,float16,fp8,63,0.06038400034109751
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2048,1,4,2,128,1,float16,fp8,15,0.05890133480230967
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2048,1,4,2,128,1,float16,float16,127,0.15544000267982483
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2048,1,4,1,128,1,float16,fp8,1023,0.12071999907493591
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2048,1,4,2,128,1,float16,fp8,127,0.07354666789372762
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2048,1,4,2,128,1,float16,float16,255,0.17837866147359213
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2048,1,4,2,128,1,float16,fp8,255,0.09455999732017517
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,4,1,128,1,float16,fp8,1,0.009509333098928133
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,4,1,128,1,float16,float16,3,0.00860799973209699
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,4,1,128,1,float16,float16,1,0.009232000137368837
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,4,1,128,1,float16,fp8,3,0.009477333476146063
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2048,1,4,2,128,1,float16,float16,511,0.24078933397928873
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,4,1,128,1,float16,float16,7,0.008186666915814081
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,4,1,128,1,float16,fp8,15,0.009786666681369146
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,4,1,128,1,float16,fp8,7,0.009408000235756239
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,4,1,128,1,float16,float16,15,0.009472000102202097
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,4,1,128,1,float16,fp8,31,0.010346666599313417
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,4,1,128,1,float16,float16,31,0.009375999992092451
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,4,1,128,1,float16,float16,63,0.009397333487868309
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2048,1,4,2,128,1,float16,fp8,511,0.14032000303268433
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,4,1,128,1,float16,fp8,63,0.012037333101034164
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,4,1,128,1,float16,fp8,127,0.012202666451533636
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,4,1,128,1,float16,float16,255,0.010527999450763067
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,4,1,128,1,float16,fp8,255,0.012149333953857422
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,4,1,128,1,float16,float16,127,0.010421333213647207
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,4,1,128,1,float16,float16,511,0.012906666845083237
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,4,1,128,1,float16,float16,1023,0.013194666554530462
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,4,1,128,1,float16,fp8,511,0.008613333106040955
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,4,1,128,1,float16,float16,2047,0.02189333240191142
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2048,1,4,2,128,1,float16,fp8,1023,0.23106666405995688
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,4,1,128,1,float16,fp8,1023,0.008176000167926153
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,2048,1,4,2,128,1,float16,float16,1023,0.37547731399536133
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,4,1,128,1,float16,float16,4095,0.025434667865435284
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,4,1,128,1,float16,fp8,2047,0.010703999549150467
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,4,1,128,1,float16,fp8,4095,0.011744000017642975
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,4,1,128,1,float16,fp8,8191,0.020799999435742695
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,4,1,128,1,float16,float16,8191,0.035989334185918175
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,4,1,128,1,float16,float16,16383,0.053488001227378845
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,4,1,128,1,float16,fp8,16383,0.025797332326571148
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,4,1,128,1,float16,float16,32767,0.08178666730721791
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,4,1,128,1,float16,fp8,32767,0.045365333557128906
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,4,1,128,1,float16,fp8,65535,0.07301333546638489
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,4,1,128,1,float16,float16,65535,0.12773866454760233
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,4,2,128,1,float16,float16,1,0.008272000278035799
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,4,2,128,1,float16,float16,3,0.008592000231146812
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,4,2,128,1,float16,fp8,1,0.009402666861812273
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,4,1,128,1,float16,float16,131071,0.21033066511154175
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,4,2,128,1,float16,fp8,3,0.010442666709423065
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,4,2,128,1,float16,float16,7,0.00842666688064734
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,4,2,128,1,float16,fp8,7,0.009573333586255709
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,4,2,128,1,float16,fp8,15,0.009797333429257074
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,4,2,128,1,float16,float16,15,0.009493333597977957
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,4,1,128,1,float16,fp8,131071,0.13551466663678488
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,4,2,128,1,float16,float16,31,0.009189333145817121
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,4,2,128,1,float16,fp8,31,0.010304000228643417
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,4,2,128,1,float16,float16,63,0.010533332824707031
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,4,2,128,1,float16,fp8,63,0.012159999459981918
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,4,2,128,1,float16,float16,127,0.009461333354314169
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,4,2,128,1,float16,fp8,127,0.012341332932313284
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,4,2,128,1,float16,float16,511,0.012975999464591345
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,4,2,128,1,float16,fp8,511,0.007861333588759104
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,4,2,128,1,float16,fp8,255,0.007167999943097432
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,4,2,128,1,float16,float16,255,0.012495999534924826
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,4,2,128,1,float16,float16,4095,0.04123199979464213
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,4,2,128,1,float16,fp8,2047,0.015322666615247726
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,4,2,128,1,float16,fp8,1023,0.009088000282645226
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,4,2,128,1,float16,float16,2047,0.028618666032950085
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,4,2,128,1,float16,float16,1023,0.019333332777023315
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,4,2,128,1,float16,fp8,4095,0.018090666582187016
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,4,2,128,1,float16,float16,8191,0.05900266766548157
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,4,2,128,1,float16,fp8,8191,0.02842666705449422
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,4,2,128,1,float16,float16,16383,0.08733333150545756
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,4,2,128,1,float16,fp8,16383,0.04540266593297323
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,4,2,128,1,float16,float16,32767,0.12412266929944356
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,4,2,128,1,float16,fp8,32767,0.0764160007238388
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,4,2,128,1,float16,fp8,65535,0.12754133343696594
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,4,2,128,1,float16,float16,65535,0.2007840077082316
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,4,2,128,1,float16,float16,131071,0.35675732294718426
TRTLLM,1.0.0rc3,NVIDIA H200,generation_attention,torch_flow,16,1,4,2,128,1,float16,fp8,131071,0.21543999512990317
