framework,version,device,op_name,kernel_source,batch_size,isl,num_heads,num_key_value_heads,head_dim,beam_width,attn_dtype,kv_cache_dtype,step,latency
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,64,64,128,1,float16,float16,7,0.03344533344109853
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,64,64,128,1,float16,float16,15,0.03345066557327906
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,64,64,128,1,float16,float16,3,0.03345600018898646
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,64,64,128,1,float16,fp8,7,0.02697066714366277
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,64,64,128,1,float16,fp8,1,0.027301333844661713
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,64,64,128,1,float16,fp8,15,0.026965332527955372
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,64,64,128,1,float16,fp8,3,0.027306665976842243
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,64,64,128,1,float16,float16,1,0.03345066557327906
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,64,64,128,1,float16,float16,31,0.03311466674009959
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,64,64,128,1,float16,float16,63,0.03310933212439219
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,64,64,128,1,float16,fp8,127,0.026965332527955372
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,64,64,128,1,float16,fp8,63,0.026965332527955372
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,64,64,128,1,float16,float16,255,0.03549866626660029
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,64,64,128,1,float16,float16,127,0.03311466674009959
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,64,64,128,1,float16,fp8,255,0.02698666602373123
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,64,64,128,1,float16,fp8,31,0.026965332527955372
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,64,64,128,1,float16,float16,511,0.05460800230503082
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,64,64,128,1,float16,float16,2047,0.17715734243392944
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,64,64,128,1,float16,fp8,1023,0.05973333120346069
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,64,64,128,1,float16,float16,1023,0.09284266829490662
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,64,64,128,1,float16,fp8,2047,0.10274133086204529
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,64,64,128,1,float16,float16,1,0.010581333190202713
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,64,64,128,1,float16,fp8,511,0.041306667029857635
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,64,64,128,1,float16,fp8,3,0.010575999816258749
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,64,64,128,1,float16,fp8,4095,0.18107734123865762
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,64,64,128,1,float16,float16,7,0.010618666807810465
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,64,64,128,1,float16,float16,4095,0.33673067887624103
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,64,64,128,1,float16,float16,3,0.010581333190202713
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,64,64,128,1,float16,fp8,15,0.010922666639089584
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,64,64,128,1,float16,fp8,1,0.010816000401973724
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,64,64,128,1,float16,float16,15,0.010928000013033548
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,64,64,128,1,float16,float16,31,0.010928000013033548
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,64,64,128,1,float16,fp8,31,0.01091733326514562
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,64,64,128,1,float16,float16,127,0.01091733326514562
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,64,64,128,1,float16,float16,63,0.010928000013033548
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,64,64,128,1,float16,fp8,63,0.010581333190202713
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,64,64,128,1,float16,fp8,7,0.010581333190202713
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,64,64,128,1,float16,fp8,127,0.010944000134865442
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,64,64,128,1,float16,float16,511,0.012629333883523941
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,64,64,128,1,float16,fp8,511,0.012629333883523941
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,64,64,128,1,float16,float16,255,0.010922666639089584
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,64,64,128,1,float16,fp8,255,0.011605333536863327
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,64,64,128,1,float16,float16,1023,0.012975999464591345
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,64,64,128,1,float16,fp8,1023,0.012629333883523941
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,64,64,128,1,float16,fp8,2047,0.014682666709025701
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,64,64,128,1,float16,float16,2047,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,64,64,128,1,float16,fp8,4095,0.017749333133300144
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,64,64,128,1,float16,float16,4095,0.019120000302791595
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,64,64,128,1,float16,float16,8191,0.03549866626660029
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,64,64,128,1,float16,fp8,8191,0.022874665757020313
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,64,64,128,1,float16,float16,16383,0.056320001681645714
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,64,64,128,1,float16,fp8,16383,0.0358240008354187
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,64,64,128,1,float16,float16,1,0.012282667060693106
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,64,64,128,1,float16,float16,3,0.011258666714032492
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,64,64,128,1,float16,fp8,1,0.01091733326514562
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,64,64,128,1,float16,fp8,3,0.010949333508809408
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,64,64,128,1,float16,fp8,31,0.010922666639089584
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,64,64,128,1,float16,float16,15,0.011930666863918304
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,64,64,128,1,float16,fp8,15,0.010928000013033548
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,64,64,128,1,float16,float16,31,0.010687999427318573
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,64,64,128,1,float16,float16,7,0.011621333658695221
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,64,64,128,1,float16,fp8,7,0.012629333883523941
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,64,64,128,1,float16,fp8,63,0.010901333143313726
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,64,64,128,1,float16,float16,63,0.010922666639089584
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,64,64,128,1,float16,float16,127,0.011333333949247995
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,64,64,128,1,float16,fp8,127,0.01126933346192042
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,64,64,128,1,float16,fp8,255,0.012053333222866058
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,64,64,128,1,float16,float16,255,0.010922666639089584
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,64,64,128,1,float16,float16,511,0.012629333883523941
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,64,64,128,1,float16,float16,1023,0.014677333335081736
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,64,64,128,1,float16,fp8,511,0.012629333883523941
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,64,64,128,1,float16,float16,2047,0.020821332931518555
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,64,64,128,1,float16,fp8,2047,0.01877333347996076
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,64,64,128,1,float16,float16,4095,0.037205333511034645
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,64,64,128,1,float16,fp8,4095,0.023557332654794056
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,64,64,128,1,float16,fp8,1023,0.014677333335081736
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,64,64,128,1,float16,float16,8191,0.0580320010582606
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,64,64,128,1,float16,fp8,16383,0.06007466713587443
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,64,64,128,1,float16,float16,1,0.05766933163007101
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,64,64,128,1,float16,fp8,8191,0.03755199909210205
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,64,64,128,1,float16,fp8,1,0.04540266593297323
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,64,64,128,1,float16,float16,16383,0.10103467106819153
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,64,64,128,1,float16,fp8,3,0.04539733131726583
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,64,64,128,1,float16,float16,7,0.0580266664425532
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,64,64,128,1,float16,float16,3,0.058703998724619545
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,64,64,128,1,float16,fp8,7,0.04369066655635834
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,64,64,128,1,float16,float16,15,0.0576853354771932
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,64,64,128,1,float16,fp8,15,0.04540266593297323
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,64,64,128,1,float16,float16,31,0.0576853354771932
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,64,64,128,1,float16,fp8,31,0.04539733131726583
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,64,64,128,1,float16,float16,63,0.057706668972969055
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,64,64,128,1,float16,fp8,63,0.04539733131726583
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,64,64,128,1,float16,fp8,127,0.04371733466784159
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,64,64,128,1,float16,float16,255,0.06007466713587443
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,64,64,128,1,float16,fp8,255,0.04576000074545542
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,64,64,128,1,float16,float16,511,0.09693866968154907
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,64,64,128,1,float16,float16,127,0.060415998101234436
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,64,64,128,1,float16,fp8,1023,0.10717866818110149
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,64,64,128,1,float16,float16,2047,0.33740798632303876
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,64,64,128,1,float16,float16,1023,0.17442133029301962
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,64,64,128,1,float16,fp8,511,0.07031466563542683
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,64,64,128,1,float16,fp8,2047,0.1889280080795288
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,64,64,128,1,float16,float16,1,0.012970666090647379
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,64,64,128,1,float16,fp8,1,0.012709333250919977
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,64,64,128,1,float16,float16,7,0.01331199953953425
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,64,64,128,1,float16,fp8,3,0.012965332716703415
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,64,64,128,1,float16,float16,3,0.012970666090647379
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,64,64,128,1,float16,fp8,7,0.012629333883523941
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,64,64,128,1,float16,fp8,15,0.012634667257467905
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,64,64,128,1,float16,float16,15,0.013306666165590286
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,64,64,128,1,float16,fp8,31,0.012629333883523941
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,64,64,128,1,float16,float16,31,0.012629333883523941
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,64,64,128,1,float16,float16,63,0.01331199953953425
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,64,64,128,1,float16,fp8,63,0.012624000509579977
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,64,64,128,1,float16,fp8,127,0.012629333883523941
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,64,64,128,1,float16,float16,255,0.012762666990359625
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,64,64,128,1,float16,float16,127,0.012629333883523941
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,64,64,128,1,float16,float16,511,0.015013333410024643
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,64,64,128,1,float16,fp8,511,0.014655999839305878
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,64,64,128,1,float16,float16,1023,0.020821332931518555
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,64,64,128,1,float16,fp8,255,0.012629333883523941
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,64,64,128,1,float16,fp8,2047,0.025258667767047882
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,64,64,128,1,float16,fp8,1023,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,64,64,128,1,float16,float16,2047,0.039594667653242745
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,64,64,128,1,float16,fp8,4095,0.04130133241415024
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,64,64,128,1,float16,float16,8191,0.10513066252072652
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,64,64,128,1,float16,fp8,8191,0.06417066852251689
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,64,64,128,1,float16,float16,4095,0.06075199941794077
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,64,64,128,1,float16,float16,16383,0.19012266397476196
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,64,64,128,1,float16,fp8,16383,0.10718400279680888
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,64,64,128,1,float16,float16,7,0.10513066252072652
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,64,64,128,1,float16,fp8,3,0.07852266728878021
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,64,64,128,1,float16,float16,1,0.10513066252072652
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,64,64,128,1,float16,float16,3,0.10716799894968669
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,64,64,128,1,float16,fp8,1,0.07817066709200542
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,64,64,128,1,float16,fp8,7,0.07851733267307281
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,64,64,128,1,float16,float16,15,0.105813334385554
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,64,64,128,1,float16,float16,31,0.10547199845314026
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,64,64,128,1,float16,fp8,15,0.07852800190448761
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,64,64,128,1,float16,fp8,63,0.07850666840871175
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,64,64,128,1,float16,float16,63,0.10614933570226033
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,64,64,128,1,float16,float16,127,0.10717866818110149
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,64,64,128,1,float16,fp8,127,0.08055466910203297
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,64,64,128,1,float16,float16,255,0.10717866818110149
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,64,64,128,1,float16,fp8,255,0.0788373351097107
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,64,64,128,1,float16,float16,511,0.17851734161376953
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,64,64,128,1,float16,fp8,511,0.12390399972597758
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,64,64,128,1,float16,fp8,31,0.07851199805736542
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,64,64,128,1,float16,fp8,1023,0.20155733823776245
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,64,64,128,1,float16,float16,1023,0.33398934205373126
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,256,1,64,64,128,1,float16,fp8,1,0.14813866217931113
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,256,1,64,64,128,1,float16,float16,3,0.19934932390848795
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,256,1,64,64,128,1,float16,float16,1,0.20342934131622314
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,256,1,64,64,128,1,float16,float16,7,0.20138667027155557
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,256,1,64,64,128,1,float16,fp8,3,0.14780267079671225
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,256,1,64,64,128,1,float16,float16,15,0.20121600230534872
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,256,1,64,64,128,1,float16,float16,31,0.19985065857569376
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,256,1,64,64,128,1,float16,fp8,31,0.14780799547831217
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,256,1,64,64,128,1,float16,float16,63,0.20377600193023682
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,256,1,64,64,128,1,float16,fp8,63,0.14780267079671225
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,256,1,64,64,128,1,float16,fp8,127,0.14813866217931113
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,256,1,64,64,128,1,float16,float16,127,0.19933867454528809
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,256,1,64,64,128,1,float16,fp8,7,0.14815466602643332
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,256,1,64,64,128,1,float16,fp8,15,0.14797332882881165
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,256,1,64,64,128,1,float16,float16,255,0.20122132698694864
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,256,1,64,64,128,1,float16,fp8,255,0.14628266294797262
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,512,1,64,64,128,1,float16,float16,1,0.39458131790161133
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,512,1,64,64,128,1,float16,float16,3,0.38964800039927167
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,512,1,64,64,128,1,float16,fp8,3,0.28278932968775433
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,512,1,64,64,128,1,float16,fp8,1,0.2834933400154114
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,512,1,64,64,128,1,float16,float16,15,0.38758401075998944
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,512,1,64,64,128,1,float16,float16,7,0.3896373510360718
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,512,1,64,64,128,1,float16,fp8,15,0.28245333830515545
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,512,1,64,64,128,1,float16,fp8,7,0.2834560076395671
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,512,1,64,64,128,1,float16,float16,31,0.3935573498408
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,512,1,64,64,128,1,float16,fp8,31,0.2834773262341817
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,64,64,128,1,float16,float16,1,0.016384000579516094
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,512,1,64,64,128,1,float16,float16,63,0.38690133889516193
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,512,1,64,64,128,1,float16,float16,127,0.38554131984710693
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,512,1,64,64,128,1,float16,fp8,127,0.28177066644032794
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,512,1,64,64,128,1,float16,fp8,63,0.28414400418599445
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,64,64,128,1,float16,float16,7,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,64,64,128,1,float16,fp8,7,0.014677333335081736
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,64,64,128,1,float16,fp8,3,0.014677333335081736
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,64,64,128,1,float16,float16,15,0.016384000579516094
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,64,64,128,1,float16,fp8,15,0.014677333335081736
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,64,64,128,1,float16,float16,3,0.01670933390657107
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,64,64,128,1,float16,fp8,1,0.014671999961137772
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,64,64,128,1,float16,float16,31,0.016735999534527462
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,64,64,128,1,float16,fp8,31,0.015040000279744467
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,64,64,128,1,float16,float16,63,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,64,64,128,1,float16,float16,127,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,64,64,128,1,float16,fp8,63,0.014677333335081736
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,64,64,128,1,float16,float16,511,0.020821332931518555
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,64,64,128,1,float16,fp8,255,0.014677333335081736
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,64,64,128,1,float16,fp8,1023,0.023200000325838726
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,64,64,128,1,float16,float16,1023,0.03515733281771342
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,64,64,128,1,float16,float16,2047,0.06144000093142191
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,64,64,128,1,float16,fp8,511,0.01877333347996076
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,64,64,128,1,float16,fp8,127,0.014682666709025701
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,64,64,128,1,float16,float16,255,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,64,64,128,1,float16,fp8,2047,0.04131199916203817
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,64,64,128,1,float16,fp8,4095,0.0641653339068095
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,64,64,128,1,float16,float16,8191,0.1909760038057963
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,64,64,128,1,float16,fp8,8191,0.10752000411351521
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,64,64,128,1,float16,float16,4095,0.10515200098355611
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,64,64,128,1,float16,float16,16383,0.3633439938227336
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,64,64,128,1,float16,fp8,16383,0.19729600350062051
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1024,1,64,64,128,1,float16,fp8,3,0.5522773265838623
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1024,1,64,64,128,1,float16,float16,1,0.7768906752268473
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1024,1,64,64,128,1,float16,fp8,1,0.5533013343811035
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1024,1,64,64,128,1,float16,float16,3,0.7625386714935303
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1024,1,64,64,128,1,float16,float16,7,0.7683359781901041
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1024,1,64,64,128,1,float16,float16,15,0.7621973355611166
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1024,1,64,64,128,1,float16,fp8,7,0.5529706478118896
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1024,1,64,64,128,1,float16,fp8,15,0.553984006245931
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1024,1,64,64,128,1,float16,float16,31,0.762880007425944
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1024,1,64,64,128,1,float16,float16,63,0.7703893184661865
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1024,1,64,64,128,1,float16,fp8,31,0.5556960105895996
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1024,1,64,64,128,1,float16,fp8,63,0.5543253421783447
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2048,1,64,64,128,1,float16,fp8,1,1.091754674911499
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2048,1,64,64,128,1,float16,float16,3,1.5127894083658855
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2048,1,64,64,128,1,float16,float16,1,1.525760014851888
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2048,1,64,64,128,1,float16,fp8,3,1.0961919625600178
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,64,64,128,1,float16,float16,1,0.022869333624839783
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2048,1,64,64,128,1,float16,float16,7,1.5144906044006348
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,64,64,128,1,float16,fp8,1,0.018778666853904724
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2048,1,64,64,128,1,float16,fp8,7,1.0951680342356365
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,64,64,128,1,float16,fp8,3,0.01911466692884763
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,64,64,128,1,float16,float16,3,0.022869333624839783
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,64,64,128,1,float16,float16,7,0.022869333624839783
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2048,1,64,64,128,1,float16,fp8,15,1.0910720030466716
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2048,1,64,64,128,1,float16,float16,15,1.5424853960673015
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,64,64,128,1,float16,float16,15,0.022863999009132385
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,64,64,128,1,float16,float16,31,0.022869333624839783
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,64,64,128,1,float16,fp8,15,0.01877333347996076
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,64,64,128,1,float16,fp8,7,0.019109333554903667
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2048,1,64,64,128,1,float16,float16,31,1.5131306648254395
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,64,64,128,1,float16,fp8,31,0.019109333554903667
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,64,64,128,1,float16,fp8,63,0.0191040001809597
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2048,1,64,64,128,1,float16,fp8,31,1.0917599995930989
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,64,64,128,1,float16,float16,127,0.022869333624839783
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,64,64,128,1,float16,float16,63,0.02184533327817917
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,64,64,128,1,float16,fp8,127,0.01877333347996076
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,64,64,128,1,float16,float16,255,0.022869333624839783
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,64,64,128,1,float16,fp8,255,0.01911466692884763
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,64,64,128,1,float16,fp8,511,0.02526933451493581
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,64,64,128,1,float16,fp8,1023,0.037205333511034645
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,64,64,128,1,float16,fp8,2047,0.06144000093142191
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,64,64,128,1,float16,float16,511,0.03549866626660029
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,64,64,128,1,float16,float16,4095,0.17869333426157633
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,64,64,128,1,float16,float16,1023,0.05394133428732554
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,64,64,128,1,float16,float16,2047,0.0986400047938029
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,64,64,128,1,float16,fp8,4095,0.09898666540781657
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,64,64,128,1,float16,float16,8191,0.3370613257090251
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,64,64,128,1,float16,fp8,8191,0.17851734161376953
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,32,32,128,1,float16,float16,1,0.022869333624839783
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,32,32,128,1,float16,float16,15,0.02288000037272771
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,32,32,128,1,float16,fp8,7,0.018778666853904724
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,32,32,128,1,float16,fp8,1,0.01911466692884763
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,32,32,128,1,float16,float16,3,0.022863999009132385
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,32,32,128,1,float16,float16,7,0.022869333624839783
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,32,32,128,1,float16,fp8,3,0.019093333433071773
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,32,32,128,1,float16,fp8,15,0.01911466692884763
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,32,32,128,1,float16,float16,31,0.022869333624839783
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,32,32,128,1,float16,fp8,31,0.018768000106016796
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,32,32,128,1,float16,float16,63,0.02218666672706604
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,32,32,128,1,float16,float16,127,0.022869333624839783
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,32,32,128,1,float16,fp8,255,0.018768000106016796
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,32,32,128,1,float16,fp8,63,0.018778666853904724
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,32,32,128,1,float16,fp8,127,0.018757333358128864
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,32,32,128,1,float16,float16,255,0.022869333624839783
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,32,32,128,1,float16,fp8,511,0.02493866781393687
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,32,32,128,1,float16,float16,511,0.03515733281771342
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,32,32,128,1,float16,float16,1023,0.05392533540725708
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,32,32,128,1,float16,fp8,1023,0.037205333511034645
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,32,32,128,1,float16,float16,2047,0.09896000226338704
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,32,32,128,1,float16,float16,8191,0.33536001046498615
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,32,32,128,1,float16,float16,1,0.010693332801262537
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,32,32,128,1,float16,float16,4095,0.17834667364756265
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,32,32,128,1,float16,fp8,8191,0.17783466974894205
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,32,32,128,1,float16,fp8,2047,0.06211733321348826
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,32,32,128,1,float16,fp8,4095,0.10103467106819153
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,32,32,128,1,float16,fp8,1,0.010672000547250112
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,32,32,128,1,float16,float16,3,0.010581333190202713
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,32,32,128,1,float16,fp8,3,0.010581333190202713
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,32,32,128,1,float16,float16,7,0.010581333190202713
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,32,32,128,1,float16,float16,15,0.010581333190202713
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,32,32,128,1,float16,float16,63,0.010586666564146677
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,32,32,128,1,float16,fp8,7,0.010581333190202713
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,32,32,128,1,float16,fp8,31,0.010575999816258749
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,32,32,128,1,float16,float16,31,0.01091733326514562
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,32,32,128,1,float16,fp8,63,0.010581333190202713
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,32,32,128,1,float16,fp8,15,0.01091733326514562
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,32,32,128,1,float16,float16,127,0.010650667051474253
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,32,32,128,1,float16,fp8,127,0.010559999694426855
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,32,32,128,1,float16,float16,255,0.010586666564146677
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,32,32,128,1,float16,float16,511,0.010901333143313726
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,32,32,128,1,float16,float16,1023,0.012629333883523941
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,32,32,128,1,float16,fp8,255,0.010586666564146677
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,32,32,128,1,float16,float16,2047,0.012965332716703415
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,32,32,128,1,float16,fp8,511,0.012634667257467905
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,32,32,128,1,float16,fp8,1023,0.012629333883523941
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,32,32,128,1,float16,float16,4095,0.015050667027632395
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,32,32,128,1,float16,fp8,2047,0.012629333883523941
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,32,32,128,1,float16,float16,8191,0.019472000499566395
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,32,32,128,1,float16,fp8,4095,0.014677333335081736
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,32,32,128,1,float16,float16,16383,0.03583999971548716
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,32,32,128,1,float16,fp8,8191,0.017055999487638474
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,32,32,128,1,float16,fp8,16383,0.022874665757020313
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,32,32,128,1,float16,fp8,1,0.010581333190202713
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,32,32,128,1,float16,fp8,32767,0.03583466758330663
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,32,32,128,1,float16,float16,3,0.010661333799362183
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,32,32,128,1,float16,float16,1,0.010581333190202713
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,32,32,128,1,float16,float16,32767,0.0576800008614858
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,32,32,128,1,float16,fp8,3,0.010992000500361124
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,32,32,128,1,float16,float16,15,0.010922666639089584
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,32,32,128,1,float16,float16,7,0.010586666564146677
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,32,32,128,1,float16,float16,31,0.010698666175206503
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,32,32,128,1,float16,fp8,15,0.010575999816258749
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,32,32,128,1,float16,fp8,31,0.010581333190202713
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,32,32,128,1,float16,fp8,7,0.010922666639089584
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,32,32,128,1,float16,fp8,63,0.010922666639089584
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,32,32,128,1,float16,float16,63,0.010922666639089584
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,32,32,128,1,float16,float16,127,0.010922666639089584
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,32,32,128,1,float16,float16,255,0.01055466632048289
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,32,32,128,1,float16,fp8,255,0.010586666564146677
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,32,32,128,1,float16,fp8,127,0.010634666929642359
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,32,32,128,1,float16,float16,1023,0.012981332838535309
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,32,32,128,1,float16,float16,511,0.01126933346192042
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,32,32,128,1,float16,fp8,511,0.012624000509579977
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,32,32,128,1,float16,float16,2047,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,32,32,128,1,float16,fp8,2047,0.014698666830857595
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,32,32,128,1,float16,fp8,1023,0.012645332763592402
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,32,32,128,1,float16,float16,4095,0.01911466692884763
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,32,32,128,1,float16,float16,8191,0.03548266738653183
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,32,32,128,1,float16,fp8,4095,0.0170666662355264
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,32,32,128,1,float16,fp8,8191,0.02288000037272771
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,32,32,128,1,float16,fp8,16383,0.03752533346414566
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,32,32,128,1,float16,float16,16383,0.0580266664425532
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,32,32,128,1,float16,float16,1,0.034128000338872276
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,32,32,128,1,float16,float16,32767,0.10001599788665771
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,32,32,128,1,float16,fp8,1,0.02731200059254964
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,32,32,128,1,float16,fp8,32767,0.0580320010582606
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,32,32,128,1,float16,float16,3,0.03345600018898646
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,32,32,128,1,float16,fp8,3,0.027306665976842243
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,32,32,128,1,float16,float16,7,0.03345600018898646
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,32,32,128,1,float16,float16,15,0.0341333324710528
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,32,32,128,1,float16,float16,31,0.03345066557327906
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,32,32,128,1,float16,fp8,15,0.02900800108909607
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,32,32,128,1,float16,fp8,7,0.027306665976842243
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,32,32,128,1,float16,float16,127,0.03311466674009959
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,32,32,128,1,float16,fp8,63,0.02762666592995326
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,32,32,128,1,float16,float16,63,0.03345600018898646
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,32,32,128,1,float16,fp8,31,0.027306665976842243
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,32,32,128,1,float16,fp8,127,0.027653334041436512
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,32,32,128,1,float16,float16,255,0.03549866626660029
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,32,32,128,1,float16,fp8,255,0.027989332874615986
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,32,32,128,1,float16,fp8,511,0.04130133241415024
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,32,32,128,1,float16,float16,511,0.05563733478387197
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,32,32,128,1,float16,float16,1023,0.09489066402117412
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,32,32,128,1,float16,fp8,1023,0.06010133524735769
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,32,32,128,1,float16,fp8,2047,0.10513599713643391
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,32,32,128,1,float16,float16,2047,0.17731734116872153
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,32,32,128,1,float16,float16,4095,0.3357013463973999
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,32,32,128,1,float16,float16,1,0.010922666639089584
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,32,32,128,1,float16,fp8,4095,0.1826080083847046
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,32,32,128,1,float16,float16,3,0.011519999553759893
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,32,32,128,1,float16,fp8,1,0.010821333775917688
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,32,32,128,1,float16,float16,7,0.010933333386977514
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,32,32,128,1,float16,fp8,15,0.010922666639089584
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,32,32,128,1,float16,fp8,3,0.01090666651725769
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,32,32,128,1,float16,float16,15,0.01091733326514562
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,32,32,128,1,float16,float16,31,0.010922666639089584
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,32,32,128,1,float16,fp8,7,0.010922666639089584
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,32,32,128,1,float16,fp8,31,0.010581333190202713
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,32,32,128,1,float16,float16,127,0.010922666639089584
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,32,32,128,1,float16,fp8,63,0.010565333068370819
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,32,32,128,1,float16,float16,255,0.01099733387430509
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,32,32,128,1,float16,fp8,255,0.010821333775917688
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,32,32,128,1,float16,fp8,127,0.010575999816258749
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,32,32,128,1,float16,float16,63,0.010581333190202713
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,32,32,128,1,float16,float16,511,0.012624000509579977
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,32,32,128,1,float16,float16,1023,0.014677333335081736
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,32,32,128,1,float16,fp8,511,0.011514666179815928
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,32,32,128,1,float16,float16,2047,0.021509334444999695
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,32,32,128,1,float16,fp8,2047,0.01877333347996076
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,32,32,128,1,float16,fp8,1023,0.012965332716703415
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,32,32,128,1,float16,float16,4095,0.038912000755469
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,32,32,128,1,float16,fp8,8191,0.03957333415746689
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,32,32,128,1,float16,fp8,4095,0.024901332954565685
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,32,32,128,1,float16,float16,8191,0.06005866825580597
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,32,32,128,1,float16,fp8,16383,0.06346133351325989
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,32,32,128,1,float16,float16,16383,0.10308800141016643
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,32,32,128,1,float16,float16,32767,0.187391996383667
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,32,32,128,1,float16,fp8,1,0.04539200166861216
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,32,32,128,1,float16,float16,1,0.0580320010582606
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,32,32,128,1,float16,fp8,32767,0.1071626643339793
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,32,32,128,1,float16,fp8,3,0.04539733131726583
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,32,32,128,1,float16,float16,3,0.0576853354771932
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,32,32,128,1,float16,float16,7,0.0580266664425532
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,32,32,128,1,float16,fp8,15,0.04539200166861216
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,32,32,128,1,float16,fp8,7,0.04437333345413208
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,32,32,128,1,float16,float16,15,0.0580213318268458
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,32,32,128,1,float16,float16,31,0.05938666562239329
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,32,32,128,1,float16,float16,63,0.0576800008614858
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,32,32,128,1,float16,fp8,31,0.04539733131726583
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,32,32,128,1,float16,float16,127,0.06006399790445963
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,32,32,128,1,float16,fp8,63,0.04369066655635834
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,32,32,128,1,float16,fp8,127,0.044031997521718345
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,32,32,128,1,float16,fp8,255,0.04710400104522705
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,32,32,128,1,float16,float16,255,0.06041066845258077
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,32,32,128,1,float16,fp8,511,0.07031466563542683
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,32,32,128,1,float16,float16,511,0.09693866968154907
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,32,32,128,1,float16,fp8,1023,0.10752532879511516
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,32,32,128,1,float16,float16,1023,0.17306667566299438
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,32,32,128,1,float16,fp8,2047,0.1901280085245768
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,32,32,128,1,float16,float16,2047,0.3353653351465861
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,256,1,32,32,128,1,float16,float16,1,0.10683733224868774
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,256,1,32,32,128,1,float16,float16,3,0.10717866818110149
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,256,1,32,32,128,1,float16,fp8,1,0.07884799937407176
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,256,1,32,32,128,1,float16,float16,7,0.10513066252072652
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,256,1,32,32,128,1,float16,fp8,3,0.07986666758855183
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,256,1,32,32,128,1,float16,float16,15,0.10547733306884766
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,256,1,32,32,128,1,float16,fp8,7,0.07987200220425923
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,256,1,32,32,128,1,float16,float16,31,0.10683733224868774
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,256,1,32,32,128,1,float16,fp8,15,0.07850666840871175
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,256,1,32,32,128,1,float16,fp8,63,0.07851199805736542
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,256,1,32,32,128,1,float16,fp8,31,0.07816533247629802
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,256,1,32,32,128,1,float16,fp8,127,0.07850133379300435
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,256,1,32,32,128,1,float16,float16,63,0.10683199763298035
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,256,1,32,32,128,1,float16,float16,255,0.10889599720637004
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,256,1,32,32,128,1,float16,float16,127,0.1088853379090627
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,256,1,32,32,128,1,float16,fp8,255,0.0805866668621699
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,256,1,32,32,128,1,float16,fp8,511,0.12390399972597758
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,256,1,32,32,128,1,float16,float16,511,0.1786880095799764
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,512,1,32,32,128,1,float16,fp8,1,0.14831466476122537
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,512,1,32,32,128,1,float16,float16,1,0.19933867454528809
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,512,1,32,32,128,1,float16,fp8,3,0.14949867129325867
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,512,1,32,32,128,1,float16,float16,3,0.20190399885177612
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,512,1,32,32,128,1,float16,fp8,7,0.14899200201034546
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,512,1,32,32,128,1,float16,float16,7,0.20344533522923788
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,512,1,32,32,128,1,float16,float16,15,0.19937600692113241
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,512,1,32,32,128,1,float16,float16,31,0.19934399922688803
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,512,1,32,32,128,1,float16,fp8,15,0.1486240029335022
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,512,1,32,32,128,1,float16,fp8,31,0.14778666694959006
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,512,1,32,32,128,1,float16,float16,63,0.20242132743199667
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,512,1,32,32,128,1,float16,float16,127,0.20189867417017618
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,512,1,32,32,128,1,float16,fp8,63,0.14899200201034546
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,512,1,32,32,128,1,float16,fp8,127,0.14828800161679587
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,512,1,32,32,128,1,float16,float16,255,0.20121065775553384
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,32,32,128,1,float16,float16,1,0.012970666090647379
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,512,1,32,32,128,1,float16,fp8,255,0.14729066689809164
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,32,32,128,1,float16,float16,3,0.012975999464591345
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,32,32,128,1,float16,fp8,3,0.012682666381200155
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,32,32,128,1,float16,fp8,1,0.012746666868527731
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,32,32,128,1,float16,float16,15,0.012970666090647379
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,32,32,128,1,float16,fp8,7,0.012629333883523941
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,32,32,128,1,float16,float16,7,0.012965332716703415
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,32,32,128,1,float16,float16,31,0.012970666090647379
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,32,32,128,1,float16,fp8,15,0.012624000509579977
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,32,32,128,1,float16,float16,63,0.012629333883523941
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,32,32,128,1,float16,fp8,31,0.012629333883523941
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,32,32,128,1,float16,float16,127,0.012874666601419449
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,32,32,128,1,float16,fp8,255,0.012602667013804117
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,32,32,128,1,float16,fp8,63,0.012624000509579977
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,32,32,128,1,float16,fp8,127,0.012629333883523941
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,32,32,128,1,float16,float16,255,0.012970666090647379
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,32,32,128,1,float16,float16,511,0.015013333410024643
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,32,32,128,1,float16,fp8,511,0.014677333335081736
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,32,32,128,1,float16,fp8,1023,0.016719999412695568
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,32,32,128,1,float16,float16,2047,0.039594667653242745
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,32,32,128,1,float16,float16,1023,0.019120000302791595
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,32,32,128,1,float16,float16,4095,0.062122667829195656
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,32,32,128,1,float16,fp8,2047,0.026965332527955372
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,32,32,128,1,float16,float16,8191,0.10376532872517903
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,32,32,128,1,float16,fp8,4095,0.041296000281969704
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,32,32,128,1,float16,fp8,8191,0.0631466656923294
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,32,32,128,1,float16,float16,16383,0.1896053353945414
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,32,32,128,1,float16,fp8,16383,0.10854400197664897
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,32,32,128,1,float16,float16,32767,0.36028265953063965
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,32,32,128,1,float16,fp8,32767,0.19766400257746378
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1024,1,32,32,128,1,float16,float16,1,0.38860801855723065
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1024,1,32,32,128,1,float16,fp8,7,0.2851840058962504
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1024,1,32,32,128,1,float16,fp8,3,0.28517866134643555
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1024,1,32,32,128,1,float16,float16,7,0.38792534669240314
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1024,1,32,32,128,1,float16,fp8,1,0.2845013340314229
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1024,1,32,32,128,1,float16,float16,3,0.3916960159937541
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1024,1,32,32,128,1,float16,float16,15,0.3916800022125244
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1024,1,32,32,128,1,float16,fp8,15,0.2872320016225179
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1024,1,32,32,128,1,float16,fp8,31,0.28484266996383667
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1024,1,32,32,128,1,float16,fp8,63,0.28517866134643555
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1024,1,32,32,128,1,float16,float16,31,0.38792534669240314
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1024,1,32,32,128,1,float16,float16,63,0.3916800022125244
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1024,1,32,32,128,1,float16,float16,127,0.38756799697875977
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1024,1,32,32,128,1,float16,fp8,127,0.28279467423756915
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2048,1,32,32,128,1,float16,float16,1,0.772437334060669
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2048,1,32,32,128,1,float16,fp8,1,0.5608053207397461
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2048,1,32,32,128,1,float16,float16,3,0.7645920117696127
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2048,1,32,32,128,1,float16,fp8,3,0.556714653968811
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2048,1,32,32,128,1,float16,float16,7,0.7652746836344401
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2048,1,32,32,128,1,float16,fp8,7,0.556714653968811
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2048,1,32,32,128,1,float16,float16,15,0.7642506758371989
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2048,1,32,32,128,1,float16,fp8,15,0.5570559899012247
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,32,32,128,1,float16,fp8,1,0.014682666709025701
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,32,32,128,1,float16,float16,3,0.01706133286158244
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,32,32,128,1,float16,fp8,3,0.014655999839305878
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,32,32,128,1,float16,float16,1,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2048,1,32,32,128,1,float16,float16,31,0.7720959981282552
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2048,1,32,32,128,1,float16,fp8,31,0.5601280132929484
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,32,32,128,1,float16,float16,7,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,32,32,128,1,float16,fp8,7,0.015013333410024643
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2048,1,32,32,128,1,float16,float16,63,0.7649280230204264
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,32,32,128,1,float16,float16,15,0.016384000579516094
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2048,1,32,32,128,1,float16,fp8,63,0.5563733180363973
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,32,32,128,1,float16,fp8,15,0.014677333335081736
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,32,32,128,1,float16,float16,63,0.016730666160583496
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,32,32,128,1,float16,float16,31,0.015696000307798386
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,32,32,128,1,float16,fp8,31,0.014677333335081736
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,32,32,128,1,float16,fp8,63,0.014682666709025701
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,32,32,128,1,float16,float16,127,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,32,32,128,1,float16,fp8,127,0.014682666709025701
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,32,32,128,1,float16,float16,255,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,32,32,128,1,float16,float16,511,0.021162666380405426
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,32,32,128,1,float16,fp8,255,0.014655999839305878
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,32,32,128,1,float16,fp8,1023,0.02492800106604894
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,32,32,128,1,float16,float16,2047,0.062122667829195656
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,32,32,128,1,float16,fp8,2047,0.041296000281969704
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,32,32,128,1,float16,fp8,511,0.01877333347996076
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,32,32,128,1,float16,float16,1023,0.03515733281771342
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,32,32,128,1,float16,float16,4095,0.10513066252072652
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,32,32,128,1,float16,fp8,4095,0.0645066648721695
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,32,32,128,1,float16,float16,8191,0.19011733929316202
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,32,32,128,1,float16,fp8,8191,0.10923199852307637
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,32,32,128,1,float16,float16,16383,0.3619786500930786
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,24,24,128,1,float16,float16,3,0.01911466692884763
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,24,24,128,1,float16,fp8,3,0.0170666662355264
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,32,32,128,1,float16,fp8,16383,0.19713600476582846
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,24,24,128,1,float16,float16,1,0.018789333601792652
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,24,24,128,1,float16,fp8,1,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,24,24,128,1,float16,float16,15,0.019109333554903667
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,24,24,128,1,float16,float16,31,0.01911466692884763
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,24,24,128,1,float16,fp8,7,0.0170666662355264
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,24,24,128,1,float16,fp8,15,0.016741332908471424
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,24,24,128,1,float16,fp8,31,0.017045332739750545
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,24,24,128,1,float16,float16,7,0.0191040001809597
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,24,24,128,1,float16,float16,63,0.01911466692884763
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,24,24,128,1,float16,fp8,63,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,24,24,128,1,float16,fp8,127,0.0170666662355264
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,24,24,128,1,float16,float16,127,0.018746666610240936
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,24,24,128,1,float16,fp8,255,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,24,24,128,1,float16,float16,255,0.01877333347996076
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,24,24,128,1,float16,float16,511,0.0290133332212766
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,24,24,128,1,float16,fp8,511,0.021157334248224895
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,24,24,128,1,float16,float16,2047,0.08359466989835103
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,24,24,128,1,float16,fp8,2047,0.05359466870625814
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,24,24,128,1,float16,float16,1023,0.046762665112813316
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,24,24,128,1,float16,float16,4095,0.14830933014551798
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,24,24,128,1,float16,fp8,1023,0.031061333914597828
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,24,24,128,1,float16,fp8,4095,0.08702400326728821
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,24,24,128,1,float16,float16,8191,0.279039998849233
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,24,24,128,1,float16,float16,1,0.010575999816258749
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,24,24,128,1,float16,fp8,1,0.010922666639089584
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,24,24,128,1,float16,fp8,3,0.010575999816258749
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,24,24,128,1,float16,fp8,8191,0.15429332852363586
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,24,24,128,1,float16,float16,3,0.010928000013033548
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,24,24,128,1,float16,fp8,7,0.010581333190202713
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,24,24,128,1,float16,float16,15,0.010922666639089584
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,24,24,128,1,float16,float16,7,0.010581333190202713
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,24,24,128,1,float16,fp8,15,0.010570666442314783
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,24,24,128,1,float16,float16,31,0.010597333312034607
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,24,24,128,1,float16,fp8,31,0.01091733326514562
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,24,24,128,1,float16,float16,127,0.010490667074918747
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,24,24,128,1,float16,fp8,127,0.010581333190202713
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,24,24,128,1,float16,fp8,63,0.010928000013033548
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,24,24,128,1,float16,fp8,255,0.01051733394463857
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,24,24,128,1,float16,float16,511,0.011605333536863327
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,24,24,128,1,float16,float16,63,0.010586666564146677
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,24,24,128,1,float16,float16,255,0.010586666564146677
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,24,24,128,1,float16,fp8,511,0.012629333883523941
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,24,24,128,1,float16,fp8,2047,0.016389333953460056
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,24,24,128,1,float16,float16,1023,0.010805333654085795
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,24,24,128,1,float16,fp8,1023,0.012629333883523941
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,24,24,128,1,float16,float16,4095,0.0170666662355264
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,24,24,128,1,float16,fp8,8191,0.019120000302791595
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,24,24,128,1,float16,float16,8191,0.020821332931518555
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,24,24,128,1,float16,float16,2047,0.016719999412695568
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,24,24,128,1,float16,fp8,4095,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,24,24,128,1,float16,float16,16383,0.02696000039577484
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,24,24,128,1,float16,float16,1,0.010581333190202713
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,24,24,128,1,float16,fp8,1,0.010570666442314783
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,24,24,128,1,float16,fp8,32767,0.031744000812371574
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,24,24,128,1,float16,fp8,16383,0.022831998765468597
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,24,24,128,1,float16,float16,3,0.010581333190202713
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,24,24,128,1,float16,float16,32767,0.04778666794300079
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,24,24,128,1,float16,fp8,3,0.010581333190202713
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,24,24,128,1,float16,float16,7,0.010832000523805618
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,24,24,128,1,float16,fp8,7,0.010575999816258749
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,24,24,128,1,float16,fp8,15,0.010608000059922537
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,24,24,128,1,float16,float16,15,0.010581333190202713
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,24,24,128,1,float16,fp8,31,0.010565333068370819
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,24,24,128,1,float16,float16,31,0.010586666564146677
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,24,24,128,1,float16,float16,127,0.010709332923094431
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,24,24,128,1,float16,fp8,63,0.010565333068370819
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,24,24,128,1,float16,fp8,127,0.010575999816258749
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,24,24,128,1,float16,float16,63,0.010581333190202713
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,24,24,128,1,float16,float16,255,0.010581333190202713
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,24,24,128,1,float16,fp8,255,0.010581333190202713
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,24,24,128,1,float16,float16,1023,0.015360000232855478
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,24,24,128,1,float16,fp8,1023,0.014677333335081736
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,24,24,128,1,float16,float16,511,0.010922666639089584
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,24,24,128,1,float16,float16,2047,0.0170666662355264
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,24,24,128,1,float16,fp8,511,0.011610666910807291
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,24,24,128,1,float16,float16,4095,0.020810666183630627
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,24,24,128,1,float16,fp8,2047,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,24,24,128,1,float16,float16,8191,0.027984000742435455
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,24,24,128,1,float16,fp8,8191,0.022863999009132385
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,24,24,128,1,float16,fp8,4095,0.018768000106016796
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,24,24,128,1,float16,fp8,16383,0.03140799949566523
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,24,24,128,1,float16,float16,16383,0.047781333327293396
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,24,24,128,1,float16,fp8,32767,0.047450666626294456
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,24,24,128,1,float16,float16,32767,0.07815999786059062
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,24,24,128,1,float16,float16,1,0.027306665976842243
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,24,24,128,1,float16,fp8,1,0.022869333624839783
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,24,24,128,1,float16,float16,3,0.0290133332212766
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,24,24,128,1,float16,float16,7,0.027306665976842243
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,24,24,128,1,float16,fp8,3,0.023546665906906128
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,24,24,128,1,float16,float16,15,0.027317332724730175
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,24,24,128,1,float16,fp8,7,0.023215999205907185
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,24,24,128,1,float16,fp8,31,0.022863999009132385
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,24,24,128,1,float16,float16,63,0.029018667836983997
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,24,24,128,1,float16,fp8,63,0.022869333624839783
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,24,24,128,1,float16,float16,31,0.027280000348885853
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,24,24,128,1,float16,fp8,15,0.023557332654794056
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,24,24,128,1,float16,fp8,127,0.022874665757020313
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,24,24,128,1,float16,float16,127,0.0290133332212766
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,24,24,128,1,float16,float16,255,0.030037333567937214
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,24,24,128,1,float16,fp8,255,0.022869333624839783
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,24,24,128,1,float16,float16,511,0.04573333263397217
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,24,24,128,1,float16,float16,2047,0.14028799533843994
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,24,24,128,1,float16,fp8,511,0.03515733281771342
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,24,24,128,1,float16,float16,1023,0.07508799930413564
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,24,24,128,1,float16,fp8,2047,0.08601066470146179
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,24,24,128,1,float16,fp8,1023,0.049829334020614624
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,24,24,128,1,float16,fp8,4095,0.1460906664530436
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,24,24,128,1,float16,float16,1,0.010922666639089584
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,24,24,128,1,float16,fp8,1,0.010944000134865442
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,24,24,128,1,float16,float16,4095,0.2628320058186849
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,24,24,128,1,float16,float16,3,0.010581333190202713
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,24,24,128,1,float16,float16,7,0.010938666760921478
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,24,24,128,1,float16,float16,15,0.010922666639089584
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,24,24,128,1,float16,fp8,3,0.010922666639089584
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,24,24,128,1,float16,fp8,15,0.011264000087976456
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,24,24,128,1,float16,float16,31,0.01091733326514562
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,24,24,128,1,float16,fp8,7,0.010922666639089584
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,24,24,128,1,float16,float16,63,0.010922666639089584
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,24,24,128,1,float16,fp8,31,0.010928000013033548
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,24,24,128,1,float16,float16,127,0.010826667149861654
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,24,24,128,1,float16,fp8,63,0.010938666760921478
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,24,24,128,1,float16,float16,255,0.010922666639089584
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,24,24,128,1,float16,fp8,255,0.011264000087976456
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,24,24,128,1,float16,fp8,511,0.012629333883523941
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,24,24,128,1,float16,fp8,127,0.010901333143313726
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,24,24,128,1,float16,float16,511,0.012624000509579977
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,24,24,128,1,float16,float16,1023,0.014677333335081736
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,24,24,128,1,float16,fp8,1023,0.012624000509579977
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,24,24,128,1,float16,float16,4095,0.035162667433420815
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,24,24,128,1,float16,fp8,2047,0.01877333347996076
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,24,24,128,1,float16,float16,2047,0.02081599955757459
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,24,24,128,1,float16,fp8,4095,0.02491733431816101
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,24,24,128,1,float16,float16,8191,0.056661332647005715
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,24,24,128,1,float16,fp8,16383,0.06178133189678192
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,24,24,128,1,float16,fp8,1,0.03718933214743932
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,24,24,128,1,float16,float16,32767,0.17340266704559326
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,24,24,128,1,float16,fp8,8191,0.039936001102129616
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,24,24,128,1,float16,float16,16383,0.0962559978167216
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,24,24,128,1,float16,float16,1,0.047456001242001854
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,24,24,128,1,float16,fp8,32767,0.1071626643339793
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,24,24,128,1,float16,fp8,3,0.037205333511034645
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,24,24,128,1,float16,float16,7,0.046069333950678505
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,24,24,128,1,float16,float16,3,0.04607999821503957
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,24,24,128,1,float16,fp8,7,0.03719999889532725
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,24,24,128,1,float16,float16,31,0.045738667249679565
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,24,24,128,1,float16,fp8,31,0.035530666510264076
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,24,24,128,1,float16,fp8,15,0.03723733375469843
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,24,24,128,1,float16,float16,15,0.04573333263397217
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,24,24,128,1,float16,float16,63,0.04571733375390371
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,24,24,128,1,float16,fp8,63,0.03721066564321518
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,24,24,128,1,float16,fp8,127,0.037205333511034645
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,24,24,128,1,float16,float16,127,0.047781333327293396
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,24,24,128,1,float16,float16,255,0.04915733138720194
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,24,24,128,1,float16,fp8,255,0.03754666695992152
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,24,24,128,1,float16,float16,511,0.07645866771539052
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,24,24,128,1,float16,float16,1023,0.13380266229311624
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,24,24,128,1,float16,fp8,1023,0.08498133222262065
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,24,24,128,1,float16,fp8,511,0.05597866574923197
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,24,24,128,1,float16,float16,2047,0.2566773295402527
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,256,1,24,24,128,1,float16,float16,1,0.0846560001373291
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,256,1,24,24,128,1,float16,fp8,1,0.061797335743904114
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,24,24,128,1,float16,fp8,2047,0.14642666776974997
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,256,1,24,24,128,1,float16,float16,3,0.08330133557319641
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,256,1,24,24,128,1,float16,float16,7,0.08331733445326488
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,256,1,24,24,128,1,float16,fp8,3,0.06211733321348826
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,256,1,24,24,128,1,float16,float16,15,0.08294400076071422
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,256,1,24,24,128,1,float16,fp8,7,0.06178133189678192
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,256,1,24,24,128,1,float16,fp8,15,0.06177600224812826
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,256,1,24,24,128,1,float16,float16,63,0.08328533172607422
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,256,1,24,24,128,1,float16,float16,127,0.08430932958920796
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,256,1,24,24,128,1,float16,float16,31,0.08293866614500682
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,256,1,24,24,128,1,float16,fp8,127,0.061759998401006065
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,256,1,24,24,128,1,float16,fp8,31,0.06178666651248932
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,256,1,24,24,128,1,float16,float16,255,0.08260266482830048
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,256,1,24,24,128,1,float16,fp8,511,0.09694400429725647
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,256,1,24,24,128,1,float16,float16,511,0.13737600048383078
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,256,1,24,24,128,1,float16,fp8,255,0.06382933259010315
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,512,1,24,24,128,1,float16,fp8,1,0.11334932843844096
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,512,1,24,24,128,1,float16,float16,1,0.1544533371925354
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,256,1,24,24,128,1,float16,fp8,63,0.06178133189678192
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,512,1,24,24,128,1,float16,float16,7,0.15598932902018228
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,512,1,24,24,128,1,float16,float16,3,0.15530666708946228
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,512,1,24,24,128,1,float16,fp8,3,0.11500266194343567
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,512,1,24,24,128,1,float16,float16,15,0.15752533078193665
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,512,1,24,24,128,1,float16,fp8,15,0.11505599816640218
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,512,1,24,24,128,1,float16,float16,31,0.15481600165367126
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,512,1,24,24,128,1,float16,fp8,7,0.1153706709543864
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,512,1,24,24,128,1,float16,float16,63,0.15598932902018228
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,512,1,24,24,128,1,float16,fp8,31,0.11400533715883891
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,512,1,24,24,128,1,float16,fp8,127,0.1153706709543864
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,512,1,24,24,128,1,float16,fp8,63,0.11500799655914307
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,24,24,128,1,float16,fp8,1,0.012853333105643591
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,512,1,24,24,128,1,float16,float16,127,0.15428800384203592
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,512,1,24,24,128,1,float16,float16,255,0.15428266922632852
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,24,24,128,1,float16,float16,1,0.012645332763592402
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,512,1,24,24,128,1,float16,fp8,255,0.11435199777285258
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,24,24,128,1,float16,float16,3,0.012634667257467905
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,24,24,128,1,float16,fp8,7,0.012629333883523941
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,24,24,128,1,float16,fp8,15,0.012624000509579977
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,24,24,128,1,float16,fp8,3,0.012634667257467905
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,24,24,128,1,float16,float16,31,0.012634667257467905
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,24,24,128,1,float16,fp8,31,0.012624000509579977
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,24,24,128,1,float16,float16,63,0.012634667257467905
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,24,24,128,1,float16,float16,7,0.012629333883523941
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,24,24,128,1,float16,float16,15,0.012965332716703415
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,24,24,128,1,float16,fp8,63,0.012634667257467905
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,24,24,128,1,float16,float16,127,0.012624000509579977
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,24,24,128,1,float16,float16,511,0.014677333335081736
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,24,24,128,1,float16,float16,255,0.012639999389648438
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,24,24,128,1,float16,fp8,255,0.012517333030700684
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,24,24,128,1,float16,fp8,127,0.012634667257467905
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,24,24,128,1,float16,fp8,511,0.014671999961137772
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,24,24,128,1,float16,float16,1023,0.0194560003777345
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,24,24,128,1,float16,fp8,1023,0.016757333030303318
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,24,24,128,1,float16,float16,2047,0.03515733281771342
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,24,24,128,1,float16,fp8,2047,0.02491733431816101
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,24,24,128,1,float16,float16,4095,0.06007466713587443
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,24,24,128,1,float16,fp8,4095,0.04095999896526337
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,24,24,128,1,float16,float16,8191,0.0993226667245229
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,24,24,128,1,float16,fp8,16383,0.10717866818110149
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,24,24,128,1,float16,fp8,8191,0.062122667829195656
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,24,24,128,1,float16,float16,16383,0.18057066202163696
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,24,24,128,1,float16,float16,32767,0.3428693215052287
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,24,24,128,1,float16,fp8,32767,0.19691733519236246
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1024,1,24,24,128,1,float16,float16,1,0.2995199958483378
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1024,1,24,24,128,1,float16,fp8,3,0.21742399533589682
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1024,1,24,24,128,1,float16,fp8,1,0.21741332610448202
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1024,1,24,24,128,1,float16,float16,3,0.30190932750701904
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1024,1,24,24,128,1,float16,float16,7,0.2995306650797526
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1024,1,24,24,128,1,float16,fp8,7,0.21779733896255493
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1024,1,24,24,128,1,float16,float16,15,0.29713600873947144
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1024,1,24,24,128,1,float16,fp8,31,0.21742399533589682
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1024,1,24,24,128,1,float16,fp8,15,0.21742933988571167
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1024,1,24,24,128,1,float16,float16,31,0.2998720010121663
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1024,1,24,24,128,1,float16,float16,63,0.29577066500981647
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1024,1,24,24,128,1,float16,fp8,127,0.21573332945505777
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1024,1,24,24,128,1,float16,float16,127,0.29781333605448407
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1024,1,24,24,128,1,float16,fp8,63,0.2194719910621643
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2048,1,24,24,128,1,float16,float16,1,0.5785866578420004
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2048,1,24,24,128,1,float16,fp8,1,0.420693318049113
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2048,1,24,24,128,1,float16,fp8,3,0.4227466583251953
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2048,1,24,24,128,1,float16,float16,7,0.5782186587651571
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2048,1,24,24,128,1,float16,fp8,7,0.4203519821166992
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2048,1,24,24,128,1,float16,float16,31,0.5782186587651571
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2048,1,24,24,128,1,float16,float16,15,0.5829973220825195
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2048,1,24,24,128,1,float16,fp8,15,0.4230773448944092
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2048,1,24,24,128,1,float16,float16,3,0.5785653193791708
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,24,24,128,1,float16,fp8,1,0.014677333335081736
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2048,1,24,24,128,1,float16,fp8,31,0.4210346539815267
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,24,24,128,1,float16,float16,1,0.014682666709025701
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,24,24,128,1,float16,float16,3,0.015018666783968607
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,24,24,128,1,float16,fp8,3,0.012970666090647379
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,24,24,128,1,float16,fp8,7,0.012970666090647379
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,24,24,128,1,float16,float16,15,0.015018666783968607
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,24,24,128,1,float16,float16,31,0.014671999961137772
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,24,24,128,1,float16,fp8,15,0.012965332716703415
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,24,24,128,1,float16,fp8,31,0.012970666090647379
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,24,24,128,1,float16,float16,63,0.014677333335081736
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2048,1,24,24,128,1,float16,float16,63,0.5843626658121744
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2048,1,24,24,128,1,float16,fp8,63,0.41999999682108563
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,24,24,128,1,float16,fp8,63,0.014682666709025701
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,24,24,128,1,float16,float16,7,0.014677333335081736
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,24,24,128,1,float16,fp8,255,0.012981332838535309
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,24,24,128,1,float16,float16,127,0.014671999961137772
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,24,24,128,1,float16,float16,255,0.014677333335081736
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,24,24,128,1,float16,fp8,127,0.012970666090647379
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,24,24,128,1,float16,float16,1023,0.02697066714366277
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,24,24,128,1,float16,float16,511,0.01877333347996076
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,24,24,128,1,float16,float16,2047,0.049839998284975685
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,24,24,128,1,float16,fp8,511,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,24,24,128,1,float16,fp8,2047,0.03549866626660029
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,24,24,128,1,float16,float16,4095,0.08430932958920796
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,24,24,128,1,float16,float16,8191,0.1474613348642985
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,24,24,128,1,float16,fp8,4095,0.0529120018084844
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,24,24,128,1,float16,float16,16383,0.2773333390553792
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,16,16,128,1,float16,float16,3,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,24,24,128,1,float16,fp8,1023,0.020821332931518555
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,16,16,128,1,float16,float16,1,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,16,16,128,1,float16,fp8,1,0.015024000157912573
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,24,24,128,1,float16,fp8,8191,0.08669867118199666
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,16,16,128,1,float16,fp8,3,0.014677333335081736
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,24,24,128,1,float16,fp8,16383,0.1537866691748301
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,16,16,128,1,float16,float16,7,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,16,16,128,1,float16,fp8,7,0.014677333335081736
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,16,16,128,1,float16,fp8,15,0.014677333335081736
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,16,16,128,1,float16,float16,31,0.01674666628241539
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,16,16,128,1,float16,float16,63,0.016048000504573185
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,16,16,128,1,float16,float16,127,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,16,16,128,1,float16,fp8,127,0.014682666709025701
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,16,16,128,1,float16,fp8,31,0.015018666783968607
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,16,16,128,1,float16,fp8,63,0.015018666783968607
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,16,16,128,1,float16,fp8,511,0.018768000106016796
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,16,16,128,1,float16,float16,255,0.016384000579516094
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,16,16,128,1,float16,float16,511,0.021162666380405426
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,16,16,128,1,float16,fp8,255,0.014671999961137772
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,16,16,128,1,float16,float16,1023,0.03515200068553289
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,16,16,128,1,float16,float16,15,0.016832000265518825
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,16,16,128,1,float16,fp8,1023,0.023210667073726654
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,16,16,128,1,float16,fp8,2047,0.04232533276081085
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,16,16,128,1,float16,float16,2047,0.062122667829195656
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,16,16,128,1,float16,fp8,8191,0.10922666390736897
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,16,16,128,1,float16,fp8,4095,0.06552533308664958
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,16,16,128,1,float16,float16,4095,0.10513066252072652
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,16,16,128,1,float16,float16,8191,0.19131733973821005
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,16,16,128,1,float16,float16,1,0.010922666639089584
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,16,16,128,1,float16,float16,3,0.010698666175206503
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,16,16,128,1,float16,fp8,3,0.010922666639089584
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,16,16,128,1,float16,float16,7,0.010581333190202713
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,16,16,128,1,float16,fp8,16383,0.19914132356643677
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,16,16,128,1,float16,float16,16383,0.36232535044352215
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,16,16,128,1,float16,float16,15,0.010581333190202713
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,16,16,128,1,float16,fp8,7,0.010581333190202713
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,16,16,128,1,float16,fp8,1,0.010922666639089584
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,16,16,128,1,float16,fp8,15,0.010581333190202713
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,16,16,128,1,float16,float16,31,0.010565333068370819
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,16,16,128,1,float16,float16,127,0.010581333190202713
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,16,16,128,1,float16,fp8,31,0.010575999816258749
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,16,16,128,1,float16,fp8,127,0.010575999816258749
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,16,16,128,1,float16,fp8,255,0.010826667149861654
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,16,16,128,1,float16,float16,63,0.010581333190202713
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,16,16,128,1,float16,float16,255,0.010581333190202713
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,16,16,128,1,float16,fp8,63,0.010581333190202713
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,16,16,128,1,float16,fp8,511,0.01090666651725769
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,16,16,128,1,float16,float16,511,0.01126933346192042
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,16,16,128,1,float16,fp8,1023,0.010922666639089584
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,16,16,128,1,float16,float16,1023,0.010570666442314783
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,16,16,128,1,float16,float16,2047,0.014682666709025701
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,16,16,128,1,float16,fp8,4095,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,16,16,128,1,float16,float16,8191,0.018789333601792652
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,16,16,128,1,float16,fp8,2047,0.014688000082969666
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,16,16,128,1,float16,float16,4095,0.016735999534527462
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,16,16,128,1,float16,fp8,8191,0.018090666582187016
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,16,16,128,1,float16,float16,16383,0.022863999009132385
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,16,16,128,1,float16,float16,32767,0.03788800040880839
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,16,16,128,1,float16,fp8,16383,0.020138667275508244
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,16,16,128,1,float16,fp8,32767,0.02491733431816101
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,16,16,128,1,float16,fp8,3,0.010826667149861654
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,16,16,128,1,float16,float16,3,0.010581333190202713
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,16,16,128,1,float16,fp8,1,0.010586666564146677
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,16,16,128,1,float16,float16,65535,0.05632533133029938
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,16,16,128,1,float16,float16,1,0.010581333190202713
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,16,16,128,1,float16,float16,7,0.010586666564146677
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,16,16,128,1,float16,fp8,65535,0.03754666695992152
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,16,16,128,1,float16,float16,15,0.010581333190202713
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,16,16,128,1,float16,fp8,7,0.010575999816258749
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,16,16,128,1,float16,fp8,15,0.010575999816258749
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,16,16,128,1,float16,fp8,31,0.010543999572594961
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,16,16,128,1,float16,float16,31,0.01055466632048289
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,16,16,128,1,float16,fp8,63,0.010634666929642359
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,16,16,128,1,float16,fp8,127,0.010821333775917688
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,16,16,128,1,float16,float16,255,0.010656000425418219
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,16,16,128,1,float16,float16,63,0.010645333677530289
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,16,16,128,1,float16,float16,127,0.010581333190202713
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,16,16,128,1,float16,fp8,255,0.010575999816258749
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,16,16,128,1,float16,float16,511,0.012629333883523941
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,16,16,128,1,float16,fp8,511,0.011306667079528173
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,16,16,128,1,float16,fp8,1023,0.01219733307758967
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,16,16,128,1,float16,float16,2047,0.014682666709025701
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,16,16,128,1,float16,fp8,4095,0.014677333335081736
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,16,16,128,1,float16,float16,1023,0.012608000387748083
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,16,16,128,1,float16,float16,8191,0.020810666183630627
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,16,16,128,1,float16,float16,4095,0.01674666628241539
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,16,16,128,1,float16,fp8,2047,0.012970666090647379
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,16,16,128,1,float16,float16,16383,0.03618666778008143
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,16,16,128,1,float16,fp8,16383,0.023205332458019257
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,16,16,128,1,float16,fp8,8191,0.017050666113694508
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,16,16,128,1,float16,float16,32767,0.05769066512584686
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,16,16,128,1,float16,float16,65535,0.0993226667245229
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,16,16,128,1,float16,fp8,32767,0.03754666695992152
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,16,16,128,1,float16,fp8,1,0.019120000302791595
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,16,16,128,1,float16,float16,3,0.022869333624839783
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,16,16,128,1,float16,float16,1,0.022522665560245514
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,16,16,128,1,float16,fp8,65535,0.0580266664425532
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,16,16,128,1,float16,fp8,3,0.01877333347996076
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,16,16,128,1,float16,float16,7,0.021498667697111767
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,16,16,128,1,float16,fp8,7,0.019109333554903667
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,16,16,128,1,float16,fp8,15,0.01877333347996076
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,16,16,128,1,float16,float16,31,0.022869333624839783
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,16,16,128,1,float16,float16,15,0.022863999009132385
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,16,16,128,1,float16,fp8,31,0.01877333347996076
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,16,16,128,1,float16,float16,127,0.021498667697111767
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,16,16,128,1,float16,float16,63,0.022874665757020313
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,16,16,128,1,float16,fp8,63,0.01877333347996076
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,16,16,128,1,float16,fp8,127,0.018778666853904724
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,16,16,128,1,float16,float16,255,0.022863999009132385
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,16,16,128,1,float16,fp8,255,0.01877333347996076
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,16,16,128,1,float16,float16,511,0.03515733281771342
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,16,16,128,1,float16,float16,1023,0.054287999868392944
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,16,16,128,1,float16,fp8,2047,0.06144000093142191
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,16,16,128,1,float16,float16,2047,0.09796266754468282
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,16,16,128,1,float16,float16,4095,0.17851734161376953
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,16,16,128,1,float16,fp8,511,0.02491733431816101
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,16,16,128,1,float16,fp8,4095,0.10069333513577779
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,16,16,128,1,float16,fp8,1023,0.037205333511034645
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,16,16,128,1,float16,float16,1,0.010666667173306147
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,16,16,128,1,float16,float16,3,0.010837333897749582
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,16,16,128,1,float16,fp8,1,0.010954666882753372
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,16,16,128,1,float16,float16,7,0.010581333190202713
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,16,16,128,1,float16,fp8,8191,0.1790293256441752
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,16,16,128,1,float16,float16,8191,0.33433600266774494
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,16,16,128,1,float16,fp8,3,0.010832000523805618
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,16,16,128,1,float16,fp8,7,0.010928000013033548
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,16,16,128,1,float16,fp8,15,0.010581333190202713
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,16,16,128,1,float16,float16,15,0.010687999427318573
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,16,16,128,1,float16,fp8,31,0.010506667196750641
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,16,16,128,1,float16,float16,31,0.010586666564146677
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,16,16,128,1,float16,fp8,127,0.010944000134865442
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,16,16,128,1,float16,float16,63,0.01091733326514562
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,16,16,128,1,float16,float16,127,0.010581333190202713
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,16,16,128,1,float16,fp8,63,0.010581333190202713
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,16,16,128,1,float16,float16,255,0.010480000327030817
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,16,16,128,1,float16,float16,511,0.01090666651725769
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,16,16,128,1,float16,fp8,255,0.010597333312034607
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,16,16,128,1,float16,fp8,511,0.012634667257467905
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,16,16,128,1,float16,float16,1023,0.012655999511480331
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,16,16,128,1,float16,float16,2047,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,16,16,128,1,float16,float16,4095,0.020848001043001812
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,16,16,128,1,float16,fp8,1023,0.012624000509579977
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,16,16,128,1,float16,fp8,4095,0.01877333347996076
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,16,16,128,1,float16,fp8,2047,0.016714667280515034
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,16,16,128,1,float16,float16,8191,0.038906666139761605
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,16,16,128,1,float16,fp8,8191,0.025258667767047882
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,16,16,128,1,float16,float16,16383,0.06007466713587443
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,16,16,128,1,float16,float16,32767,0.10103467106819153
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,16,16,128,1,float16,fp8,16383,0.039605334401130676
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,16,16,128,1,float16,float16,1,0.03344533344109853
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,16,16,128,1,float16,fp8,32767,0.06010133524735769
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,16,16,128,1,float16,float16,65535,0.1858560045560201
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,16,16,128,1,float16,fp8,1,0.02902399996916453
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,16,16,128,1,float16,float16,3,0.03345066557327906
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,16,16,128,1,float16,fp8,3,0.027306665976842243
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,16,16,128,1,float16,fp8,65535,0.10308800141016643
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,16,16,128,1,float16,fp8,7,0.027306665976842243
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,16,16,128,1,float16,float16,7,0.03379199902216593
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,16,16,128,1,float16,fp8,15,0.029018667836983997
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,16,16,128,1,float16,float16,15,0.03345600018898646
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,16,16,128,1,float16,float16,63,0.03345600018898646
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,16,16,128,1,float16,fp8,31,0.027306665976842243
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,16,16,128,1,float16,float16,127,0.0341333324710528
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,16,16,128,1,float16,float16,31,0.03446933378775915
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,16,16,128,1,float16,fp8,127,0.028330666323502857
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,16,16,128,1,float16,fp8,63,0.02834133307139079
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,16,16,128,1,float16,float16,255,0.037205333511034645
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,16,16,128,1,float16,float16,511,0.05563733478387197
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,16,16,128,1,float16,float16,1023,0.09490133325258891
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,16,16,128,1,float16,fp8,511,0.041984001795450844
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,16,16,128,1,float16,fp8,255,0.026965332527955372
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,16,16,128,1,float16,float16,2047,0.17835734287897745
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,16,16,128,1,float16,fp8,1023,0.06178133189678192
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,16,16,128,1,float16,fp8,2047,0.10513066252072652
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,16,16,128,1,float16,float16,4095,0.33604268232981366
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,16,16,128,1,float16,fp8,4095,0.18278400103251138
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,256,1,16,16,128,1,float16,float16,1,0.0580266664425532
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,256,1,16,16,128,1,float16,fp8,1,0.04539733131726583
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,256,1,16,16,128,1,float16,fp8,3,0.04505600035190582
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,256,1,16,16,128,1,float16,float16,3,0.059392000238100685
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,256,1,16,16,128,1,float16,float16,7,0.0580320010582606
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,256,1,16,16,128,1,float16,float16,15,0.05801066756248474
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,256,1,16,16,128,1,float16,fp8,15,0.04540266593297323
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,256,1,16,16,128,1,float16,fp8,7,0.04539200166861216
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,256,1,16,16,128,1,float16,float16,31,0.05769066512584686
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,256,1,16,16,128,1,float16,fp8,31,0.04539733131726583
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,256,1,16,16,128,1,float16,float16,63,0.057664001981417336
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,256,1,16,16,128,1,float16,float16,127,0.06007466713587443
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,256,1,16,16,128,1,float16,fp8,127,0.04369066655635834
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,256,1,16,16,128,1,float16,fp8,63,0.04539733131726583
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,256,1,16,16,128,1,float16,float16,255,0.06178133189678192
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,256,1,16,16,128,1,float16,float16,511,0.09727999567985535
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,256,1,16,16,128,1,float16,fp8,255,0.04744533201058706
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,256,1,16,16,128,1,float16,fp8,511,0.07031466563542683
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,256,1,16,16,128,1,float16,float16,1023,0.17289066314697266
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,512,1,16,16,128,1,float16,float16,1,0.10683733224868774
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,512,1,16,16,128,1,float16,float16,3,0.10785067081451416
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,256,1,16,16,128,1,float16,fp8,1023,0.10717866818110149
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,512,1,16,16,128,1,float16,fp8,1,0.08020266890525818
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,512,1,16,16,128,1,float16,float16,7,0.10751466949780782
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,512,1,16,16,128,1,float16,float16,31,0.10683733224868774
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,512,1,16,16,128,1,float16,fp8,15,0.07850666840871175
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,512,1,16,16,128,1,float16,fp8,7,0.08021333316961925
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,512,1,16,16,128,1,float16,float16,15,0.10717866818110149
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,512,1,16,16,128,1,float16,fp8,3,0.07850133379300435
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,512,1,16,16,128,1,float16,fp8,31,0.07850666840871175
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,512,1,16,16,128,1,float16,float16,63,0.1088853379090627
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,512,1,16,16,128,1,float16,float16,127,0.10717866818110149
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,512,1,16,16,128,1,float16,fp8,63,0.08021333316961925
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,512,1,16,16,128,1,float16,fp8,127,0.08021333316961925
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,512,1,16,16,128,1,float16,float16,255,0.10922666390736897
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,16,16,128,1,float16,fp8,1,0.010816000401973724
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,16,16,128,1,float16,fp8,3,0.01091733326514562
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,16,16,128,1,float16,float16,3,0.011626667032639185
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,512,1,16,16,128,1,float16,fp8,255,0.08056533336639404
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,16,16,128,1,float16,float16,1,0.01091733326514562
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,512,1,16,16,128,1,float16,float16,511,0.1800533334414164
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,512,1,16,16,128,1,float16,fp8,511,0.12494933605194092
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,16,16,128,1,float16,fp8,7,0.010575999816258749
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,16,16,128,1,float16,float16,15,0.010581333190202713
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,16,16,128,1,float16,float16,7,0.011264000087976456
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,16,16,128,1,float16,fp8,15,0.010581333190202713
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,16,16,128,1,float16,fp8,31,0.011264000087976456
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,16,16,128,1,float16,float16,31,0.010922666639089584
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,16,16,128,1,float16,float16,63,0.010928000013033548
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,16,16,128,1,float16,fp8,127,0.010922666639089584
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,16,16,128,1,float16,float16,127,0.01126933346192042
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,16,16,128,1,float16,fp8,63,0.011605333536863327
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,16,16,128,1,float16,fp8,255,0.010581333190202713
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,16,16,128,1,float16,float16,255,0.011264000087976456
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,16,16,128,1,float16,float16,511,0.012613333761692047
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,16,16,128,1,float16,float16,1023,0.014666666587193808
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,16,16,128,1,float16,fp8,1023,0.014677333335081736
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,16,16,128,1,float16,float16,2047,0.021503999829292297
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,16,16,128,1,float16,fp8,511,0.012629333883523941
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,16,16,128,1,float16,float16,4095,0.03925866633653641
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,16,16,128,1,float16,fp8,2047,0.019120000302791595
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,16,16,128,1,float16,fp8,4095,0.024933333198229473
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,16,16,128,1,float16,fp8,8191,0.04095999896526337
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,16,16,128,1,float16,float16,8191,0.06144533554712931
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,16,16,128,1,float16,float16,16383,0.10342933734258015
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,16,16,128,1,float16,fp8,16383,0.06283199787139893
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,16,16,128,1,float16,float16,32767,0.1877280076344808
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,16,16,128,1,float16,fp8,32767,0.10718400279680888
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1024,1,16,16,128,1,float16,float16,1,0.20241065820058188
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,16,16,128,1,float16,float16,65535,0.3568640152613322
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,16,16,128,1,float16,fp8,65535,0.1969546675682068
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1024,1,16,16,128,1,float16,float16,3,0.2015626629193624
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1024,1,16,16,128,1,float16,fp8,3,0.14967466394106546
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1024,1,16,16,128,1,float16,fp8,1,0.15017599860827127
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1024,1,16,16,128,1,float16,float16,7,0.20191999276479086
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1024,1,16,16,128,1,float16,fp8,7,0.15001599987347922
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1024,1,16,16,128,1,float16,float16,15,0.20121600230534872
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1024,1,16,16,128,1,float16,fp8,31,0.149807999531428
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1024,1,16,16,128,1,float16,float16,63,0.2032639980316162
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1024,1,16,16,128,1,float16,float16,31,0.2032639980316162
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1024,1,16,16,128,1,float16,fp8,15,0.1493280033270518
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1024,1,16,16,128,1,float16,fp8,63,0.14984533190727234
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1024,1,16,16,128,1,float16,float16,127,0.2015786568323771
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1024,1,16,16,128,1,float16,fp8,127,0.14847999811172485
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1024,1,16,16,128,1,float16,float16,255,0.20309333006540933
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1024,1,16,16,128,1,float16,fp8,255,0.14779733618100485
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2048,1,16,16,128,1,float16,float16,1,0.39458131790161133
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2048,1,16,16,128,1,float16,fp8,1,0.2872320016225179
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2048,1,16,16,128,1,float16,float16,3,0.3916800022125244
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2048,1,16,16,128,1,float16,fp8,3,0.28757333755493164
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2048,1,16,16,128,1,float16,fp8,7,0.2868959903717041
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2048,1,16,16,128,1,float16,float16,15,0.3916800022125244
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2048,1,16,16,128,1,float16,fp8,15,0.2855253418286641
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2048,1,16,16,128,1,float16,float16,7,0.3916800022125244
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2048,1,16,16,128,1,float16,float16,31,0.3945866823196411
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,16,16,128,1,float16,float16,1,0.012639999389648438
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2048,1,16,16,128,1,float16,fp8,31,0.28756799300511676
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2048,1,16,16,128,1,float16,float16,63,0.3916800022125244
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2048,1,16,16,128,1,float16,fp8,63,0.28757866223653156
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,16,16,128,1,float16,float16,3,0.012975999464591345
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,16,16,128,1,float16,fp8,3,0.012629333883523941
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,16,16,128,1,float16,float16,7,0.012970666090647379
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,16,16,128,1,float16,fp8,7,0.012986666212479273
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,16,16,128,1,float16,float16,15,0.012970666090647379
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2048,1,16,16,128,1,float16,float16,127,0.39322133858998615
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,16,16,128,1,float16,fp8,15,0.012639999389648438
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,16,16,128,1,float16,float16,31,0.012970666090647379
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,16,16,128,1,float16,fp8,1,0.012938667088747025
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2048,1,16,16,128,1,float16,fp8,127,0.283135990301768
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,16,16,128,1,float16,fp8,31,0.012624000509579977
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,16,16,128,1,float16,float16,63,0.012800000607967377
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,16,16,128,1,float16,fp8,63,0.012629333883523941
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,16,16,128,1,float16,fp8,127,0.012629333883523941
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,16,16,128,1,float16,float16,255,0.012986666212479273
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,16,16,128,1,float16,float16,127,0.012965332716703415
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,16,16,128,1,float16,fp8,511,0.014682666709025701
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,16,16,128,1,float16,float16,1023,0.01911466692884763
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,16,16,128,1,float16,fp8,1023,0.0170666662355264
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,16,16,128,1,float16,float16,511,0.016730666160583496
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,16,16,128,1,float16,float16,2047,0.03925333420435587
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,16,16,128,1,float16,fp8,2047,0.02696000039577484
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,16,16,128,1,float16,fp8,255,0.012517333030700684
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,16,16,128,1,float16,fp8,4095,0.04130133241415024
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,16,16,128,1,float16,float16,8191,0.10443733135859172
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,16,16,128,1,float16,float16,4095,0.062122667829195656
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,16,16,128,1,float16,fp8,8191,0.06414400041103363
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,16,16,128,1,float16,fp8,16383,0.10854400197664897
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,16,16,128,1,float16,float16,16383,0.18911999464035034
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,16,16,128,1,float16,float16,32767,0.36130134264628094
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,12,12,128,1,float16,float16,1,0.014677333335081736
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,12,12,128,1,float16,fp8,1,0.012981332838535309
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,12,12,128,1,float16,fp8,3,0.01469333345691363
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,12,12,128,1,float16,float16,7,0.014677333335081736
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,12,12,128,1,float16,float16,3,0.014677333335081736
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,12,12,128,1,float16,fp8,7,0.012975999464591345
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,12,12,128,1,float16,float16,15,0.014677333335081736
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,12,12,128,1,float16,float16,63,0.014671999961137772
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,12,12,128,1,float16,fp8,15,0.012944000462690989
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,12,12,128,1,float16,fp8,31,0.012975999464591345
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,12,12,128,1,float16,float16,31,0.014682666709025701
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,12,12,128,1,float16,fp8,63,0.012970666090647379
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,16,16,128,1,float16,fp8,32767,0.1971199909845988
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,12,12,128,1,float16,fp8,127,0.014677333335081736
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,12,12,128,1,float16,float16,127,0.014677333335081736
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,12,12,128,1,float16,float16,255,0.01470400020480156
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,12,12,128,1,float16,fp8,255,0.012970666090647379
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,12,12,128,1,float16,float16,511,0.01877333347996076
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,12,12,128,1,float16,fp8,511,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,12,12,128,1,float16,float16,1023,0.026965332527955372
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,12,12,128,1,float16,float16,2047,0.05186133086681366
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,12,12,128,1,float16,float16,4095,0.08294400076071422
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,12,12,128,1,float16,fp8,2047,0.035162667433420815
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,12,12,128,1,float16,fp8,4095,0.05358933409055074
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,12,12,128,1,float16,float16,8191,0.14813866217931113
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,12,12,128,1,float16,fp8,8191,0.08669867118199666
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,12,12,128,1,float16,float16,1,0.010687999427318573
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,12,12,128,1,float16,fp8,1,0.010581333190202713
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,12,12,128,1,float16,float16,16383,0.27767467498779297
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,12,12,128,1,float16,fp8,16383,0.15428266922632852
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,12,12,128,1,float16,float16,3,0.010586666564146677
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,12,12,128,1,float16,fp8,1023,0.02082666630546252
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,12,12,128,1,float16,fp8,3,0.010581333190202713
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,12,12,128,1,float16,float16,7,0.010928000013033548
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,12,12,128,1,float16,fp8,7,0.01090666651725769
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,12,12,128,1,float16,float16,15,0.010581333190202713
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,12,12,128,1,float16,float16,31,0.010581333190202713
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,12,12,128,1,float16,fp8,31,0.010581333190202713
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,12,12,128,1,float16,fp8,15,0.010581333190202713
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,12,12,128,1,float16,fp8,63,0.01055466632048289
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,12,12,128,1,float16,float16,127,0.01044800008336703
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,12,12,128,1,float16,fp8,127,0.010586666564146677
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,12,12,128,1,float16,float16,255,0.010586666564146677
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,12,12,128,1,float16,fp8,255,0.010597333312034607
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,12,12,128,1,float16,float16,511,0.010634666929642359
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,12,12,128,1,float16,float16,1023,0.010586666564146677
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,12,12,128,1,float16,fp8,511,0.01091733326514562
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,12,12,128,1,float16,float16,2047,0.012634667257467905
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,12,12,128,1,float16,fp8,1023,0.010928000013033548
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,12,12,128,1,float16,float16,63,0.010575999816258749
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,12,12,128,1,float16,float16,4095,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,12,12,128,1,float16,fp8,2047,0.012629333883523941
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,12,12,128,1,float16,fp8,4095,0.016719999412695568
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,12,12,128,1,float16,float16,8191,0.018789333601792652
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,12,12,128,1,float16,fp8,16383,0.01877333347996076
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,12,12,128,1,float16,fp8,8191,0.017743999759356182
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,12,12,128,1,float16,float16,16383,0.02186666677395503
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,12,12,128,1,float16,fp8,32767,0.022863999009132385
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,12,12,128,1,float16,float16,65535,0.04881600042184194
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,12,12,128,1,float16,float16,1,0.010581333190202713
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,12,12,128,1,float16,fp8,1,0.010608000059922537
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,12,12,128,1,float16,float16,3,0.010581333190202713
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,12,12,128,1,float16,fp8,3,0.010693332801262537
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,12,12,128,1,float16,float16,7,0.010581333190202713
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,12,12,128,1,float16,fp8,65535,0.03310933212439219
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,12,12,128,1,float16,float16,32767,0.02934933453798294
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,12,12,128,1,float16,fp8,15,0.010586666564146677
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,12,12,128,1,float16,float16,15,0.010586666564146677
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,12,12,128,1,float16,fp8,7,0.010949333508809408
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,12,12,128,1,float16,float16,63,0.010581333190202713
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,12,12,128,1,float16,float16,31,0.010581333190202713
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,12,12,128,1,float16,fp8,31,0.010666667173306147
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,12,12,128,1,float16,fp8,63,0.010586666564146677
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,12,12,128,1,float16,fp8,127,0.010640000303586325
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,12,12,128,1,float16,float16,511,0.010581333190202713
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,12,12,128,1,float16,fp8,511,0.010922666639089584
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,12,12,128,1,float16,float16,255,0.010586666564146677
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,12,12,128,1,float16,fp8,255,0.010928000013033548
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,12,12,128,1,float16,float16,1023,0.012629333883523941
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,12,12,128,1,float16,fp8,1023,0.012634667257467905
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,12,12,128,1,float16,fp8,2047,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,12,12,128,1,float16,float16,127,0.010575999816258749
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,12,12,128,1,float16,float16,2047,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,12,12,128,1,float16,float16,4095,0.01877333347996076
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,12,12,128,1,float16,fp8,8191,0.01877333347996076
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,12,12,128,1,float16,float16,8191,0.021525333325068157
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,12,12,128,1,float16,fp8,4095,0.018778666853904724
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,12,12,128,1,float16,float16,16383,0.029690665503342945
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,12,12,128,1,float16,fp8,16383,0.022869333624839783
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,12,12,128,1,float16,float16,1,0.01911466692884763
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,12,12,128,1,float16,fp8,32767,0.031744000812371574
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,12,12,128,1,float16,fp8,65535,0.04746133089065552
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,12,12,128,1,float16,float16,65535,0.07850133379300435
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,12,12,128,1,float16,fp8,1,0.0170666662355264
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,12,12,128,1,float16,float16,32767,0.04915733138720194
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,12,12,128,1,float16,fp8,3,0.0170666662355264
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,12,12,128,1,float16,float16,3,0.018789333601792652
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,12,12,128,1,float16,float16,7,0.01911466692884763
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,12,12,128,1,float16,fp8,7,0.017055999487638474
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,12,12,128,1,float16,float16,15,0.019130667050679524
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,12,12,128,1,float16,float16,63,0.01911466692884763
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,12,12,128,1,float16,float16,31,0.01877333347996076
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,12,12,128,1,float16,fp8,15,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,12,12,128,1,float16,fp8,63,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,12,12,128,1,float16,float16,127,0.019120000302791595
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,12,12,128,1,float16,float16,255,0.018800000349680584
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,12,12,128,1,float16,fp8,127,0.0170666662355264
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,12,12,128,1,float16,fp8,255,0.0170666662355264
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,12,12,128,1,float16,float16,511,0.027989332874615986
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,12,12,128,1,float16,float16,1023,0.04607999821503957
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,12,12,128,1,float16,fp8,1023,0.03311466674009959
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,12,12,128,1,float16,fp8,511,0.021162666380405426
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,12,12,128,1,float16,fp8,31,0.0170666662355264
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,12,12,128,1,float16,float16,2047,0.08433066805203755
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,12,12,128,1,float16,float16,4095,0.14983999729156494
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,12,12,128,1,float16,fp8,2047,0.05495466788609823
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,12,12,128,1,float16,float16,1,0.010581333190202713
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,12,12,128,1,float16,fp8,4095,0.08840533097585042
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,12,12,128,1,float16,float16,8191,0.2797279953956604
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,12,12,128,1,float16,fp8,8191,0.15446399648984274
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,12,12,128,1,float16,float16,3,0.010826667149861654
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,12,12,128,1,float16,fp8,3,0.010960000256697336
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,12,12,128,1,float16,fp8,7,0.010640000303586325
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,12,12,128,1,float16,float16,7,0.010928000013033548
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,12,12,128,1,float16,fp8,15,0.010922666639089584
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,12,12,128,1,float16,float16,31,0.010581333190202713
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,12,12,128,1,float16,float16,15,0.010581333190202713
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,12,12,128,1,float16,fp8,31,0.010821333775917688
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,12,12,128,1,float16,float16,63,0.010575999816258749
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,12,12,128,1,float16,fp8,1,0.010581333190202713
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,12,12,128,1,float16,fp8,63,0.010581333190202713
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,12,12,128,1,float16,fp8,127,0.010581333190202713
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,12,12,128,1,float16,float16,255,0.010591999938090643
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,12,12,128,1,float16,float16,127,0.01055466632048289
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,12,12,128,1,float16,fp8,511,0.012629333883523941
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,12,12,128,1,float16,float16,511,0.012629333883523941
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,12,12,128,1,float16,fp8,255,0.010581333190202713
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,12,12,128,1,float16,fp8,1023,0.014677333335081736
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,12,12,128,1,float16,float16,2047,0.018768000106016796
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,12,12,128,1,float16,fp8,2047,0.018778666853904724
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,12,12,128,1,float16,float16,4095,0.021162666380405426
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,12,12,128,1,float16,fp8,4095,0.0194560003777345
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,12,12,128,1,float16,float16,8191,0.0290133332212766
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,12,12,128,1,float16,fp8,8191,0.022863999009132385
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,12,12,128,1,float16,fp8,16383,0.03515733281771342
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,12,12,128,1,float16,float16,16383,0.051882664362589516
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,12,12,128,1,float16,float16,1023,0.014682666709025701
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,12,12,128,1,float16,fp8,32767,0.04914666712284088
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,12,12,128,1,float16,float16,32767,0.08055466910203297
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,12,12,128,1,float16,fp8,65535,0.07679999868075053
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,12,12,128,1,float16,float16,65535,0.14011733730634054
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,12,12,128,1,float16,fp8,1,0.023210667073726654
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,12,12,128,1,float16,float16,1,0.02900800108909607
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,12,12,128,1,float16,float16,3,0.0290133332212766
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,12,12,128,1,float16,float16,7,0.027994667490323383
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,12,12,128,1,float16,fp8,7,0.023205332458019257
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,12,12,128,1,float16,float16,15,0.027647999425729115
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,12,12,128,1,float16,fp8,15,0.023210667073726654
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,12,12,128,1,float16,fp8,31,0.023210667073726654
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,12,12,128,1,float16,float16,31,0.0290133332212766
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,12,12,128,1,float16,float16,63,0.0290133332212766
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,12,12,128,1,float16,fp8,3,0.022869333624839783
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,12,12,128,1,float16,fp8,63,0.023210667073726654
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,12,12,128,1,float16,fp8,127,0.022869333624839783
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,12,12,128,1,float16,float16,127,0.0290133332212766
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,12,12,128,1,float16,float16,255,0.029696000119050343
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,12,12,128,1,float16,fp8,255,0.023215999205907185
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,12,12,128,1,float16,fp8,1023,0.04983466863632202
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,12,12,128,1,float16,float16,2047,0.14045866330464682
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,12,12,128,1,float16,fp8,2047,0.08635733524958293
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,12,12,128,1,float16,float16,511,0.04572799801826477
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,12,12,128,1,float16,float16,4095,0.26469866434733075
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,12,12,128,1,float16,fp8,511,0.03514133393764496
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,256,1,12,12,128,1,float16,float16,1,0.04573333263397217
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,12,12,128,1,float16,fp8,4095,0.1474613348642985
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,256,1,12,12,128,1,float16,fp8,1,0.03721066564321518
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,256,1,12,12,128,1,float16,float16,3,0.045738667249679565
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,12,12,128,1,float16,float16,1023,0.07541866600513458
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,256,1,12,12,128,1,float16,float16,7,0.0460746685663859
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,256,1,12,12,128,1,float16,fp8,3,0.0365280012289683
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,256,1,12,12,128,1,float16,fp8,7,0.036501333117485046
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,256,1,12,12,128,1,float16,float16,15,0.04573333263397217
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,256,1,12,12,128,1,float16,fp8,15,0.03721066564321518
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,256,1,12,12,128,1,float16,fp8,31,0.03581333408753077
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,256,1,12,12,128,1,float16,fp8,63,0.037205333511034645
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,256,1,12,12,128,1,float16,float16,31,0.04744533201058706
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,256,1,12,12,128,1,float16,float16,127,0.04778666794300079
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,256,1,12,12,128,1,float16,fp8,127,0.03549866626660029
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,256,1,12,12,128,1,float16,float16,63,0.04574933151404063
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,256,1,12,12,128,1,float16,fp8,255,0.037205333511034645
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,256,1,12,12,128,1,float16,float16,511,0.07611733178297679
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,256,1,12,12,128,1,float16,float16,255,0.04952000081539154
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,256,1,12,12,128,1,float16,fp8,511,0.05564799904823303
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,256,1,12,12,128,1,float16,fp8,1023,0.08396266897519429
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,256,1,12,12,128,1,float16,float16,1023,0.13397866487503052
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,512,1,12,12,128,1,float16,float16,1,0.08362133304278056
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,512,1,12,12,128,1,float16,float16,3,0.08430932958920796
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,512,1,12,12,128,1,float16,fp8,3,0.06177600224812826
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,512,1,12,12,128,1,float16,float16,7,0.08328000207742055
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,512,1,12,12,128,1,float16,fp8,7,0.06178133189678192
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,512,1,12,12,128,1,float16,float16,15,0.08396266897519429
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,512,1,12,12,128,1,float16,float16,31,0.08363200227419536
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,512,1,12,12,128,1,float16,fp8,1,0.06177600224812826
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,512,1,12,12,128,1,float16,fp8,15,0.06211733321348826
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,512,1,12,12,128,1,float16,fp8,31,0.062128002444903054
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,512,1,12,12,128,1,float16,float16,127,0.08260266482830048
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,512,1,12,12,128,1,float16,fp8,63,0.062122667829195656
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,512,1,12,12,128,1,float16,fp8,127,0.0631466656923294
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,512,1,12,12,128,1,float16,float16,255,0.08463999629020691
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,512,1,12,12,128,1,float16,fp8,511,0.09693333506584167
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,12,12,128,1,float16,float16,1,0.010645333677530289
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,512,1,12,12,128,1,float16,float16,511,0.1360266705354055
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,512,1,12,12,128,1,float16,fp8,255,0.06348266700903575
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,12,12,128,1,float16,float16,3,0.010575999816258749
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,12,12,128,1,float16,fp8,3,0.010581333190202713
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,12,12,128,1,float16,fp8,1,0.011264000087976456
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,512,1,12,12,128,1,float16,float16,63,0.08260799944400787
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,12,12,128,1,float16,float16,7,0.01081066702802976
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,12,12,128,1,float16,fp8,7,0.010581333190202713
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,12,12,128,1,float16,float16,15,0.010597333312034607
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,12,12,128,1,float16,fp8,31,0.010581333190202713
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,12,12,128,1,float16,float16,127,0.010805333654085795
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,12,12,128,1,float16,float16,31,0.010597333312034607
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,12,12,128,1,float16,fp8,127,0.010597333312034607
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,12,12,128,1,float16,float16,63,0.010581333190202713
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,12,12,128,1,float16,fp8,15,0.010928000013033548
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,12,12,128,1,float16,float16,511,0.012629333883523941
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,12,12,128,1,float16,fp8,255,0.010591999938090643
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,12,12,128,1,float16,float16,255,0.010581333190202713
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,12,12,128,1,float16,fp8,511,0.010922666639089584
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,12,12,128,1,float16,float16,1023,0.014335999886194864
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,12,12,128,1,float16,fp8,1023,0.012970666090647379
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,12,12,128,1,float16,fp8,2047,0.01877333347996076
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,12,12,128,1,float16,fp8,63,0.010581333190202713
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,12,12,128,1,float16,float16,4095,0.03515733281771342
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,12,12,128,1,float16,float16,8191,0.05737066765626272
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,12,12,128,1,float16,fp8,8191,0.03925333420435587
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,12,12,128,1,float16,float16,2047,0.021157334248224895
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,12,12,128,1,float16,fp8,4095,0.02457600086927414
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,12,12,128,1,float16,fp8,16383,0.062122667829195656
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,12,12,128,1,float16,float16,16383,0.09557333588600159
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,12,12,128,1,float16,fp8,32767,0.10717866818110149
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,12,12,128,1,float16,float16,32767,0.1728853384653727
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,12,12,128,1,float16,float16,65535,0.32920533418655396
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,12,12,128,1,float16,fp8,65535,0.1950719952583313
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1024,1,12,12,128,1,float16,fp8,1,0.11708266536394756
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1024,1,12,12,128,1,float16,float16,3,0.15633066495259604
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1024,1,12,12,128,1,float16,fp8,3,0.1153706709543864
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1024,1,12,12,128,1,float16,float16,7,0.15651733676592508
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1024,1,12,12,128,1,float16,float16,1,0.15598932902018228
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1024,1,12,12,128,1,float16,fp8,7,0.1153706709543864
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1024,1,12,12,128,1,float16,float16,15,0.15684266885121664
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1024,1,12,12,128,1,float16,fp8,31,0.1153706709543864
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1024,1,12,12,128,1,float16,fp8,15,0.11707733074824016
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1024,1,12,12,128,1,float16,float16,31,0.15803200006484985
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1024,1,12,12,128,1,float16,fp8,63,0.11707733074824016
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1024,1,12,12,128,1,float16,float16,63,0.15428266922632852
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1024,1,12,12,128,1,float16,fp8,127,0.1153546671072642
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1024,1,12,12,128,1,float16,float16,255,0.15820800264676413
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1024,1,12,12,128,1,float16,fp8,255,0.11570133765538533
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1024,1,12,12,128,1,float16,float16,127,0.1558133363723755
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2048,1,12,12,128,1,float16,float16,1,0.29576534032821655
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2048,1,12,12,128,1,float16,fp8,1,0.21914132436116537
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2048,1,12,12,128,1,float16,float16,3,0.2988373239835103
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2048,1,12,12,128,1,float16,fp8,3,0.21742933988571167
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2048,1,12,12,128,1,float16,float16,7,0.2964479923248291
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2048,1,12,12,128,1,float16,fp8,7,0.2187946637471517
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2048,1,12,12,128,1,float16,float16,15,0.298144002755483
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,12,12,128,1,float16,float16,1,0.012629333883523941
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2048,1,12,12,128,1,float16,fp8,31,0.21984533468882242
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2048,1,12,12,128,1,float16,float16,63,0.2988373239835103
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2048,1,12,12,128,1,float16,float16,31,0.2964479923248291
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2048,1,12,12,128,1,float16,fp8,63,0.21709332863489786
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2048,1,12,12,128,1,float16,fp8,15,0.21776533126831055
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2048,1,12,12,128,1,float16,float16,127,0.29576534032821655
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,12,12,128,1,float16,fp8,1,0.01250133290886879
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,12,12,128,1,float16,float16,3,0.012970666090647379
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,12,12,128,1,float16,fp8,3,0.012629333883523941
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,12,12,128,1,float16,fp8,7,0.012634667257467905
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2048,1,12,12,128,1,float16,fp8,127,0.21640533208847046
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,12,12,128,1,float16,float16,7,0.012965332716703415
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,12,12,128,1,float16,float16,31,0.012629333883523941
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,12,12,128,1,float16,fp8,15,0.012975999464591345
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,12,12,128,1,float16,float16,63,0.012970666090647379
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,12,12,128,1,float16,fp8,63,0.012629333883523941
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,12,12,128,1,float16,fp8,31,0.012634667257467905
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,12,12,128,1,float16,float16,255,0.012645332763592402
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,12,12,128,1,float16,fp8,127,0.012629333883523941
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,12,12,128,1,float16,float16,127,0.012970666090647379
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,12,12,128,1,float16,fp8,255,0.012629333883523941
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,12,12,128,1,float16,float16,15,0.012970666090647379
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,12,12,128,1,float16,float16,511,0.014677333335081736
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,12,12,128,1,float16,fp8,511,0.014688000082969666
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,12,12,128,1,float16,float16,1023,0.019109333554903667
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,12,12,128,1,float16,fp8,1023,0.01709866647919019
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,12,12,128,1,float16,float16,2047,0.03686933219432831
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,12,12,128,1,float16,float16,4095,0.059392000238100685
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,12,12,128,1,float16,float16,8191,0.09898133079210918
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,12,12,128,1,float16,fp8,8191,0.06417066852251689
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,12,12,128,1,float16,fp8,2047,0.025605333348115284
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,12,12,128,1,float16,float16,16383,0.18243199586868286
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,12,12,128,1,float16,fp8,16383,0.10854400197664897
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,8,8,128,1,float16,float16,1,0.012944000462690989
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,8,8,128,1,float16,fp8,1,0.012981332838535309
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,8,8,128,1,float16,fp8,3,0.012986666212479273
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,8,8,128,1,float16,float16,3,0.012624000509579977
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,8,8,128,1,float16,float16,7,0.012975999464591345
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,12,12,128,1,float16,float16,32767,0.34355199337005615
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,12,12,128,1,float16,fp8,32767,0.1986560026804606
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,12,12,128,1,float16,fp8,4095,0.039594667653242745
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,8,8,128,1,float16,fp8,7,0.012629333883523941
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,8,8,128,1,float16,float16,15,0.01331199953953425
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,8,8,128,1,float16,fp8,15,0.012634667257467905
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,8,8,128,1,float16,fp8,31,0.012970666090647379
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,8,8,128,1,float16,float16,31,0.012639999389648438
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,8,8,128,1,float16,float16,63,0.012970666090647379
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,8,8,128,1,float16,fp8,127,0.012624000509579977
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,8,8,128,1,float16,fp8,63,0.012965332716703415
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,8,8,128,1,float16,float16,255,0.012970666090647379
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,8,8,128,1,float16,fp8,255,0.012629333883523941
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,8,8,128,1,float16,float16,511,0.015024000157912573
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,8,8,128,1,float16,float16,1023,0.019098666807015736
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,8,8,128,1,float16,fp8,511,0.014655999839305878
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,8,8,128,1,float16,float16,2047,0.039605334401130676
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,8,8,128,1,float16,fp8,1023,0.0170666662355264
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,8,8,128,1,float16,fp8,2047,0.026965332527955372
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,8,8,128,1,float16,float16,4095,0.062122667829195656
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,8,8,128,1,float16,float16,127,0.012639999389648438
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,8,8,128,1,float16,fp8,4095,0.041306667029857635
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,8,8,128,1,float16,float16,8191,0.10513066252072652
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,8,8,128,1,float16,float16,16383,0.19098132848739624
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,8,8,128,1,float16,fp8,8191,0.06419200201829274
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,8,8,128,1,float16,fp8,1,0.010586666564146677
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,8,8,128,1,float16,fp8,16383,0.10924800237019856
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,8,8,128,1,float16,float16,32767,0.36232535044352215
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,8,8,128,1,float16,fp8,32767,0.19729600350062051
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,8,8,128,1,float16,float16,3,0.010586666564146677
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,8,8,128,1,float16,fp8,7,0.010485333700974783
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,8,8,128,1,float16,fp8,3,0.010575999816258749
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,8,8,128,1,float16,float16,7,0.010928000013033548
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,8,8,128,1,float16,float16,15,0.010581333190202713
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,8,8,128,1,float16,float16,1,0.010581333190202713
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,8,8,128,1,float16,float16,63,0.010826667149861654
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,8,8,128,1,float16,fp8,31,0.010575999816258749
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,8,8,128,1,float16,fp8,63,0.010586666564146677
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,8,8,128,1,float16,float16,31,0.010645333677530289
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,8,8,128,1,float16,fp8,127,0.010591999938090643
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,8,8,128,1,float16,float16,127,0.010575999816258749
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,8,8,128,1,float16,fp8,15,0.01091733326514562
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,8,8,128,1,float16,fp8,255,0.010565333068370819
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,8,8,128,1,float16,fp8,511,0.010656000425418219
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,8,8,128,1,float16,fp8,1023,0.010922666639089584
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,8,8,128,1,float16,float16,2047,0.012629333883523941
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,8,8,128,1,float16,float16,511,0.010586666564146677
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,8,8,128,1,float16,float16,1023,0.010581333190202713
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,8,8,128,1,float16,fp8,2047,0.010933333386977514
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,8,8,128,1,float16,float16,4095,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,8,8,128,1,float16,fp8,4095,0.015050667027632395
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,8,8,128,1,float16,float16,255,0.010575999816258749
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,8,8,128,1,float16,float16,8191,0.01877333347996076
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,8,8,128,1,float16,fp8,8191,0.01877333347996076
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,8,8,128,1,float16,fp8,16383,0.019130667050679524
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,8,8,128,1,float16,float16,16383,0.020810666183630627
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,8,8,128,1,float16,fp8,32767,0.022874665757020313
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,8,8,128,1,float16,float16,65535,0.039594667653242745
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,8,8,128,1,float16,fp8,1,0.010581333190202713
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,8,8,128,1,float16,float16,32767,0.025248001019159954
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,8,8,128,1,float16,float16,1,0.010581333190202713
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,8,8,128,1,float16,float16,131071,0.06178133189678192
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,8,8,128,1,float16,fp8,131071,0.04166933397452036
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,8,8,128,1,float16,float16,3,0.010805333654085795
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,8,8,128,1,float16,float16,7,0.010581333190202713
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,8,8,128,1,float16,float16,15,0.010586666564146677
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,8,8,128,1,float16,fp8,65535,0.026965332527955372
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,8,8,128,1,float16,fp8,3,0.010581333190202713
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,8,8,128,1,float16,fp8,15,0.010581333190202713
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,8,8,128,1,float16,fp8,7,0.010586666564146677
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,8,8,128,1,float16,float16,31,0.010575999816258749
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,8,8,128,1,float16,fp8,31,0.010805333654085795
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,8,8,128,1,float16,float16,63,0.010581333190202713
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,8,8,128,1,float16,fp8,63,0.0106133334338665
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,8,8,128,1,float16,fp8,127,0.010922666639089584
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,8,8,128,1,float16,float16,255,0.010581333190202713
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,8,8,128,1,float16,float16,511,0.010586666564146677
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,8,8,128,1,float16,fp8,511,0.012629333883523941
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,8,8,128,1,float16,fp8,255,0.010816000401973724
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,8,8,128,1,float16,fp8,1023,0.010586666564146677
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,8,8,128,1,float16,float16,1023,0.010938666760921478
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,8,8,128,1,float16,float16,2047,0.016037333756685257
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,8,8,128,1,float16,fp8,2047,0.014677333335081736
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,8,8,128,1,float16,float16,127,0.010581333190202713
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,8,8,128,1,float16,fp8,4095,0.0170666662355264
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,8,8,128,1,float16,float16,4095,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,8,8,128,1,float16,fp8,8191,0.01877333347996076
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,8,8,128,1,float16,fp8,16383,0.020821332931518555
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,8,8,128,1,float16,float16,16383,0.02491733431816101
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,8,8,128,1,float16,float16,8191,0.020138667275508244
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,8,8,128,1,float16,float16,32767,0.039274667700131737
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,8,8,128,1,float16,float16,1,0.016719999412695568
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,8,8,128,1,float16,float16,65535,0.05870933334032694
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,8,8,128,1,float16,fp8,65535,0.039247999588648476
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,8,8,128,1,float16,fp8,1,0.014650666465361914
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,8,8,128,1,float16,float16,3,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,8,8,128,1,float16,float16,131071,0.10001066327095032
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,8,8,128,1,float16,fp8,131071,0.060421332716941833
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,8,8,128,1,float16,fp8,32767,0.025610665480295818
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,8,8,128,1,float16,fp8,3,0.014671999961137772
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,8,8,128,1,float16,float16,7,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,8,8,128,1,float16,float16,15,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,8,8,128,1,float16,fp8,15,0.014677333335081736
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,8,8,128,1,float16,fp8,7,0.014671999961137772
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,8,8,128,1,float16,float16,31,0.016719999412695568
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,8,8,128,1,float16,fp8,31,0.014698666830857595
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,8,8,128,1,float16,fp8,63,0.014677333335081736
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,8,8,128,1,float16,float16,127,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,8,8,128,1,float16,fp8,127,0.014677333335081736
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,8,8,128,1,float16,float16,255,0.016730666160583496
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,8,8,128,1,float16,float16,511,0.020821332931518555
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,8,8,128,1,float16,fp8,511,0.01877333347996076
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,8,8,128,1,float16,fp8,255,0.014671999961137772
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,8,8,128,1,float16,float16,1023,0.03515733281771342
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,8,8,128,1,float16,fp8,1023,0.023221333821614582
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,8,8,128,1,float16,float16,63,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,8,8,128,1,float16,float16,2047,0.06211733321348826
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,8,8,128,1,float16,fp8,2047,0.04334933559099833
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,8,8,128,1,float16,float16,4095,0.10579733053843181
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,8,8,128,1,float16,fp8,4095,0.06451199948787689
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,8,8,128,1,float16,float16,8191,0.19131199518839517
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,8,8,128,1,float16,fp8,8191,0.10923199852307637
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,8,8,128,1,float16,float16,1,0.010581333190202713
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,8,8,128,1,float16,fp8,1,0.010581333190202713
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,8,8,128,1,float16,float16,3,0.010469333579142889
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,8,8,128,1,float16,fp8,3,0.010821333775917688
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,8,8,128,1,float16,float16,16383,0.3643733263015747
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,8,8,128,1,float16,fp8,15,0.010575999816258749
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,8,8,128,1,float16,fp8,7,0.010645333677530289
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,8,8,128,1,float16,float16,15,0.010581333190202713
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,8,8,128,1,float16,float16,31,0.010586666564146677
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,8,8,128,1,float16,fp8,31,0.010586666564146677
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,8,8,128,1,float16,float16,7,0.01090666651725769
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,8,8,128,1,float16,fp8,16383,0.1991680065790812
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,8,8,128,1,float16,float16,63,0.010773333410422007
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,8,8,128,1,float16,fp8,127,0.010581333190202713
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,8,8,128,1,float16,fp8,63,0.010581333190202713
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,8,8,128,1,float16,float16,127,0.010575999816258749
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,8,8,128,1,float16,float16,255,0.010575999816258749
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,8,8,128,1,float16,fp8,255,0.01091733326514562
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,8,8,128,1,float16,float16,511,0.010928000013033548
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,8,8,128,1,float16,fp8,1023,0.012629333883523941
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,8,8,128,1,float16,float16,1023,0.012495999534924826
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,8,8,128,1,float16,fp8,4095,0.016714667280515034
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,8,8,128,1,float16,float16,2047,0.016384000579516094
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,8,8,128,1,float16,float16,4095,0.018768000106016796
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,8,8,128,1,float16,fp8,2047,0.014677333335081736
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,8,8,128,1,float16,fp8,8191,0.01911466692884763
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,8,8,128,1,float16,float16,8191,0.022858666876951855
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,8,8,128,1,float16,fp8,16383,0.02491733431816101
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,8,8,128,1,float16,float16,16383,0.038917332887649536
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,8,8,128,1,float16,float16,32767,0.06007466713587443
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,8,8,128,1,float16,fp8,32767,0.040618665516376495
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,8,8,128,1,float16,fp8,511,0.012624000509579977
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,8,8,128,1,float16,float16,65535,0.10171199838320415
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,8,8,128,1,float16,float16,1,0.022869333624839783
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,8,8,128,1,float16,fp8,1,0.01877333347996076
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,8,8,128,1,float16,float16,3,0.022869333624839783
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,8,8,128,1,float16,fp8,65535,0.060789331793785095
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,8,8,128,1,float16,fp8,131071,0.10548266768455505
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,8,8,128,1,float16,float16,131071,0.18995199600855509
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,8,8,128,1,float16,fp8,7,0.01877333347996076
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,8,8,128,1,float16,float16,15,0.021503999829292297
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,8,8,128,1,float16,fp8,15,0.019120000302791595
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,8,8,128,1,float16,float16,7,0.022869333624839783
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,8,8,128,1,float16,fp8,3,0.01911466692884763
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,8,8,128,1,float16,fp8,31,0.019120000302791595
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,8,8,128,1,float16,fp8,63,0.0191040001809597
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,8,8,128,1,float16,float16,127,0.021162666380405426
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,8,8,128,1,float16,fp8,127,0.01877333347996076
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,8,8,128,1,float16,float16,31,0.022869333624839783
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,8,8,128,1,float16,float16,63,0.022869333624839783
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,8,8,128,1,float16,float16,255,0.022869333624839783
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,8,8,128,1,float16,float16,511,0.03583999971548716
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,8,8,128,1,float16,float16,1023,0.05461333195368449
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,8,8,128,1,float16,fp8,511,0.025258667767047882
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,8,8,128,1,float16,fp8,1023,0.03754666695992152
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,8,8,128,1,float16,float16,2047,0.0993280013402303
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,8,8,128,1,float16,fp8,2047,0.062122667829195656
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,8,8,128,1,float16,float16,4095,0.1790293256441752
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,8,8,128,1,float16,fp8,255,0.01877333347996076
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,8,8,128,1,float16,fp8,4095,0.10103467106819153
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,256,1,8,8,128,1,float16,float16,1,0.03515733281771342
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,256,1,8,8,128,1,float16,fp8,1,0.0290133332212766
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,8,8,128,1,float16,float16,8191,0.336026668548584
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,8,8,128,1,float16,fp8,8191,0.17851734161376953
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,256,1,8,8,128,1,float16,float16,3,0.03515733281771342
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,256,1,8,8,128,1,float16,fp8,7,0.027301333844661713
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,256,1,8,8,128,1,float16,fp8,3,0.0290133332212766
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,256,1,8,8,128,1,float16,float16,15,0.03344533344109853
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,256,1,8,8,128,1,float16,fp8,15,0.027301333844661713
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,256,1,8,8,128,1,float16,float16,31,0.03345066557327906
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,256,1,8,8,128,1,float16,fp8,63,0.02698666602373123
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,256,1,8,8,128,1,float16,float16,63,0.034485332667827606
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,256,1,8,8,128,1,float16,float16,255,0.035504000882307686
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,256,1,8,8,128,1,float16,fp8,31,0.0290133332212766
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,256,1,8,8,128,1,float16,float16,7,0.03345066557327906
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,256,1,8,8,128,1,float16,fp8,127,0.026965332527955372
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,256,1,8,8,128,1,float16,float16,127,0.0341333324710528
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,256,1,8,8,128,1,float16,float16,511,0.055642664432525635
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,256,1,8,8,128,1,float16,fp8,255,0.026965332527955372
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,256,1,8,8,128,1,float16,fp8,511,0.04164266586303711
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,256,1,8,8,128,1,float16,float16,1023,0.09489066402117412
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,256,1,8,8,128,1,float16,fp8,1023,0.06178133189678192
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,512,1,8,8,128,1,float16,fp8,1,0.04539733131726583
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,512,1,8,8,128,1,float16,float16,1,0.0580266664425532
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,256,1,8,8,128,1,float16,fp8,2047,0.10513066252072652
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,512,1,8,8,128,1,float16,float16,3,0.0580213318268458
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,512,1,8,8,128,1,float16,float16,7,0.0580266664425532
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,512,1,8,8,128,1,float16,fp8,3,0.04539200166861216
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,256,1,8,8,128,1,float16,float16,2047,0.17834667364756265
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,512,1,8,8,128,1,float16,fp8,7,0.04539733131726583
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,512,1,8,8,128,1,float16,float16,15,0.0576853354771932
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,512,1,8,8,128,1,float16,fp8,31,0.04540266593297323
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,512,1,8,8,128,1,float16,float16,63,0.0576853354771932
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,512,1,8,8,128,1,float16,fp8,63,0.04539733131726583
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,512,1,8,8,128,1,float16,fp8,15,0.04538666705290476
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,512,1,8,8,128,1,float16,float16,31,0.05938666562239329
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,512,1,8,8,128,1,float16,fp8,127,0.04506133496761322
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,512,1,8,8,128,1,float16,float16,255,0.06007466713587443
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,512,1,8,8,128,1,float16,fp8,255,0.047450666626294456
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,8,8,128,1,float16,float16,1,0.010581333190202713
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,512,1,8,8,128,1,float16,fp8,511,0.068271999557813
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,512,1,8,8,128,1,float16,float16,511,0.09693333506584167
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,512,1,8,8,128,1,float16,float16,127,0.05973866581916809
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,512,1,8,8,128,1,float16,float16,1023,0.17646400133768717
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,512,1,8,8,128,1,float16,fp8,1023,0.10717333356539409
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,8,8,128,1,float16,fp8,1,0.01091733326514562
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,8,8,128,1,float16,float16,3,0.010586666564146677
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,8,8,128,1,float16,fp8,3,0.010933333386977514
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,8,8,128,1,float16,float16,7,0.010565333068370819
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,8,8,128,1,float16,fp8,7,0.010575999816258749
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,8,8,128,1,float16,float16,31,0.010575999816258749
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,8,8,128,1,float16,float16,63,0.010581333190202713
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,8,8,128,1,float16,fp8,15,0.010922666639089584
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,8,8,128,1,float16,float16,127,0.010575999816258749
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,8,8,128,1,float16,fp8,127,0.010586666564146677
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,8,8,128,1,float16,fp8,31,0.010586666564146677
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,8,8,128,1,float16,fp8,63,0.010661333799362183
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,8,8,128,1,float16,float16,255,0.010922666639089584
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,8,8,128,1,float16,fp8,255,0.010586666564146677
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,8,8,128,1,float16,float16,15,0.010645333677530289
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,8,8,128,1,float16,float16,1023,0.012970666090647379
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,8,8,128,1,float16,fp8,511,0.010933333386977514
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,8,8,128,1,float16,float16,511,0.01091733326514562
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,8,8,128,1,float16,fp8,1023,0.012629333883523941
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,8,8,128,1,float16,float16,2047,0.01877333347996076
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,8,8,128,1,float16,fp8,2047,0.017071999609470367
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,8,8,128,1,float16,float16,4095,0.020821332931518555
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,8,8,128,1,float16,float16,8191,0.03925333420435587
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,8,8,128,1,float16,fp8,8191,0.02492266645034154
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,8,8,128,1,float16,float16,16383,0.06007466713587443
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,8,8,128,1,float16,fp8,16383,0.03925333420435587
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,8,8,128,1,float16,float16,32767,0.10276266932487488
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,8,8,128,1,float16,fp8,32767,0.06007466713587443
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,8,8,128,1,float16,fp8,65535,0.10307733217875163
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,8,8,128,1,float16,fp8,4095,0.01877333347996076
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,8,8,128,1,float16,float16,65535,0.1860213279724121
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1024,1,8,8,128,1,float16,float16,1,0.1088853379090627
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,8,8,128,1,float16,float16,131071,0.35686933994293213
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1024,1,8,8,128,1,float16,float16,3,0.10717866818110149
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1024,1,8,8,128,1,float16,fp8,1,0.07987200220425923
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,8,8,128,1,float16,fp8,131071,0.18806399901707968
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1024,1,8,8,128,1,float16,fp8,3,0.0795306662718455
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1024,1,8,8,128,1,float16,fp8,7,0.0798773318529129
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1024,1,8,8,128,1,float16,float16,7,0.10683733224868774
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1024,1,8,8,128,1,float16,float16,15,0.10889066259066264
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1024,1,8,8,128,1,float16,float16,31,0.10683199763298035
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1024,1,8,8,128,1,float16,fp8,31,0.07987200220425923
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1024,1,8,8,128,1,float16,fp8,15,0.08021333316961925
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1024,1,8,8,128,1,float16,fp8,63,0.07918933530648549
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1024,1,8,8,128,1,float16,float16,63,0.10683733224868774
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1024,1,8,8,128,1,float16,float16,127,0.10717866818110149
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1024,1,8,8,128,1,float16,fp8,255,0.08055466910203297
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1024,1,8,8,128,1,float16,float16,255,0.10922666390736897
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1024,1,8,8,128,1,float16,fp8,511,0.12526933352152506
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1024,1,8,8,128,1,float16,float16,511,0.1781866749127706
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2048,1,8,8,128,1,float16,float16,1,0.20138132572174072
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1024,1,8,8,128,1,float16,fp8,127,0.08021333316961925
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2048,1,8,8,128,1,float16,fp8,1,0.14845333496729532
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2048,1,8,8,128,1,float16,float16,3,0.20325867335001627
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2048,1,8,8,128,1,float16,fp8,3,0.14899200201034546
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2048,1,8,8,128,1,float16,float16,7,0.20550400018692017
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2048,1,8,8,128,1,float16,fp8,15,0.14966400464375815
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2048,1,8,8,128,1,float16,fp8,7,0.15035733580589294
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2048,1,8,8,128,1,float16,float16,15,0.20309333006540933
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2048,1,8,8,128,1,float16,fp8,31,0.14813866217931113
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2048,1,8,8,128,1,float16,fp8,63,0.14915733536084494
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2048,1,8,8,128,1,float16,float16,63,0.20344533522923788
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2048,1,8,8,128,1,float16,float16,31,0.202239990234375
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,8,8,128,1,float16,float16,1,0.010922666639089584
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,8,8,128,1,float16,fp8,1,0.011264000087976456
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,8,8,128,1,float16,float16,3,0.011264000087976456
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2048,1,8,8,128,1,float16,float16,127,0.2053119937578837
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,8,8,128,1,float16,float16,7,0.010570666442314783
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,8,8,128,1,float16,fp8,3,0.011861333002646765
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,8,8,128,1,float16,fp8,7,0.010586666564146677
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2048,1,8,8,128,1,float16,fp8,127,0.14813866217931113
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,8,8,128,1,float16,float16,15,0.011258666714032492
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,8,8,128,1,float16,float16,31,0.011658667276302973
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2048,1,8,8,128,1,float16,fp8,255,0.14899200201034546
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,8,8,128,1,float16,fp8,15,0.010922666639089584
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,8,8,128,1,float16,float16,63,0.010581333190202713
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2048,1,8,8,128,1,float16,float16,255,0.20460800329844156
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,8,8,128,1,float16,fp8,31,0.01081066702802976
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,8,8,128,1,float16,float16,127,0.011285333583752314
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,8,8,128,1,float16,fp8,63,0.010922666639089584
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,8,8,128,1,float16,fp8,127,0.011242666592200598
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,8,8,128,1,float16,float16,511,0.012629333883523941
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,8,8,128,1,float16,fp8,255,0.010581333190202713
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,8,8,128,1,float16,fp8,1023,0.013653332988421122
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,8,8,128,1,float16,float16,255,0.010826667149861654
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,8,8,128,1,float16,float16,1023,0.014725333700577417
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,8,8,128,1,float16,fp8,511,0.012624000509579977
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,8,8,128,1,float16,float16,2047,0.020821332931518555
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,8,8,128,1,float16,fp8,2047,0.01912533367673556
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,8,8,128,1,float16,float16,4095,0.039594667653242745
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,8,8,128,1,float16,fp8,4095,0.024911999702453613
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,8,8,128,1,float16,float16,8191,0.060080001751581825
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,8,8,128,1,float16,fp8,8191,0.039594667653242745
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,4,4,128,1,float16,float16,1,0.010922666639089584
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,8,8,128,1,float16,float16,32767,0.18824533621470133
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,8,8,128,1,float16,fp8,16383,0.062122667829195656
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,8,8,128,1,float16,fp8,32767,0.10717866818110149
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,8,8,128,1,float16,float16,16383,0.10411733388900757
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,8,8,128,1,float16,float16,65535,0.36061867078145343
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,4,4,128,1,float16,fp8,1,0.010768000036478043
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,4,4,128,1,float16,float16,3,0.010581333190202713
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,8,8,128,1,float16,fp8,65535,0.1971199909845988
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,4,4,128,1,float16,fp8,3,0.010933333386977514
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,4,4,128,1,float16,fp8,7,0.010928000013033548
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,4,4,128,1,float16,float16,7,0.01098666712641716
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,4,4,128,1,float16,fp8,15,0.011258666714032492
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,4,4,128,1,float16,fp8,31,0.010549332946538925
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,4,4,128,1,float16,float16,31,0.010794666906197866
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,4,4,128,1,float16,fp8,63,0.010858666151762009
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,4,4,128,1,float16,float16,63,0.011264000087976456
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,4,4,128,1,float16,float16,127,0.010581333190202713
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,4,4,128,1,float16,float16,15,0.01126933346192042
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,4,4,128,1,float16,float16,255,0.011264000087976456
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,4,4,128,1,float16,fp8,255,0.010586666564146677
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,4,4,128,1,float16,float16,511,0.012629333883523941
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,4,4,128,1,float16,fp8,127,0.011600000162919363
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,4,4,128,1,float16,fp8,511,0.012634667257467905
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,4,4,128,1,float16,fp8,1023,0.012970666090647379
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,4,4,128,1,float16,fp8,2047,0.019109333554903667
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,4,4,128,1,float16,float16,4095,0.03925866633653641
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,4,4,128,1,float16,float16,1023,0.014677333335081736
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,4,4,128,1,float16,fp8,4095,0.025248001019159954
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,4,4,128,1,float16,float16,8191,0.06178133189678192
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,4,4,128,1,float16,fp8,8191,0.04131199916203817
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,4,4,128,1,float16,fp8,16383,0.062122667829195656
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,4,4,128,1,float16,float16,16383,0.1037600040435791
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,4,4,128,1,float16,float16,2047,0.022522665560245514
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,4,4,128,1,float16,float16,1,0.010656000425418219
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,4,4,128,1,float16,float16,32767,0.18826133012771606
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,4,4,128,1,float16,fp8,32767,0.10922666390736897
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,4,4,128,1,float16,fp8,1,0.010581333190202713
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,4,4,128,1,float16,float16,3,0.010922666639089584
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,4,4,128,1,float16,float16,7,0.010661333799362183
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,4,4,128,1,float16,float16,65535,0.35891199111938477
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,4,4,128,1,float16,fp8,15,0.010575999816258749
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,4,4,128,1,float16,float16,31,0.010597333312034607
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,4,4,128,1,float16,fp8,65535,0.19710934162139893
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,4,4,128,1,float16,float16,15,0.010586666564146677
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,4,4,128,1,float16,fp8,7,0.010591999938090643
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,4,4,128,1,float16,fp8,31,0.010581333190202713
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,4,4,128,1,float16,fp8,3,0.010821333775917688
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,4,4,128,1,float16,fp8,63,0.010581333190202713
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,4,4,128,1,float16,float16,127,0.010591999938090643
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,4,4,128,1,float16,float16,63,0.010821333775917688
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,4,4,128,1,float16,fp8,127,0.010581333190202713
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,4,4,128,1,float16,float16,511,0.010570666442314783
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,4,4,128,1,float16,fp8,255,0.010575999816258749
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,4,4,128,1,float16,float16,1023,0.01055466632048289
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,4,4,128,1,float16,float16,255,0.010586666564146677
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,4,4,128,1,float16,fp8,1023,0.012629333883523941
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,4,4,128,1,float16,fp8,2047,0.012661332885424295
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,4,4,128,1,float16,float16,2047,0.012608000387748083
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,4,4,128,1,float16,float16,4095,0.012629333883523941
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,4,4,128,1,float16,fp8,4095,0.012634667257467905
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,4,4,128,1,float16,float16,8191,0.018778666853904724
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,4,4,128,1,float16,fp8,511,0.010922666639089584
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,4,4,128,1,float16,float16,16383,0.022522665560245514
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,4,4,128,1,float16,fp8,16383,0.020821332931518555
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,4,4,128,1,float16,fp8,32767,0.022869333624839783
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,4,4,128,1,float16,fp8,8191,0.018778666853904724
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,4,4,128,1,float16,float16,32767,0.02491733431816101
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,4,4,128,1,float16,fp8,65535,0.02492800106604894
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,4,4,128,1,float16,float16,65535,0.026965332527955372
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,4,4,128,1,float16,float16,1,0.010591999938090643
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,4,4,128,1,float16,fp8,1,0.010581333190202713
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,4,4,128,1,float16,float16,3,0.010586666564146677
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,4,4,128,1,float16,float16,15,0.010575999816258749
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,4,4,128,1,float16,fp8,131071,0.03140799949566523
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,4,4,128,1,float16,fp8,3,0.010672000547250112
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,4,4,128,1,float16,float16,7,0.010608000059922537
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,4,4,128,1,float16,fp8,7,0.010597333312034607
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,4,4,128,1,float16,fp8,15,0.010656000425418219
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,4,4,128,1,float16,fp8,31,0.010581333190202713
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,4,4,128,1,float16,float16,127,0.010874666273593903
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,4,4,128,1,float16,float16,131071,0.04471466441949209
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,4,4,128,1,float16,fp8,63,0.010703999549150467
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,4,4,128,1,float16,float16,63,0.010581333190202713
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,4,4,128,1,float16,float16,255,0.010661333799362183
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,4,4,128,1,float16,fp8,127,0.010581333190202713
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,4,4,128,1,float16,float16,31,0.010591999938090643
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,4,4,128,1,float16,fp8,255,0.010581333190202713
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,4,4,128,1,float16,float16,511,0.011600000162919363
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,4,4,128,1,float16,float16,2047,0.012629333883523941
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,4,4,128,1,float16,float16,1023,0.010826667149861654
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,4,4,128,1,float16,fp8,2047,0.012629333883523941
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,4,4,128,1,float16,fp8,1023,0.010922666639089584
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,4,4,128,1,float16,float16,4095,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,4,4,128,1,float16,fp8,4095,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,4,4,128,1,float16,float16,8191,0.0194560003777345
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,4,4,128,1,float16,fp8,8191,0.01877333347996076
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,4,4,128,1,float16,float16,16383,0.020821332931518555
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,4,4,128,1,float16,fp8,511,0.010911999891201654
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,4,4,128,1,float16,fp8,16383,0.02082666630546252
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,4,4,128,1,float16,float16,32767,0.024885334074497223
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,4,4,128,1,float16,fp8,32767,0.022869333624839783
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,4,4,128,1,float16,float16,65535,0.04130133241415024
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,4,4,128,1,float16,fp8,65535,0.027306665976842243
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,4,4,128,1,float16,fp8,1,0.012613333761692047
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,4,4,128,1,float16,float16,3,0.012629333883523941
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,4,4,128,1,float16,float16,131071,0.06348800162474315
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,4,4,128,1,float16,fp8,131071,0.043354665239652
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,4,4,128,1,float16,fp8,3,0.012629333883523941
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,4,4,128,1,float16,float16,1,0.012629333883523941
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,4,4,128,1,float16,fp8,7,0.012634667257467905
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,4,4,128,1,float16,float16,7,0.012624000509579977
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,4,4,128,1,float16,float16,15,0.012629333883523941
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,4,4,128,1,float16,fp8,15,0.012629333883523941
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,4,4,128,1,float16,float16,31,0.012970666090647379
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,4,4,128,1,float16,fp8,31,0.012629333883523941
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,4,4,128,1,float16,float16,127,0.013306666165590286
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,4,4,128,1,float16,float16,63,0.012624000509579977
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,4,4,128,1,float16,fp8,127,0.012634667257467905
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,4,4,128,1,float16,fp8,63,0.012629333883523941
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,4,4,128,1,float16,float16,255,0.012629333883523941
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,4,4,128,1,float16,float16,511,0.01570133368174235
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,4,4,128,1,float16,fp8,255,0.012703999876976013
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,4,4,128,1,float16,fp8,511,0.014677333335081736
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,4,4,128,1,float16,float16,1023,0.0194560003777345
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,4,4,128,1,float16,fp8,1023,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,4,4,128,1,float16,fp8,4095,0.04266666869322459
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,4,4,128,1,float16,float16,4095,0.062122667829195656
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,4,4,128,1,float16,float16,2047,0.04128533353408178
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,4,4,128,1,float16,float16,8191,0.10513066252072652
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,4,4,128,1,float16,fp8,8191,0.06417066852251689
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,4,4,128,1,float16,fp8,2047,0.026965332527955372
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,4,4,128,1,float16,float16,16383,0.1901280085245768
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,4,4,128,1,float16,fp8,1,0.010586666564146677
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,4,4,128,1,float16,float16,1,0.010656000425418219
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,4,4,128,1,float16,float16,3,0.010591999938090643
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,4,4,128,1,float16,fp8,16383,0.10789866248766582
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,4,4,128,1,float16,fp8,3,0.010586666564146677
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,4,4,128,1,float16,float16,7,0.010581333190202713
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,4,4,128,1,float16,fp8,7,0.010586666564146677
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,4,4,128,1,float16,fp8,15,0.010496000448862711
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,4,4,128,1,float16,float16,15,0.010575999816258749
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,4,4,128,1,float16,fp8,32767,0.19713066021601358
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,4,4,128,1,float16,float16,32767,0.3609600067138672
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,4,4,128,1,float16,float16,63,0.010586666564146677
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,4,4,128,1,float16,float16,31,0.010570666442314783
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,4,4,128,1,float16,fp8,63,0.010586666564146677
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,4,4,128,1,float16,float16,127,0.010474666953086853
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,4,4,128,1,float16,fp8,31,0.010586666564146677
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,4,4,128,1,float16,fp8,127,0.010581333190202713
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,4,4,128,1,float16,float16,511,0.010922666639089584
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,4,4,128,1,float16,fp8,255,0.010928000013033548
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,4,4,128,1,float16,float16,1023,0.011264000087976456
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,4,4,128,1,float16,fp8,1023,0.012639999389648438
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,4,4,128,1,float16,float16,255,0.010597333312034607
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,4,4,128,1,float16,fp8,511,0.010581333190202713
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,4,4,128,1,float16,float16,4095,0.018778666853904724
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,4,4,128,1,float16,float16,2047,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,4,4,128,1,float16,fp8,2047,0.016719999412695568
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,4,4,128,1,float16,float16,8191,0.021162666380405426
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,4,4,128,1,float16,fp8,4095,0.018090666582187016
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,4,4,128,1,float16,fp8,8191,0.02012266715367635
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,4,4,128,1,float16,fp8,16383,0.022874665757020313
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,4,4,128,1,float16,float16,32767,0.041984001795450844
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,4,4,128,1,float16,float16,16383,0.02492266645034154
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,4,4,128,1,float16,fp8,32767,0.02698666602373123
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,4,4,128,1,float16,float16,65535,0.06246933341026306
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,4,4,128,1,float16,fp8,65535,0.04266133407751719
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,4,4,128,1,float16,fp8,1,0.014682666709025701
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,4,4,128,1,float16,float16,131071,0.10205866893132527
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,4,4,128,1,float16,fp8,131071,0.06348800162474315
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,4,4,128,1,float16,float16,1,0.016384000579516094
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,4,4,128,1,float16,float16,3,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,4,4,128,1,float16,fp8,7,0.015018666783968607
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,4,4,128,1,float16,fp8,3,0.014666666587193808
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,4,4,128,1,float16,float16,7,0.01674666628241539
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,4,4,128,1,float16,float16,15,0.01570133368174235
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,4,4,128,1,float16,fp8,15,0.014677333335081736
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,4,4,128,1,float16,float16,31,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,4,4,128,1,float16,fp8,31,0.014677333335081736
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,4,4,128,1,float16,fp8,63,0.014677333335081736
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,4,4,128,1,float16,float16,255,0.016719999412695568
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,4,4,128,1,float16,float16,127,0.016719999412695568
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,4,4,128,1,float16,float16,63,0.016719999412695568
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,4,4,128,1,float16,fp8,127,0.014677333335081736
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,4,4,128,1,float16,fp8,255,0.014677333335081736
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,4,4,128,1,float16,float16,511,0.020821332931518555
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,4,4,128,1,float16,float16,1023,0.03549333413441976
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,4,4,128,1,float16,fp8,511,0.01877333347996076
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,4,4,128,1,float16,fp8,1023,0.02491733431816101
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,4,4,128,1,float16,fp8,4095,0.0645066648721695
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,4,4,128,1,float16,float16,2047,0.0631520003080368
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,4,4,128,1,float16,float16,4095,0.10546666383743286
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,4,4,128,1,float16,fp8,2047,0.04301866888999939
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,4,4,128,1,float16,float16,8191,0.1914880077044169
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,256,1,4,4,128,1,float16,float16,1,0.02218666672706604
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,256,1,4,4,128,1,float16,fp8,1,0.01911466692884763
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,4,4,128,1,float16,fp8,8191,0.10991467038790385
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,256,1,4,4,128,1,float16,float16,3,0.021503999829292297
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,256,1,4,4,128,1,float16,float16,7,0.022869333624839783
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,256,1,4,4,128,1,float16,fp8,3,0.01877333347996076
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,256,1,4,4,128,1,float16,fp8,7,0.018778666853904724
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,4,4,128,1,float16,fp8,16383,0.19899733861287436
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,256,1,4,4,128,1,float16,fp8,15,0.01877333347996076
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,256,1,4,4,128,1,float16,float16,15,0.022869333624839783
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,4,4,128,1,float16,float16,16383,0.3630026578903198
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,256,1,4,4,128,1,float16,fp8,31,0.01877333347996076
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,256,1,4,4,128,1,float16,float16,31,0.022874665757020313
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,256,1,4,4,128,1,float16,fp8,63,0.01877333347996076
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,256,1,4,4,128,1,float16,float16,63,0.021168000996112823
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,256,1,4,4,128,1,float16,float16,127,0.022869333624839783
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,256,1,4,4,128,1,float16,fp8,127,0.018768000106016796
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,256,1,4,4,128,1,float16,float16,255,0.022869333624839783
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,256,1,4,4,128,1,float16,float16,511,0.035173334181308746
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,256,1,4,4,128,1,float16,float16,1023,0.05461333195368449
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,256,1,4,4,128,1,float16,fp8,511,0.02489600082238515
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,256,1,4,4,128,1,float16,fp8,255,0.01877333347996076
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,256,1,4,4,128,1,float16,fp8,1023,0.037205333511034645
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,256,1,4,4,128,1,float16,float16,2047,0.10000000397364299
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,256,1,4,4,128,1,float16,fp8,2047,0.06278933087984721
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,512,1,4,4,128,1,float16,float16,1,0.0341333324710528
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,512,1,4,4,128,1,float16,fp8,1,0.027301333844661713
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,256,1,4,4,128,1,float16,float16,4095,0.1800533334414164
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,256,1,4,4,128,1,float16,fp8,4095,0.10103999574979146
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,512,1,4,4,128,1,float16,float16,7,0.03345066557327906
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,512,1,4,4,128,1,float16,fp8,3,0.02696000039577484
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,512,1,4,4,128,1,float16,fp8,7,0.0290133332212766
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,512,1,4,4,128,1,float16,float16,3,0.03345066557327906
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,512,1,4,4,128,1,float16,float16,15,0.03379199902216593
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,512,1,4,4,128,1,float16,fp8,31,0.027306665976842243
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,512,1,4,4,128,1,float16,fp8,15,0.027301333844661713
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,512,1,4,4,128,1,float16,float16,127,0.03515733281771342
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,512,1,4,4,128,1,float16,float16,255,0.03752533346414566
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,512,1,4,4,128,1,float16,fp8,63,0.026965332527955372
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,512,1,4,4,128,1,float16,float16,63,0.03515733281771342
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,512,1,4,4,128,1,float16,float16,31,0.034815999368826546
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,512,1,4,4,128,1,float16,fp8,127,0.027989332874615986
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,512,1,4,4,128,1,float16,fp8,255,0.02698666602373123
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,4,4,128,1,float16,float16,1,0.010591999938090643
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,512,1,4,4,128,1,float16,fp8,511,0.04266666869322459
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,512,1,4,4,128,1,float16,float16,1023,0.09489599863688152
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,512,1,4,4,128,1,float16,float16,511,0.05597866574923197
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,4,4,128,1,float16,fp8,1,0.010581333190202713
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,512,1,4,4,128,1,float16,fp8,1023,0.060080001751581825
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,4,4,128,1,float16,float16,3,0.010581333190202713
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,512,1,4,4,128,1,float16,float16,2047,0.17987734079360962
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,4,4,128,1,float16,float16,7,0.010581333190202713
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,4,4,128,1,float16,float16,15,0.010922666639089584
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,4,4,128,1,float16,fp8,3,0.010602666685978571
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,512,1,4,4,128,1,float16,fp8,2047,0.10512533783912659
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,4,4,128,1,float16,fp8,7,0.010928000013033548
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,4,4,128,1,float16,fp8,31,0.010666667173306147
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,4,4,128,1,float16,float16,31,0.010581333190202713
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,4,4,128,1,float16,fp8,15,0.010570666442314783
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,4,4,128,1,float16,fp8,63,0.010586666564146677
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,4,4,128,1,float16,float16,127,0.010581333190202713
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,4,4,128,1,float16,float16,63,0.010832000523805618
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,4,4,128,1,float16,fp8,255,0.010586666564146677
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,4,4,128,1,float16,fp8,127,0.010565333068370819
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,4,4,128,1,float16,float16,255,0.010575999816258749
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,4,4,128,1,float16,float16,511,0.012613333761692047
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,4,4,128,1,float16,float16,1023,0.012629333883523941
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,4,4,128,1,float16,fp8,1023,0.011600000162919363
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,4,4,128,1,float16,float16,2047,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,4,4,128,1,float16,fp8,511,0.012282667060693106
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,4,4,128,1,float16,fp8,2047,0.015360000232855478
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,4,4,128,1,float16,fp8,8191,0.01911466692884763
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,4,4,128,1,float16,float16,8191,0.022522665560245514
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,4,4,128,1,float16,fp8,4095,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,4,4,128,1,float16,float16,16383,0.039247999588648476
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,4,4,128,1,float16,fp8,16383,0.024901332954565685
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,4,4,128,1,float16,float16,4095,0.0170666662355264
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,4,4,128,1,float16,fp8,32767,0.040965333580970764
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,4,4,128,1,float16,float16,32767,0.06075199941794077
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,4,4,128,1,float16,float16,65535,0.10273599624633789
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1024,1,4,4,128,1,float16,float16,1,0.05905066430568695
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,4,4,128,1,float16,fp8,65535,0.062128002444903054
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1024,1,4,4,128,1,float16,float16,3,0.0580266664425532
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1024,1,4,4,128,1,float16,fp8,1,0.04539733131726583
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,4,4,128,1,float16,float16,131071,0.18961066007614136
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,4,4,128,1,float16,fp8,131071,0.10513066252072652
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1024,1,4,4,128,1,float16,fp8,3,0.04539200166861216
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1024,1,4,4,128,1,float16,float16,7,0.0580266664425532
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1024,1,4,4,128,1,float16,fp8,7,0.04540266593297323
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1024,1,4,4,128,1,float16,fp8,15,0.04539733131726583
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1024,1,4,4,128,1,float16,float16,15,0.058042665322621666
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1024,1,4,4,128,1,float16,float16,31,0.058703998724619545
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1024,1,4,4,128,1,float16,fp8,31,0.044031997521718345
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1024,1,4,4,128,1,float16,float16,63,0.0580213318268458
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1024,1,4,4,128,1,float16,float16,127,0.06007466713587443
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1024,1,4,4,128,1,float16,fp8,127,0.04538666705290476
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1024,1,4,4,128,1,float16,float16,255,0.06006933252016703
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1024,1,4,4,128,1,float16,fp8,63,0.04541333516438802
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1024,1,4,4,128,1,float16,fp8,255,0.04710400104522705
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1024,1,4,4,128,1,float16,float16,511,0.09864532947540283
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1024,1,4,4,128,1,float16,fp8,511,0.0682666649421056
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2048,1,4,4,128,1,float16,float16,1,0.10683733224868774
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1024,1,4,4,128,1,float16,float16,1023,0.17494932810465494
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2048,1,4,4,128,1,float16,fp8,1,0.0788266658782959
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1024,1,4,4,128,1,float16,fp8,1023,0.10684266686439514
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2048,1,4,4,128,1,float16,float16,3,0.10717866818110149
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2048,1,4,4,128,1,float16,fp8,3,0.07850666840871175
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2048,1,4,4,128,1,float16,float16,7,0.10785599549611409
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2048,1,4,4,128,1,float16,fp8,7,0.07849066456158955
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2048,1,4,4,128,1,float16,float16,15,0.1071573297182719
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2048,1,4,4,128,1,float16,fp8,15,0.0795306662718455
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2048,1,4,4,128,1,float16,float16,31,0.10684266686439514
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2048,1,4,4,128,1,float16,fp8,31,0.07815999786059062
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2048,1,4,4,128,1,float16,float16,63,0.10751466949780782
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2048,1,4,4,128,1,float16,fp8,63,0.07815466821193695
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2048,1,4,4,128,1,float16,float16,127,0.10754666725794475
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2048,1,4,4,128,1,float16,fp8,127,0.0795306662718455
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2048,1,4,4,128,1,float16,float16,255,0.10717866818110149
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,4,4,128,1,float16,float16,1,0.010581333190202713
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,4,4,128,1,float16,fp8,1,0.010826667149861654
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,4,4,128,1,float16,float16,7,0.010586666564146677
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2048,1,4,4,128,1,float16,fp8,255,0.08157333234945933
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,4,4,128,1,float16,fp8,7,0.010575999816258749
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,4,4,128,1,float16,float16,3,0.010591999938090643
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,4,4,128,1,float16,fp8,3,0.010922666639089584
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,4,4,128,1,float16,fp8,15,0.010837333897749582
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,4,4,128,1,float16,float16,15,0.010549332946538925
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2048,1,4,4,128,1,float16,fp8,511,0.12424000104268391
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,4,4,128,1,float16,fp8,31,0.010581333190202713
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2048,1,4,4,128,1,float16,float16,511,0.1807360053062439
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,4,4,128,1,float16,float16,31,0.010586666564146677
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,4,4,128,1,float16,float16,127,0.010698666175206503
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,4,4,128,1,float16,float16,63,0.010591999938090643
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,4,4,128,1,float16,fp8,63,0.010581333190202713
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,4,4,128,1,float16,float16,255,0.010575999816258749
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,4,4,128,1,float16,fp8,255,0.010586666564146677
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,4,4,128,1,float16,fp8,127,0.010581333190202713
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,4,4,128,1,float16,float16,511,0.01099733387430509
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,4,4,128,1,float16,float16,1023,0.012970666090647379
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,4,4,128,1,float16,fp8,2047,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,4,4,128,1,float16,fp8,511,0.012655999511480331
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,4,4,128,1,float16,fp8,1023,0.012629333883523941
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,4,4,128,1,float16,float16,4095,0.020821332931518555
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,4,4,128,1,float16,float16,2047,0.01706133286158244
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,4,4,128,1,float16,fp8,4095,0.01911466692884763
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,4,4,128,1,float16,fp8,8191,0.024906667570273083
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,4,4,128,1,float16,float16,8191,0.03925333420435587
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,4,4,128,1,float16,float16,16383,0.05973333120346069
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,4,4,128,1,float16,fp8,16383,0.03994133323431015
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,4,4,128,1,float16,fp8,32767,0.06109866499900818
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,4,4,128,1,float16,float16,32767,0.10205333431561787
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,4,4,128,1,float16,float16,65535,0.18586132923762003
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,128,1,128,1,float16,float16,1,0.053930665055910744
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,4,4,128,1,float16,fp8,65535,0.10274133086204529
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,4,4,128,1,float16,fp8,131071,0.18858667214711508
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,4,4,128,1,float16,float16,131071,0.3561546802520752
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,128,1,128,1,float16,fp8,1,0.04368533194065094
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,128,1,128,1,float16,float16,3,0.05598400036493937
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,128,1,128,1,float16,fp8,7,0.04606399933497111
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,128,1,128,1,float16,fp8,3,0.044341335693995156
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,128,1,128,1,float16,float16,7,0.056661332647005715
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,128,1,128,1,float16,fp8,63,0.057664001981417336
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,128,1,128,1,float16,fp8,15,0.05769066512584686
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,128,1,128,1,float16,float16,31,0.06587199866771698
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,128,1,128,1,float16,float16,63,0.06588266789913177
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,128,1,128,1,float16,float16,15,0.06587733328342438
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,128,1,128,1,float16,float16,127,0.07814933359622955
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,128,1,128,1,float16,fp8,31,0.0576800008614858
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,128,1,128,1,float16,fp8,127,0.06791999936103821
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,128,1,128,1,float16,float16,255,0.1153706709543864
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,128,1,128,1,float16,fp8,255,0.10274133086204529
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,128,1,128,1,float16,fp8,511,0.17220266660054526
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,128,1,128,1,float16,fp8,1023,0.3135146697362264
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,128,1,128,1,float16,float16,2047,0.6432426770528158
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,128,1,128,1,float16,float16,511,0.19110933939615884
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,128,1,128,1,float16,fp8,2047,0.5946026643117269
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,128,1,128,1,float16,float16,1023,0.3421866496404012
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,128,1,128,1,float16,float16,4095,1.24618132909139
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,128,1,128,1,float16,fp8,4095,1.1593386332194011
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,128,2,128,1,float16,float16,3,0.055642664432525635
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,128,2,128,1,float16,float16,7,0.05598400036493937
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,128,2,128,1,float16,fp8,1,0.04334933559099833
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,128,2,128,1,float16,float16,1,0.053930665055910744
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,128,2,128,1,float16,float16,15,0.06587733328342438
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,128,2,128,1,float16,fp8,3,0.04540266593297323
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,128,2,128,1,float16,fp8,7,0.04744533201058706
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,128,2,128,1,float16,fp8,15,0.0576853354771932
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,128,2,128,1,float16,float16,31,0.06588266789913177
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,128,2,128,1,float16,fp8,31,0.0576853354771932
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,128,2,128,1,float16,float16,127,0.07815999786059062
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,128,2,128,1,float16,float16,63,0.06586666901906331
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,128,2,128,1,float16,fp8,255,0.10275200009346008
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,128,2,128,1,float16,fp8,63,0.0576853354771932
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,128,2,128,1,float16,float16,255,0.11707733074824016
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,128,2,128,1,float16,fp8,127,0.06824000179767609
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,128,2,128,1,float16,float16,511,0.19114667177200317
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,128,2,128,1,float16,float16,1023,0.34219201405843097
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,128,2,128,1,float16,fp8,511,0.17220266660054526
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,128,2,128,1,float16,float16,2047,0.6432480017344157
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,128,2,128,1,float16,fp8,1023,0.3135146697362264
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,128,2,128,1,float16,fp8,2047,0.5946079889933268
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,128,2,128,1,float16,float16,4095,1.3165173530578613
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,128,4,128,1,float16,float16,1,0.05563200016816457
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,128,2,128,1,float16,fp8,4095,1.1644586722056072
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,128,4,128,1,float16,fp8,1,0.04334933559099833
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,128,4,128,1,float16,float16,3,0.05788266658782959
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,128,4,128,1,float16,fp8,3,0.04505600035190582
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,128,4,128,1,float16,float16,7,0.05597866574923197
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,128,4,128,1,float16,fp8,15,0.0576800008614858
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,128,4,128,1,float16,fp8,31,0.0576853354771932
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,128,4,128,1,float16,fp8,63,0.0580266664425532
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,128,4,128,1,float16,fp8,7,0.04642133414745331
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,128,4,128,1,float16,float16,63,0.06587733328342438
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,128,4,128,1,float16,float16,15,0.06587733328342438
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,128,4,128,1,float16,float16,31,0.06588266789913177
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,128,4,128,1,float16,fp8,127,0.06791999936103821
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,128,4,128,1,float16,fp8,255,0.10274133086204529
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,128,4,128,1,float16,float16,255,0.11707733074824016
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,128,4,128,1,float16,float16,127,0.07815999786059062
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,128,4,128,1,float16,fp8,511,0.1723733345667521
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,128,4,128,1,float16,float16,511,0.19081066052118936
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,128,4,128,1,float16,float16,1023,0.3421866496404012
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,128,4,128,1,float16,fp8,1023,0.31351999441782635
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,128,4,128,1,float16,float16,2047,0.6703786849975586
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,128,4,128,1,float16,fp8,2047,0.5959733327229818
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,128,4,128,1,float16,float16,4095,1.4093653361002605
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,128,8,128,1,float16,float16,1,0.015002666662136713
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,128,4,128,1,float16,fp8,4095,1.1699199676513672
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,128,8,128,1,float16,fp8,1,0.014655999839305878
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,128,8,128,1,float16,float16,3,0.014677333335081736
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,128,8,128,1,float16,fp8,3,0.014661333213249842
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,128,8,128,1,float16,fp8,7,0.014677333335081736
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,128,8,128,1,float16,float16,7,0.014677333335081736
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,128,8,128,1,float16,float16,15,0.015018666783968607
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,128,8,128,1,float16,fp8,15,0.014677333335081736
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,128,8,128,1,float16,float16,127,0.015018666783968607
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,128,8,128,1,float16,fp8,127,0.014677333335081736
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,128,8,128,1,float16,float16,31,0.015013333410024643
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,128,8,128,1,float16,fp8,31,0.014677333335081736
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,128,8,128,1,float16,float16,63,0.014677333335081736
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,128,8,128,1,float16,float16,255,0.015024000157912573
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,128,8,128,1,float16,fp8,255,0.014885333677132925
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,128,8,128,1,float16,fp8,63,0.014677333335081736
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,128,8,128,1,float16,float16,511,0.0170666662355264
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,128,8,128,1,float16,fp8,511,0.01669866715868314
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,128,8,128,1,float16,fp8,2047,0.0290133332212766
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,128,8,128,1,float16,float16,2047,0.039936001102129616
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,128,8,128,1,float16,float16,1023,0.021503999829292297
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,128,8,128,1,float16,fp8,1023,0.01911466692884763
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,128,8,128,1,float16,float16,4095,0.06247999767462412
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,128,1,128,1,float16,float16,1,0.00877333308259646
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,128,1,128,1,float16,fp8,1,0.008517333616813024
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,128,8,128,1,float16,fp8,4095,0.04539200166861216
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,128,1,128,1,float16,float16,3,0.008874666566650072
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,128,1,128,1,float16,fp8,3,0.008778666456540426
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,128,1,128,1,float16,fp8,15,0.008879999940594038
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,128,1,128,1,float16,float16,7,0.008752000207702318
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,128,1,128,1,float16,float16,31,0.010575999816258749
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,128,1,128,1,float16,fp8,7,0.008762666955590248
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,128,1,128,1,float16,float16,15,0.008858666444818178
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,128,1,128,1,float16,float16,63,0.010650667051474253
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,128,1,128,1,float16,fp8,31,0.010586666564146677
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,128,1,128,1,float16,float16,127,0.010485333700974783
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,128,1,128,1,float16,fp8,63,0.010581333190202713
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,128,1,128,1,float16,float16,255,0.022863999009132385
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,128,1,128,1,float16,float16,511,0.02697066714366277
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,128,1,128,1,float16,fp8,127,0.010575999816258749
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,128,1,128,1,float16,fp8,255,0.022869333624839783
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,128,1,128,1,float16,fp8,511,0.0249439999461174
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,128,1,128,1,float16,fp8,1023,0.0314026673634847
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,128,1,128,1,float16,fp8,2047,0.043696001172065735
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,128,1,128,1,float16,float16,1023,0.03310399999221166
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,128,1,128,1,float16,float16,2047,0.04744533201058706
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,128,1,128,1,float16,float16,4095,0.0744053324063619
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,128,1,128,1,float16,float16,8191,0.129013329744339
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,128,1,128,1,float16,fp8,4095,0.0679253339767456
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,128,1,128,1,float16,fp8,8191,0.11675199866294861
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,128,1,128,1,float16,float16,16383,0.23654399315516153
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,128,1,128,1,float16,fp8,16383,0.2123039960861206
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,128,2,128,1,float16,float16,1,0.00877333308259646
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,128,2,128,1,float16,fp8,1,0.008639999975760778
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,128,2,128,1,float16,float16,3,0.008762666955590248
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,128,2,128,1,float16,float16,15,0.008954666554927826
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,128,2,128,1,float16,fp8,7,0.008650666723648706
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,128,2,128,1,float16,fp8,3,0.008746666833758354
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,128,2,128,1,float16,fp8,15,0.008874666566650072
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,128,2,128,1,float16,float16,7,0.00877333308259646
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,128,2,128,1,float16,float16,63,0.010591999938090643
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,128,2,128,1,float16,fp8,63,0.010586666564146677
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,128,2,128,1,float16,fp8,31,0.010586666564146677
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,128,2,128,1,float16,float16,127,0.010586666564146677
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,128,2,128,1,float16,float16,31,0.010602666685978571
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,128,2,128,1,float16,float16,255,0.022869333624839783
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,128,2,128,1,float16,fp8,127,0.010586666564146677
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,128,2,128,1,float16,fp8,255,0.022869333624839783
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,128,2,128,1,float16,fp8,511,0.025263999899228413
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,128,2,128,1,float16,float16,511,0.026965332527955372
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,128,2,128,1,float16,fp8,2047,0.043706665436426796
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,128,2,128,1,float16,fp8,1023,0.031061333914597828
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,128,2,128,1,float16,float16,1023,0.03344533344109853
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,128,2,128,1,float16,float16,4095,0.07441066702206929
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,128,2,128,1,float16,float16,2047,0.04744533201058706
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,128,2,128,1,float16,float16,8191,0.1293653349081675
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,128,2,128,1,float16,fp8,4095,0.06791999936103821
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,128,2,128,1,float16,float16,16383,0.2362026572227478
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,128,2,128,1,float16,fp8,8191,0.11674666404724121
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,128,2,128,1,float16,fp8,16383,0.2126506765683492
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,128,4,128,1,float16,fp8,1,0.008602666358153025
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,128,4,128,1,float16,float16,1,0.008874666566650072
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,128,4,128,1,float16,fp8,3,0.008885333314538002
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,128,4,128,1,float16,float16,3,0.0085333331177632
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,128,4,128,1,float16,fp8,7,0.008869333192706108
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,128,4,128,1,float16,fp8,15,0.008874666566650072
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,128,4,128,1,float16,float16,7,0.008746666833758354
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,128,4,128,1,float16,float16,15,0.008879999940594038
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,128,4,128,1,float16,float16,31,0.010586666564146677
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,128,4,128,1,float16,float16,63,0.010586666564146677
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,128,4,128,1,float16,fp8,31,0.010581333190202713
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,128,4,128,1,float16,float16,127,0.010490667074918747
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,128,4,128,1,float16,fp8,63,0.010575999816258749
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,128,4,128,1,float16,fp8,127,0.010581333190202713
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,128,4,128,1,float16,fp8,255,0.022874665757020313
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,128,4,128,1,float16,fp8,511,0.025258667767047882
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,128,4,128,1,float16,float16,511,0.026965332527955372
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,128,4,128,1,float16,float16,255,0.022869333624839783
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,128,4,128,1,float16,fp8,1023,0.031061333914597828
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,128,4,128,1,float16,float16,1023,0.03310933212439219
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,128,4,128,1,float16,fp8,2047,0.04365866879622141
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,128,4,128,1,float16,fp8,4095,0.06791999936103821
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,128,4,128,1,float16,float16,2047,0.04744533201058706
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,128,4,128,1,float16,float16,4095,0.07442666590213776
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,128,4,128,1,float16,float16,8191,0.1293653349081675
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,128,4,128,1,float16,float16,16383,0.23586666584014893
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,128,4,128,1,float16,fp8,8191,0.11707733074824016
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,128,8,128,1,float16,fp8,1,0.010602666685978571
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,128,4,128,1,float16,fp8,16383,0.21333332856496176
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,128,8,128,1,float16,float16,1,0.01090666651725769
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,128,8,128,1,float16,fp8,7,0.010816000401973724
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,128,8,128,1,float16,float16,7,0.010581333190202713
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,128,8,128,1,float16,fp8,3,0.010581333190202713
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,128,8,128,1,float16,float16,3,0.010581333190202713
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,128,8,128,1,float16,fp8,15,0.010581333190202713
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,128,8,128,1,float16,float16,15,0.010591999938090643
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,128,8,128,1,float16,fp8,31,0.011605333536863327
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,128,8,128,1,float16,float16,63,0.010570666442314783
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,128,8,128,1,float16,float16,31,0.010922666639089584
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,128,8,128,1,float16,fp8,63,0.010922666639089584
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,128,8,128,1,float16,float16,127,0.010586666564146677
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,128,8,128,1,float16,float16,255,0.010575999816258749
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,128,8,128,1,float16,fp8,127,0.010922666639089584
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,128,8,128,1,float16,fp8,511,0.012629333883523941
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,128,8,128,1,float16,float16,511,0.012639999389648438
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,128,8,128,1,float16,fp8,255,0.010586666564146677
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,128,8,128,1,float16,fp8,1023,0.012629333883523941
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,128,8,128,1,float16,float16,2047,0.012634667257467905
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,128,8,128,1,float16,fp8,2047,0.012629333883523941
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,128,8,128,1,float16,float16,1023,0.012629333883523941
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,128,8,128,1,float16,fp8,4095,0.016719999412695568
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,128,8,128,1,float16,float16,4095,0.01807466646035512
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,128,8,128,1,float16,float16,8191,0.01979200045267741
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,128,1,128,1,float16,float16,1,0.014677333335081736
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,128,8,128,1,float16,float16,16383,0.02252800017595291
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,128,8,128,1,float16,fp8,8191,0.019109333554903667
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,128,8,128,1,float16,fp8,16383,0.02082666630546252
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,128,1,128,1,float16,float16,3,0.014671999961137772
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,128,1,128,1,float16,fp8,1,0.010575999816258749
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,128,1,128,1,float16,fp8,3,0.010581333190202713
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,128,1,128,1,float16,float16,7,0.014677333335081736
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,128,1,128,1,float16,fp8,7,0.010522666076819101
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,128,1,128,1,float16,fp8,31,0.012629333883523941
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,128,1,128,1,float16,float16,15,0.014671999961137772
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,128,1,128,1,float16,float16,31,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,128,1,128,1,float16,fp8,15,0.010575999816258749
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,128,1,128,1,float16,float16,63,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,128,1,128,1,float16,fp8,127,0.012629333883523941
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,128,1,128,1,float16,float16,127,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,128,1,128,1,float16,fp8,63,0.012629333883523941
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,128,1,128,1,float16,float16,255,0.01877333347996076
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,128,1,128,1,float16,float16,511,0.02491733431816101
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,128,1,128,1,float16,fp8,255,0.014677333335081736
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,128,1,128,1,float16,float16,1023,0.03755199909210205
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,128,1,128,1,float16,fp8,1023,0.031061333914597828
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,128,1,128,1,float16,float16,2047,0.06382399797439575
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,128,1,128,1,float16,fp8,511,0.019120000302791595
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,128,1,128,1,float16,float16,4095,0.11366400122642517
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,128,1,128,1,float16,fp8,2047,0.05153066913286845
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,128,1,128,1,float16,fp8,4095,0.09250133236249287
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,128,1,128,1,float16,float16,8191,0.2153866688410441
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,128,1,128,1,float16,float16,16383,0.4164266586303711
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,128,1,128,1,float16,fp8,8191,0.17442133029301962
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,128,2,128,1,float16,fp8,1,0.008879999940594038
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,128,1,128,1,float16,fp8,16383,0.3380959828694661
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,128,2,128,1,float16,float16,1,0.014682666709025701
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,128,2,128,1,float16,float16,3,0.014677333335081736
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,128,2,128,1,float16,fp8,7,0.010581333190202713
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,128,2,128,1,float16,fp8,3,0.010581333190202713
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,128,2,128,1,float16,float16,7,0.014682666709025701
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,128,2,128,1,float16,fp8,15,0.010559999694426855
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,128,2,128,1,float16,float16,15,0.014677333335081736
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,128,2,128,1,float16,float16,31,0.016384000579516094
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,128,2,128,1,float16,fp8,31,0.012634667257467905
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,128,2,128,1,float16,fp8,63,0.012650666137536367
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,128,2,128,1,float16,float16,63,0.016719999412695568
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,128,2,128,1,float16,float16,127,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,128,2,128,1,float16,float16,255,0.01877333347996076
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,128,2,128,1,float16,fp8,255,0.014671999961137772
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,128,2,128,1,float16,float16,511,0.02491733431816101
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,128,2,128,1,float16,fp8,127,0.012624000509579977
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,128,2,128,1,float16,float16,1023,0.03751999884843826
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,128,2,128,1,float16,fp8,511,0.01911466692884763
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,128,2,128,1,float16,fp8,2047,0.051541333397229515
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,128,2,128,1,float16,fp8,1023,0.031061333914597828
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,128,2,128,1,float16,float16,2047,0.06383466720581055
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,128,2,128,1,float16,fp8,4095,0.09250133236249287
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,128,2,128,1,float16,float16,4095,0.11331733067830403
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,128,2,128,1,float16,fp8,8191,0.17425066232681274
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,128,2,128,1,float16,float16,8191,0.2153493364651998
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,128,2,128,1,float16,fp8,16383,0.33843199412027997
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,128,4,128,1,float16,float16,3,0.01469333345691363
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,128,2,128,1,float16,float16,16383,0.4164266586303711
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,128,4,128,1,float16,fp8,1,0.008912000184257826
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,128,4,128,1,float16,float16,1,0.014671999961137772
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,128,4,128,1,float16,float16,7,0.014677333335081736
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,128,4,128,1,float16,fp8,3,0.00914666677514712
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,128,4,128,1,float16,float16,15,0.014671999961137772
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,128,4,128,1,float16,fp8,15,0.010591999938090643
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,128,4,128,1,float16,fp8,7,0.010581333190202713
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,128,4,128,1,float16,fp8,63,0.012629333883523941
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,128,4,128,1,float16,fp8,31,0.012629333883523941
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,128,4,128,1,float16,float16,127,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,128,4,128,1,float16,float16,63,0.016730666160583496
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,128,4,128,1,float16,float16,31,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,128,4,128,1,float16,fp8,127,0.012629333883523941
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,128,4,128,1,float16,float16,255,0.01877333347996076
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,128,4,128,1,float16,float16,511,0.02491733431816101
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,128,4,128,1,float16,fp8,255,0.014677333335081736
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,128,4,128,1,float16,fp8,511,0.01911466692884763
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,128,4,128,1,float16,float16,1023,0.03754666695992152
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,128,4,128,1,float16,fp8,1023,0.03073066721359889
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,128,4,128,1,float16,fp8,2047,0.051541333397229515
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,128,4,128,1,float16,float16,2047,0.06382399797439575
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,128,4,128,1,float16,float16,4095,0.11332266529401143
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,128,4,128,1,float16,fp8,4095,0.09250133236249287
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,128,4,128,1,float16,fp8,8191,0.1742560068766276
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,128,8,128,1,float16,float16,1,0.011264000087976456
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,128,4,128,1,float16,float16,8191,0.21538132429122925
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,128,8,128,1,float16,float16,3,0.010575999816258749
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,128,8,128,1,float16,fp8,3,0.010826667149861654
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,128,4,128,1,float16,float16,16383,0.41830400625864667
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,128,8,128,1,float16,fp8,1,0.0106133334338665
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,128,4,128,1,float16,fp8,16383,0.339466651280721
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,128,8,128,1,float16,fp8,7,0.011039999624093374
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,128,8,128,1,float16,float16,31,0.010581333190202713
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,128,8,128,1,float16,fp8,15,0.010570666442314783
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,128,8,128,1,float16,float16,7,0.010586666564146677
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,128,8,128,1,float16,float16,15,0.010485333700974783
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,128,8,128,1,float16,fp8,31,0.010581333190202713
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,128,8,128,1,float16,fp8,63,0.010581333190202713
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,128,8,128,1,float16,float16,63,0.010586666564146677
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,128,8,128,1,float16,float16,127,0.010575999816258749
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,128,8,128,1,float16,fp8,127,0.010581333190202713
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,128,8,128,1,float16,float16,255,0.010922666639089584
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,128,8,128,1,float16,fp8,255,0.01089599976936976
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,128,8,128,1,float16,float16,1023,0.012629333883523941
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,128,8,128,1,float16,float16,2047,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,128,8,128,1,float16,fp8,1023,0.012629333883523941
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,128,8,128,1,float16,fp8,511,0.012629333883523941
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,128,8,128,1,float16,float16,511,0.010922666639089584
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,128,8,128,1,float16,fp8,2047,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,128,8,128,1,float16,float16,4095,0.01876266673207283
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,128,8,128,1,float16,fp8,4095,0.018757333358128864
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,128,8,128,1,float16,fp8,8191,0.02081599955757459
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,128,8,128,1,float16,float16,8191,0.020842666427294414
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,128,1,128,1,float16,fp8,1,0.08055466910203297
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,128,1,128,1,float16,float16,3,0.10476799805959065
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,128,8,128,1,float16,float16,16383,0.02492266645034154
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,128,1,128,1,float16,float16,1,0.1009226640065511
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,128,8,128,1,float16,fp8,16383,0.022869333624839783
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,128,1,128,1,float16,fp8,3,0.08257066706816356
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,128,1,128,1,float16,float16,7,0.10513066252072652
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,128,1,128,1,float16,float16,15,0.12526933352152506
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,128,1,128,1,float16,fp8,7,0.08669867118199666
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,128,1,128,1,float16,fp8,15,0.1088853379090627
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,128,1,128,1,float16,float16,31,0.1256053348382314
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,128,1,128,1,float16,fp8,63,0.1088853379090627
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,128,1,128,1,float16,float16,127,0.14967466394106546
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,128,1,128,1,float16,float16,63,0.1256106694539388
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,128,1,128,1,float16,fp8,127,0.1293653349081675
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,128,1,128,1,float16,fp8,31,0.10885866483052571
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,128,1,128,1,float16,fp8,255,0.1991680065790812
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,128,1,128,1,float16,float16,255,0.2239146629969279
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,128,1,128,1,float16,float16,511,0.3736106554667155
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,128,1,128,1,float16,float16,1023,0.6717493534088135
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,128,1,128,1,float16,fp8,511,0.33604268232981366
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,128,2,128,1,float16,float16,1,0.10102933645248413
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,128,1,128,1,float16,float16,2047,1.2675413290659587
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,128,2,128,1,float16,fp8,1,0.08056533336639404
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,128,1,128,1,float16,fp8,1023,0.6164533297220866
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,128,1,128,1,float16,fp8,2047,1.1764053503672283
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,128,2,128,1,float16,fp8,3,0.08226666847864787
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,128,2,128,1,float16,fp8,7,0.0867039958635966
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,128,2,128,1,float16,float16,15,0.12524267037709555
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,128,2,128,1,float16,fp8,15,0.10889066259066264
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,128,2,128,1,float16,float16,7,0.10513066252072652
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,128,2,128,1,float16,float16,3,0.10410133004188538
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,128,2,128,1,float16,float16,31,0.1256106694539388
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,128,2,128,1,float16,float16,63,0.12595733006795248
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,128,2,128,1,float16,fp8,63,0.10922666390736897
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,128,2,128,1,float16,float16,127,0.15018133322397867
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,128,2,128,1,float16,fp8,31,0.1088853379090627
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,128,2,128,1,float16,fp8,127,0.1293653349081675
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,128,2,128,1,float16,float16,255,0.22357332706451416
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,128,2,128,1,float16,float16,511,0.3736106554667155
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,128,2,128,1,float16,fp8,255,0.19918400049209595
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,128,2,128,1,float16,fp8,511,0.3360373179117839
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,128,2,128,1,float16,float16,1023,0.6727680365244547
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,128,2,128,1,float16,fp8,1023,0.6171306769053141
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,128,2,128,1,float16,fp8,2047,1.1767626603444417
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,128,4,128,1,float16,fp8,1,0.08055466910203297
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,128,2,128,1,float16,float16,2047,1.3199360370635986
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,128,4,128,1,float16,float16,1,0.10103467106819153
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,128,4,128,1,float16,fp8,3,0.08260266482830048
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,128,4,128,1,float16,float16,7,0.10513066252072652
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,128,4,128,1,float16,float16,3,0.10308266679445903
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,128,4,128,1,float16,fp8,7,0.08669867118199666
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,128,4,128,1,float16,fp8,15,0.10890133182207744
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,128,4,128,1,float16,float16,63,0.1256106694539388
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,128,4,128,1,float16,fp8,63,0.10922666390736897
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,128,4,128,1,float16,fp8,31,0.1088853379090627
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,128,4,128,1,float16,float16,15,0.12525332967440286
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,128,4,128,1,float16,fp8,127,0.1293706695238749
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,128,4,128,1,float16,float16,127,0.15018666783968607
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,128,4,128,1,float16,float16,31,0.12562666336695352
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,128,4,128,1,float16,float16,255,0.223578671614329
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,128,4,128,1,float16,fp8,255,0.19882667064666748
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,128,4,128,1,float16,float16,511,0.37358399232228595
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,128,4,128,1,float16,fp8,1023,0.6167893409729004
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,128,4,128,1,float16,fp8,511,0.33638401826222736
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,128,4,128,1,float16,float16,2047,1.3902613321940105
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,128,4,128,1,float16,float16,1023,0.7045119603474935
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,128,8,128,1,float16,float16,1,0.020479999482631683
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,128,8,128,1,float16,fp8,1,0.018090666582187016
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,128,8,128,1,float16,float16,3,0.02013333390156428
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,128,4,128,1,float16,fp8,2047,1.1827253500620525
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,128,8,128,1,float16,fp8,3,0.018778666853904724
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,128,8,128,1,float16,float16,7,0.020479999482631683
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,128,8,128,1,float16,fp8,7,0.01877333347996076
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,128,8,128,1,float16,fp8,15,0.018768000106016796
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,128,8,128,1,float16,float16,31,0.018778666853904724
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,128,8,128,1,float16,float16,15,0.020821332931518555
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,128,8,128,1,float16,fp8,31,0.018090666582187016
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,128,8,128,1,float16,fp8,63,0.01877333347996076
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,128,8,128,1,float16,float16,63,0.01911466692884763
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,128,8,128,1,float16,float16,127,0.020842666427294414
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,128,8,128,1,float16,float16,255,0.02048533285657565
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,128,8,128,1,float16,fp8,255,0.0170666662355264
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,128,8,128,1,float16,fp8,511,0.021162666380405426
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,128,8,128,1,float16,fp8,127,0.018090666582187016
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,128,8,128,1,float16,float16,1023,0.03925866633653641
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,128,8,128,1,float16,float16,511,0.0249439999461174
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,128,1,128,1,float16,float16,1,0.013653332988421122
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,128,8,128,1,float16,fp8,2047,0.04744000236193339
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,128,1,128,1,float16,fp8,1,0.014682666709025701
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,128,1,128,1,float16,fp8,3,0.015018666783968607
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,128,8,128,1,float16,float16,2047,0.06417599817117055
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,128,8,128,1,float16,fp8,1023,0.026965332527955372
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,128,1,128,1,float16,float16,3,0.014682666709025701
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,128,1,128,1,float16,float16,7,0.014677333335081736
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,128,1,128,1,float16,fp8,7,0.015360000232855478
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,128,1,128,1,float16,float16,31,0.014677333335081736
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,128,1,128,1,float16,fp8,15,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,128,1,128,1,float16,float16,15,0.015018666783968607
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,128,1,128,1,float16,fp8,63,0.02082666630546252
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,128,1,128,1,float16,fp8,31,0.020821332931518555
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,128,1,128,1,float16,float16,63,0.014677333335081736
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,128,1,128,1,float16,fp8,127,0.02082666630546252
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,128,1,128,1,float16,float16,127,0.016719999412695568
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,128,1,128,1,float16,float16,255,0.022863999009132385
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,128,1,128,1,float16,fp8,255,0.023210667073726654
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,128,1,128,1,float16,float16,511,0.03547733277082443
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,128,1,128,1,float16,fp8,511,0.03515733281771342
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,128,1,128,1,float16,float16,1023,0.06178133189678192
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,128,1,128,1,float16,float16,2047,0.11127466956774394
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,128,1,128,1,float16,float16,4095,0.21128533283869425
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,128,1,128,1,float16,fp8,4095,0.17442133029301962
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,128,1,128,1,float16,fp8,1023,0.05563733478387197
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,128,1,128,1,float16,fp8,2047,0.09455466270446777
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,128,1,128,1,float16,float16,8191,0.41232534249623615
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,128,1,128,1,float16,fp8,8191,0.33639466762542725
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,128,2,128,1,float16,float16,1,0.01331199953953425
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,128,2,128,1,float16,fp8,1,0.015018666783968607
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,128,1,128,1,float16,fp8,16383,0.65774933497111
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,128,1,128,1,float16,float16,16383,0.8292693297068278
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,128,2,128,1,float16,float16,7,0.014682666709025701
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,128,2,128,1,float16,fp8,3,0.015018666783968607
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,128,2,128,1,float16,float16,3,0.014671999961137772
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,128,2,128,1,float16,float16,15,0.014671999961137772
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,128,2,128,1,float16,fp8,15,0.016730666160583496
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,128,2,128,1,float16,float16,31,0.014901333798964819
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,128,2,128,1,float16,fp8,7,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,128,2,128,1,float16,float16,63,0.014677333335081736
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,128,2,128,1,float16,fp8,31,0.020821332931518555
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,128,2,128,1,float16,fp8,63,0.020821332931518555
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,128,2,128,1,float16,fp8,127,0.02081599955757459
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,128,2,128,1,float16,float16,255,0.022874665757020313
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,128,2,128,1,float16,fp8,255,0.024911999702453613
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,128,2,128,1,float16,float16,127,0.0170666662355264
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,128,2,128,1,float16,float16,511,0.03585066646337509
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,128,2,128,1,float16,fp8,511,0.03516799956560135
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,128,2,128,1,float16,float16,2047,0.1109386682510376
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,128,2,128,1,float16,fp8,2047,0.09454933802286784
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,128,2,128,1,float16,float16,4095,0.21163199345270792
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,128,2,128,1,float16,fp8,1023,0.05492266515890757
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,128,2,128,1,float16,float16,1023,0.061792001128196716
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,128,2,128,1,float16,fp8,4095,0.17507733901341757
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,128,2,128,1,float16,fp8,8191,0.33604268232981366
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,128,2,128,1,float16,float16,8191,0.41813333829243976
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,128,4,128,1,float16,float16,1,0.014677333335081736
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,128,2,128,1,float16,float16,16383,0.8400213718414307
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,128,2,128,1,float16,fp8,16383,0.6574293375015259
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,128,4,128,1,float16,fp8,1,0.015018666783968607
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,128,4,128,1,float16,float16,3,0.014677333335081736
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,128,4,128,1,float16,float16,7,0.014677333335081736
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,128,4,128,1,float16,float16,15,0.014677333335081736
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,128,4,128,1,float16,fp8,3,0.01534933348496755
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,128,4,128,1,float16,fp8,15,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,128,4,128,1,float16,float16,31,0.014677333335081736
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,128,4,128,1,float16,fp8,7,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,128,4,128,1,float16,fp8,31,0.02082666630546252
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,128,4,128,1,float16,float16,63,0.014677333335081736
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,128,4,128,1,float16,fp8,63,0.02081599955757459
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,128,4,128,1,float16,float16,127,0.016719999412695568
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,128,4,128,1,float16,float16,255,0.023210667073726654
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,128,4,128,1,float16,fp8,255,0.023210667073726654
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,128,4,128,1,float16,fp8,127,0.02083733429511388
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,128,4,128,1,float16,float16,511,0.036864000062147774
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,128,4,128,1,float16,float16,2047,0.1109386682510376
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,128,4,128,1,float16,fp8,511,0.03515200068553289
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,128,4,128,1,float16,fp8,1023,0.05495466788609823
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,128,4,128,1,float16,fp8,2047,0.09454933802286784
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,128,4,128,1,float16,float16,4095,0.21196800470352173
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,128,4,128,1,float16,float16,1023,0.06178133189678192
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,128,4,128,1,float16,fp8,4095,0.17493333419164023
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,128,4,128,1,float16,fp8,8191,0.33604268232981366
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,128,8,128,1,float16,float16,1,0.010581333190202713
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,128,8,128,1,float16,fp8,1,0.011610666910807291
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,128,4,128,1,float16,float16,8191,0.4164479970932007
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,128,4,128,1,float16,fp8,16383,0.6590880155563354
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,128,4,128,1,float16,float16,16383,0.8724479675292969
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,128,8,128,1,float16,float16,3,0.010591999938090643
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,128,8,128,1,float16,float16,7,0.010805333654085795
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,128,8,128,1,float16,fp8,7,0.010575999816258749
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,128,8,128,1,float16,float16,15,0.010922666639089584
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,128,8,128,1,float16,fp8,3,0.010922666639089584
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,128,8,128,1,float16,float16,31,0.010602666685978571
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,128,8,128,1,float16,fp8,15,0.010672000547250112
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,128,8,128,1,float16,float16,63,0.010586666564146677
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,128,8,128,1,float16,fp8,31,0.010586666564146677
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,128,8,128,1,float16,fp8,63,0.010575999816258749
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,128,8,128,1,float16,float16,127,0.010645333677530289
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,128,8,128,1,float16,fp8,127,0.010922666639089584
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,128,8,128,1,float16,fp8,255,0.010922666639089584
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,128,8,128,1,float16,fp8,511,0.012629333883523941
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,128,8,128,1,float16,float16,255,0.010656000425418219
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,128,8,128,1,float16,float16,511,0.012629333883523941
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,128,8,128,1,float16,fp8,1023,0.012869333227475485
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,128,8,128,1,float16,float16,2047,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,128,8,128,1,float16,fp8,2047,0.016730666160583496
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,128,8,128,1,float16,float16,1023,0.012624000509579977
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,128,8,128,1,float16,float16,4095,0.01877333347996076
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,128,8,128,1,float16,float16,16383,0.03958933303753535
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,128,8,128,1,float16,fp8,16383,0.027647999425729115
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,128,8,128,1,float16,fp8,4095,0.01844266677896182
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,128,8,128,1,float16,fp8,8191,0.021162666380405426
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,128,8,128,1,float16,float16,8191,0.022890667120615642
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,128,1,128,1,float16,fp8,1,0.1544533371925354
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,128,1,128,1,float16,float16,3,0.20121065775553384
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,128,1,128,1,float16,fp8,3,0.15990933775901794
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,128,1,128,1,float16,float16,7,0.2051466703414917
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,128,1,128,1,float16,fp8,7,0.16622933745384216
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,128,1,128,1,float16,float16,1,0.1967786749204
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,128,1,128,1,float16,fp8,31,0.21128533283869425
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,128,1,128,1,float16,float16,63,0.2461013396581014
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,128,1,128,1,float16,float16,15,0.244053324063619
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,128,1,128,1,float16,fp8,63,0.21163199345270792
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,128,1,128,1,float16,float16,31,0.24439465999603271
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,128,1,128,1,float16,fp8,15,0.21130132675170898
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,128,1,128,1,float16,float16,127,0.2930346727371216
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,128,1,128,1,float16,fp8,255,0.38860801855723065
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,128,1,128,1,float16,fp8,511,0.6601386864980062
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,128,1,128,1,float16,float16,255,0.43878400325775146
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,128,1,128,1,float16,float16,511,0.736255963643392
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,128,1,128,1,float16,float16,1023,1.3284533023834229
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,128,1,128,1,float16,fp8,1023,1.2141173680623372
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,128,2,128,1,float16,fp8,1,0.15429866313934326
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,128,2,128,1,float16,float16,3,0.20104533433914185
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,128,2,128,1,float16,float16,1,0.19660800695419312
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,128,2,128,1,float16,fp8,3,0.16008533040682474
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,128,2,128,1,float16,fp8,15,0.21128533283869425
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,128,1,128,1,float16,fp8,127,0.2505173285802205
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,128,2,128,1,float16,float16,31,0.2461013396581014
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,128,2,128,1,float16,fp8,7,0.16810667514801025
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,128,2,128,1,float16,float16,7,0.20497065782546997
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,128,2,128,1,float16,float16,63,0.24712532758712769
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,128,2,128,1,float16,fp8,31,0.21128533283869425
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,128,2,128,1,float16,float16,15,0.244053324063619
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,128,2,128,1,float16,fp8,63,0.211626668771108
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,128,2,128,1,float16,fp8,127,0.250874658425649
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,128,2,128,1,float16,float16,511,0.7365972995758057
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,128,2,128,1,float16,fp8,511,0.6608213186264038
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,128,2,128,1,float16,float16,255,0.44014934698740643
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,128,2,128,1,float16,fp8,255,0.3896266619364421
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,128,4,128,1,float16,float16,1,0.1967786749204
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,128,2,128,1,float16,fp8,1023,1.2144266764322917
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,128,2,128,1,float16,float16,1023,1.3987840016682942
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,128,4,128,1,float16,fp8,1,0.154448002576828
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,128,2,128,1,float16,float16,127,0.29338133335113525
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,128,4,128,1,float16,fp8,7,0.16810667514801025
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,128,4,128,1,float16,float16,15,0.24405866861343384
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,128,4,128,1,float16,float16,3,0.20121600230534872
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,128,4,128,1,float16,fp8,3,0.1599146624406179
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,128,4,128,1,float16,float16,7,0.20326934258143106
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,128,4,128,1,float16,float16,31,0.24610666433970133
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,128,4,128,1,float16,float16,63,0.24643733104070029
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,128,4,128,1,float16,fp8,15,0.2112906575202942
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,128,4,128,1,float16,fp8,63,0.21230934063593546
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,128,4,128,1,float16,float16,127,0.2937333385149638
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,128,4,128,1,float16,float16,511,0.7860960165659586
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,128,4,128,1,float16,fp8,127,0.25225067138671875
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,128,4,128,1,float16,fp8,255,0.3896373510360718
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,128,4,128,1,float16,float16,255,0.4394666751225789
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,128,4,128,1,float16,float16,1023,1.4346240361531575
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,128,4,128,1,float16,fp8,31,0.21126933892567953
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,128,4,128,1,float16,fp8,511,0.6601386864980062
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,128,8,128,1,float16,float16,1,0.02935466667016347
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,128,4,128,1,float16,fp8,1023,1.222314675649007
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,128,8,128,1,float16,float16,7,0.02935466667016347
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,128,8,128,1,float16,float16,3,0.0290133332212766
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,128,8,128,1,float16,fp8,1,0.027306665976842243
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,128,8,128,1,float16,fp8,3,0.026965332527955372
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,128,8,128,1,float16,fp8,15,0.027301333844661713
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,128,8,128,1,float16,fp8,31,0.026949333647886913
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,128,8,128,1,float16,float16,15,0.0290133332212766
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,128,8,128,1,float16,float16,63,0.0290133332212766
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,128,8,128,1,float16,float16,31,0.029333333174387615
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,128,8,128,1,float16,fp8,127,0.026965332527955372
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,128,8,128,1,float16,fp8,7,0.026954665780067444
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,128,8,128,1,float16,fp8,63,0.027327999472618103
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,128,8,128,1,float16,float16,255,0.02935466667016347
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,128,8,128,1,float16,float16,127,0.02897600084543228
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,128,8,128,1,float16,float16,511,0.03993066648642222
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,128,8,128,1,float16,fp8,255,0.025258667767047882
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,128,8,128,1,float16,float16,1023,0.06178133189678192
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,128,8,128,1,float16,fp8,511,0.03448000053564707
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,256,1,128,1,128,1,float16,fp8,1,0.3012053370475769
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,256,1,128,1,128,1,float16,fp8,3,0.31249066193898517
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,256,1,128,1,128,1,float16,float16,1,0.38656532764434814
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,256,1,128,1,128,1,float16,fp8,7,0.3240906596183777
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,256,1,128,1,128,1,float16,float16,3,0.3959519863128662
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,128,8,128,1,float16,fp8,1023,0.043696001172065735
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,256,1,128,1,128,1,float16,fp8,15,0.41471465428670246
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,256,1,128,1,128,1,float16,float16,7,0.40174933274586994
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,256,1,128,1,128,1,float16,float16,15,0.48076268037160236
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,256,1,128,1,128,1,float16,float16,31,0.4835040171941121
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,256,1,128,1,128,1,float16,float16,63,0.4858826796213786
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,256,1,128,1,128,1,float16,fp8,127,0.4944266478220622
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,256,1,128,1,128,1,float16,fp8,31,0.41574398676554364
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,256,1,128,1,128,1,float16,fp8,63,0.4174506664276123
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,256,1,128,1,128,1,float16,float16,127,0.5802453358968099
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,256,1,128,1,128,1,float16,float16,255,0.8697173595428467
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,256,1,128,1,128,1,float16,fp8,255,0.7700479825337728
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,256,1,128,2,128,1,float16,float16,1,0.385535995165507
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,256,1,128,2,128,1,float16,fp8,1,0.3022453387578328
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,256,1,128,2,128,1,float16,float16,3,0.39560532569885254
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,256,1,128,2,128,1,float16,fp8,3,0.3138773242632548
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,256,1,128,2,128,1,float16,fp8,7,0.3275093237559001
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,256,1,128,2,128,1,float16,float16,31,0.4872586727142334
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,256,1,128,2,128,1,float16,fp8,31,0.4164533217748006
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,256,1,128,2,128,1,float16,float16,7,0.4020906686782837
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,256,1,128,2,128,1,float16,fp8,15,0.416431983311971
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,256,1,128,2,128,1,float16,float16,15,0.4814506769180298
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,256,1,128,2,128,1,float16,float16,63,0.48895466327667236
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,256,1,128,2,128,1,float16,fp8,127,0.4957866668701172
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,256,1,128,2,128,1,float16,float16,255,0.8707413673400879
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,256,1,128,2,128,1,float16,fp8,63,0.4184746742248535
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,256,1,128,2,128,1,float16,float16,127,0.581285317738851
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,256,1,128,4,128,1,float16,float16,3,0.3959466616312663
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,256,1,128,2,128,1,float16,fp8,255,0.7707253297170004
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,256,1,128,4,128,1,float16,float16,1,0.385535995165507
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,256,1,128,4,128,1,float16,fp8,1,0.30190932750701904
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,256,1,128,4,128,1,float16,fp8,3,0.31249066193898517
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,256,1,128,4,128,1,float16,float16,7,0.40277334054311115
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,256,1,128,4,128,1,float16,fp8,15,0.4164266586303711
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,256,1,128,4,128,1,float16,fp8,7,0.3285280068715413
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,256,1,128,4,128,1,float16,float16,15,0.4818026622136434
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,256,1,128,4,128,1,float16,float16,31,0.4875946839650472
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,256,1,128,4,128,1,float16,fp8,63,0.41847999890645343
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,256,1,128,4,128,1,float16,fp8,31,0.4160533348719279
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,256,1,128,4,128,1,float16,float16,127,0.5806080102920532
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,256,1,128,4,128,1,float16,fp8,127,0.4957866668701172
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,256,1,128,4,128,1,float16,float16,63,0.4889599879582723
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,256,1,128,4,128,1,float16,fp8,255,0.7707253297170004
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,256,1,128,4,128,1,float16,float16,255,0.9524853229522705
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,256,1,128,8,128,1,float16,fp8,1,0.04539733131726583
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,256,1,128,8,128,1,float16,fp8,3,0.04539733131726583
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,256,1,128,8,128,1,float16,float16,1,0.04779199759165446
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,256,1,128,8,128,1,float16,float16,7,0.04744533201058706
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,256,1,128,8,128,1,float16,float16,3,0.047781333327293396
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,256,1,128,8,128,1,float16,fp8,7,0.04541333516438802
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,256,1,128,8,128,1,float16,float16,15,0.04744533201058706
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,256,1,128,8,128,1,float16,fp8,15,0.04540266593297323
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,256,1,128,8,128,1,float16,fp8,31,0.045381332437197365
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,256,1,128,8,128,1,float16,float16,31,0.047781333327293396
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,256,1,128,8,128,1,float16,fp8,63,0.04539733131726583
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,256,1,128,8,128,1,float16,float16,63,0.0481333335240682
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,256,1,128,8,128,1,float16,float16,127,0.04950400193532308
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,256,1,128,8,128,1,float16,float16,255,0.050853331883748375
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,256,1,128,8,128,1,float16,fp8,127,0.044031997521718345
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,256,1,128,8,128,1,float16,fp8,255,0.04334933559099833
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,512,1,128,1,128,1,float16,float16,1,0.7663040161132812
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,512,1,128,1,128,1,float16,float16,3,0.785749355951945
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,512,1,128,1,128,1,float16,fp8,1,0.5983573198318481
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,512,1,128,1,128,1,float16,fp8,3,0.6208853324254354
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,512,1,128,1,128,1,float16,fp8,7,0.639520009358724
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,512,1,128,1,128,1,float16,float16,7,0.7947893142700195
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,512,1,128,1,128,1,float16,float16,15,0.9559040069580078
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,512,1,128,1,128,1,float16,fp8,15,0.8261973063151041
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,512,1,128,1,128,1,float16,float16,31,0.9630773067474365
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,512,1,128,1,128,1,float16,fp8,31,0.8241493701934814
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,512,1,128,1,128,1,float16,float16,63,0.9668107032775879
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,512,1,128,1,128,1,float16,float16,127,1.153872013092041
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,512,1,128,1,128,1,float16,fp8,63,0.8296213150024414
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,512,1,128,2,128,1,float16,float16,3,0.7864267031351725
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,512,1,128,2,128,1,float16,fp8,3,0.6205546855926514
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,512,1,128,1,128,1,float16,fp8,127,0.9838933149973551
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,512,1,128,2,128,1,float16,float16,1,0.7659520308176676
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,512,1,128,2,128,1,float16,fp8,1,0.5983573198318481
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,512,1,128,2,128,1,float16,float16,7,0.7995733420054117
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,512,1,128,2,128,1,float16,fp8,15,0.8275573253631592
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,512,1,128,2,128,1,float16,fp8,7,0.645957350730896
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,512,1,128,2,128,1,float16,float16,31,0.9695519606272379
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,512,1,128,2,128,1,float16,float16,15,0.9586293697357178
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,512,1,128,2,128,1,float16,fp8,31,0.8268799781799316
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,512,1,128,2,128,1,float16,fp8,63,0.831658681233724
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,512,1,128,2,128,1,float16,float16,63,0.971946636835734
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,512,1,128,2,128,1,float16,float16,127,1.1688960393269856
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,512,1,128,2,128,1,float16,fp8,127,0.9852586587270101
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,512,1,128,4,128,1,float16,fp8,1,0.5966560045878092
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,512,1,128,4,128,1,float16,float16,1,0.7662879625956217
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,512,1,128,4,128,1,float16,float16,3,0.7879892985026041
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,512,1,128,4,128,1,float16,float16,15,0.9576053619384766
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,512,1,128,4,128,1,float16,fp8,3,0.618837316830953
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,512,1,128,4,128,1,float16,float16,7,0.8036746978759766
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,512,1,128,4,128,1,float16,fp8,7,0.6480213403701782
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,512,1,128,4,128,1,float16,fp8,15,0.8278986612955729
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,512,1,128,4,128,1,float16,float16,31,0.9695573647816976
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,512,1,128,4,128,1,float16,fp8,31,0.8268799781799316
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,512,1,128,4,128,1,float16,fp8,63,0.8313226699829102
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,512,1,128,4,128,1,float16,float16,63,0.9801440238952637
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,512,1,128,8,128,1,float16,fp8,1,0.08054933448632558
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,512,1,128,8,128,1,float16,float16,3,0.08874666690826416
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,512,1,128,8,128,1,float16,float16,1,0.08874133229255676
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,512,1,128,4,128,1,float16,fp8,127,0.9924266338348389
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,512,1,128,4,128,1,float16,float16,127,1.2540906270345051
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,512,1,128,8,128,1,float16,fp8,3,0.08157866696516673
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,512,1,128,8,128,1,float16,float16,7,0.0890880028406779
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,512,1,128,8,128,1,float16,float16,15,0.08806932965914409
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,512,1,128,8,128,1,float16,fp8,15,0.08156266808509827
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,512,1,128,8,128,1,float16,float16,31,0.08839999636014302
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,512,1,128,8,128,1,float16,fp8,7,0.08226133386294048
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,512,1,128,8,128,1,float16,fp8,31,0.08225599924723308
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,512,1,128,8,128,1,float16,float16,63,0.08874666690826416
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,512,1,128,8,128,1,float16,fp8,63,0.08055999875068665
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,128,1,128,1,float16,float16,1,0.01911466692884763
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,128,1,128,1,float16,fp8,1,0.015018666783968607
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,128,1,128,1,float16,float16,7,0.019120000302791595
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,128,1,128,1,float16,fp8,3,0.01670933390657107
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,128,1,128,1,float16,fp8,7,0.015087999403476715
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,512,1,128,8,128,1,float16,fp8,127,0.08020266890525818
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,128,1,128,1,float16,float16,3,0.019120000302791595
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,128,1,128,1,float16,float16,15,0.0207893339296182
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,512,1,128,8,128,1,float16,float16,127,0.09045333663622539
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,128,1,128,1,float16,fp8,15,0.01877333347996076
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,128,1,128,1,float16,float16,31,0.02125866711139679
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,128,1,128,1,float16,float16,63,0.021173333128293354
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,128,1,128,1,float16,fp8,127,0.021125334004561108
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,128,1,128,1,float16,fp8,31,0.018768000106016796
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,128,1,128,1,float16,fp8,63,0.01877333347996076
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,128,1,128,1,float16,float16,127,0.02492266645034154
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,128,1,128,1,float16,float16,255,0.03515733281771342
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,128,1,128,1,float16,float16,511,0.05564799904823303
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,128,1,128,1,float16,float16,1023,0.09660266836484273
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,128,1,128,1,float16,fp8,511,0.04948799808820089
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,128,1,128,1,float16,fp8,255,0.031061333914597828
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,128,1,128,1,float16,float16,2047,0.17698132991790771
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,128,1,128,1,float16,fp8,1023,0.08636266986529033
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,128,1,128,1,float16,fp8,2047,0.16008533040682474
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,128,1,128,1,float16,float16,8191,0.6642346779505411
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,128,1,128,1,float16,fp8,4095,0.3053119977315267
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,128,1,128,1,float16,fp8,8191,0.6010880072911581
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,128,2,128,1,float16,float16,1,0.019109333554903667
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,128,1,128,1,float16,float16,4095,0.3394560019175212
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,128,1,128,1,float16,float16,16383,1.5281440416971843
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,128,2,128,1,float16,float16,3,0.01911466692884763
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,128,2,128,1,float16,float16,7,0.019109333554903667
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,128,2,128,1,float16,fp8,3,0.015018666783968607
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,128,2,128,1,float16,fp8,1,0.015050667027632395
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,128,2,128,1,float16,fp8,7,0.015365333606799444
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,128,2,128,1,float16,float16,31,0.02081599955757459
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,128,2,128,1,float16,float16,15,0.02081599955757459
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,128,1,128,1,float16,fp8,16383,1.3105546633402507
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,128,2,128,1,float16,fp8,31,0.01876266673207283
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,128,2,128,1,float16,fp8,15,0.01877333347996076
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,128,2,128,1,float16,float16,63,0.020821332931518555
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,128,2,128,1,float16,float16,127,0.024911999702453613
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,128,2,128,1,float16,fp8,127,0.020821332931518555
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,128,2,128,1,float16,fp8,63,0.018778666853904724
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,128,2,128,1,float16,float16,511,0.05563733478387197
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,128,2,128,1,float16,float16,255,0.03515733281771342
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,128,2,128,1,float16,float16,1023,0.09659733374913533
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,128,2,128,1,float16,float16,2047,0.17646400133768717
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,128,2,128,1,float16,fp8,1023,0.08636266986529033
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,128,2,128,1,float16,fp8,511,0.04949333270390829
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,128,2,128,1,float16,float16,4095,0.3428693215052287
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,128,2,128,1,float16,fp8,2047,0.1599146624406179
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,128,2,128,1,float16,fp8,8191,0.6017760038375854
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,128,2,128,1,float16,fp8,255,0.031066666046778362
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,128,4,128,1,float16,float16,1,0.019109333554903667
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,128,2,128,1,float16,float16,8191,0.6700373490651449
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,128,2,128,1,float16,fp8,4095,0.3056640028953552
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,128,2,128,1,float16,float16,16383,1.6689440409342449
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,128,4,128,1,float16,float16,3,0.019130667050679524
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,128,2,128,1,float16,fp8,16383,1.3085013230641682
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,128,4,128,1,float16,float16,7,0.01911466692884763
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,128,4,128,1,float16,fp8,1,0.015024000157912573
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,128,4,128,1,float16,fp8,7,0.015018666783968607
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,128,4,128,1,float16,fp8,15,0.018768000106016796
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,128,4,128,1,float16,float16,31,0.02081599955757459
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,128,4,128,1,float16,float16,15,0.020821332931518555
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,128,4,128,1,float16,fp8,3,0.016421332955360413
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,128,4,128,1,float16,float16,63,0.021162666380405426
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,128,4,128,1,float16,float16,127,0.02492266645034154
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,128,4,128,1,float16,fp8,63,0.01877333347996076
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,128,4,128,1,float16,fp8,31,0.018768000106016796
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,128,4,128,1,float16,float16,255,0.03515733281771342
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,128,4,128,1,float16,fp8,127,0.02082666630546252
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,128,4,128,1,float16,fp8,511,0.04949333270390829
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,128,4,128,1,float16,float16,511,0.05597866574923197
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,128,4,128,1,float16,fp8,255,0.031061333914597828
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,128,4,128,1,float16,float16,1023,0.09659733374913533
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,128,4,128,1,float16,fp8,2047,0.15991999705632529
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,128,4,128,1,float16,fp8,1023,0.08635733524958293
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,128,4,128,1,float16,float16,4095,0.34116268157958984
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,128,4,128,1,float16,float16,2047,0.17698667446772257
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,128,4,128,1,float16,fp8,4095,0.306005338827769
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,128,4,128,1,float16,float16,8191,0.6812960306803385
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,128,4,128,1,float16,fp8,8191,0.6034719944000244
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,128,4,128,1,float16,fp8,16383,1.3738719622294109
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,128,8,128,1,float16,fp8,1,0.010586666564146677
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,128,4,128,1,float16,float16,16383,2.011136054992676
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,128,8,128,1,float16,fp8,3,0.010922666639089584
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,128,8,128,1,float16,float16,1,0.01080000028014183
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,128,8,128,1,float16,float16,7,0.010944000134865442
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,128,8,128,1,float16,float16,3,0.010922666639089584
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,128,8,128,1,float16,fp8,15,0.01080000028014183
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,128,8,128,1,float16,float16,15,0.011616000284751257
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,128,8,128,1,float16,fp8,31,0.012629333883523941
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,128,8,128,1,float16,fp8,7,0.010928000013033548
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,128,8,128,1,float16,float16,63,0.010640000303586325
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,128,8,128,1,float16,fp8,63,0.012634667257467905
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,128,8,128,1,float16,float16,31,0.010928000013033548
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,128,8,128,1,float16,float16,127,0.010960000256697336
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,128,8,128,1,float16,fp8,127,0.010581333190202713
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,128,8,128,1,float16,float16,255,0.012634667257467905
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,128,8,128,1,float16,fp8,255,0.012645332763592402
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,128,8,128,1,float16,float16,1023,0.014677333335081736
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,128,8,128,1,float16,fp8,511,0.012597333639860153
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,128,8,128,1,float16,float16,511,0.012629333883523941
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,128,8,128,1,float16,fp8,1023,0.012970666090647379
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,128,8,128,1,float16,float16,4095,0.022869333624839783
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,128,8,128,1,float16,fp8,2047,0.0170666662355264
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,128,8,128,1,float16,fp8,4095,0.021162666380405426
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,128,8,128,1,float16,float16,2047,0.01877333347996076
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,128,8,128,1,float16,float16,8191,0.039594667653242745
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,128,8,128,1,float16,fp8,8191,0.027301333844661713
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,128,8,128,1,float16,float16,16383,0.06041066845258077
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,128,8,128,1,float16,fp8,16383,0.04301333427429199
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1024,1,128,1,128,1,float16,float16,1,1.5274613698323567
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1024,1,128,1,128,1,float16,float16,3,1.5677493413289387
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1024,1,128,1,128,1,float16,fp8,1,1.1898826758066814
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1024,1,128,1,128,1,float16,float16,7,1.586682637532552
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1024,1,128,1,128,1,float16,fp8,3,1.2352853616078694
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1024,1,128,1,128,1,float16,fp8,7,1.2757279872894287
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1024,1,128,1,128,1,float16,float16,15,1.9061706860860188
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1024,1,128,1,128,1,float16,fp8,15,1.6436959902445476
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1024,1,128,1,128,1,float16,float16,31,1.9259732564290364
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1024,1,128,1,128,1,float16,fp8,31,1.6423254013061523
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1024,1,128,1,128,1,float16,fp8,63,1.652224063873291
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1024,1,128,1,128,1,float16,float16,63,1.9553227424621582
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1024,1,128,2,128,1,float16,float16,1,1.5264533360799153
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1024,1,128,2,128,1,float16,fp8,1,1.1905759970347087
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1024,1,128,2,128,1,float16,float16,3,1.574735959370931
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1024,1,128,2,128,1,float16,fp8,3,1.2359573046366374
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1024,1,128,2,128,1,float16,float16,7,1.6044212977091472
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1024,1,128,2,128,1,float16,fp8,7,1.2917760213216145
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1024,1,128,2,128,1,float16,float16,15,1.9102667172749836
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1024,1,128,2,128,1,float16,fp8,15,1.6491573651631672
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1024,1,128,2,128,1,float16,float16,31,1.9539680480957031
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1024,1,128,2,128,1,float16,fp8,31,1.6505227088928223
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1024,1,128,2,128,1,float16,float16,63,2.0125013987223306
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1024,1,128,2,128,1,float16,fp8,63,1.6675893465677898
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1024,1,128,4,128,1,float16,fp8,1,1.192618687947591
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1024,1,128,4,128,1,float16,float16,1,1.527125358581543
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1024,1,128,4,128,1,float16,float16,3,1.5829280217488606
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1024,1,128,4,128,1,float16,fp8,3,1.2349440256754558
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1024,1,128,4,128,1,float16,float16,7,1.6242292722066243
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1024,1,128,4,128,1,float16,fp8,7,1.306117296218872
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1024,1,128,4,128,1,float16,float16,15,1.930074691772461
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1024,1,128,4,128,1,float16,fp8,15,1.6494933764139812
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1024,1,128,4,128,1,float16,fp8,31,1.6621227264404297
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1024,1,128,4,128,1,float16,float16,31,2.0029333432515464
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1024,1,128,4,128,1,float16,float16,63,2.0309066772460938
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1024,1,128,4,128,1,float16,fp8,63,1.6919840176900227
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1024,1,128,8,128,1,float16,float16,1,0.1648906668027242
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1024,1,128,8,128,1,float16,float16,3,0.16366933782895407
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1024,1,128,8,128,1,float16,fp8,1,0.1544533371925354
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1024,1,128,8,128,1,float16,fp8,3,0.1539466679096222
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1024,1,128,8,128,1,float16,float16,7,0.1639840006828308
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1024,1,128,8,128,1,float16,fp8,7,0.15428800384203592
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1024,1,128,8,128,1,float16,float16,15,0.1629866659641266
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1024,1,128,8,128,1,float16,fp8,15,0.1544533371925354
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1024,1,128,8,128,1,float16,float16,31,0.16452266772588095
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1024,1,128,8,128,1,float16,fp8,31,0.15410666664441428
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1024,1,128,8,128,1,float16,float16,63,0.16434666514396667
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1024,1,128,8,128,1,float16,fp8,63,0.15411200126012167
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2048,1,128,1,128,1,float16,float16,1,3.220656077067057
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2048,1,128,1,128,1,float16,float16,3,3.3027467727661133
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2048,1,128,1,128,1,float16,fp8,1,2.5026559829711914
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2048,1,128,1,128,1,float16,fp8,3,2.5792853037516275
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2048,1,128,1,128,1,float16,fp8,7,2.6773974100748696
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2048,1,128,1,128,1,float16,float16,7,3.3334614435831704
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2048,1,128,1,128,1,float16,fp8,15,3.314688046773275
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2048,1,128,1,128,1,float16,float16,15,3.858938535054525
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2048,1,128,1,128,1,float16,float16,31,3.9176534016927085
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2048,1,128,1,128,1,float16,fp8,31,3.313685417175293
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2048,1,128,2,128,1,float16,float16,1,3.307525316874186
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2048,1,128,2,128,1,float16,fp8,1,2.535600026448568
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2048,1,128,2,128,1,float16,float16,3,3.3612852096557617
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2048,1,128,2,128,1,float16,fp8,3,2.586106618245443
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2048,1,128,2,128,1,float16,fp8,7,2.710511843363444
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2048,1,128,2,128,1,float16,float16,7,3.393365224202474
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2048,1,128,2,128,1,float16,fp8,15,3.3358453114827475
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2048,1,128,2,128,1,float16,float16,15,3.8725973765055337
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2048,1,128,2,128,1,float16,float16,31,3.979946772257487
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2048,1,128,2,128,1,float16,fp8,31,3.341994603474935
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2048,1,128,4,128,1,float16,float16,1,3.334831873575846
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2048,1,128,4,128,1,float16,fp8,1,2.569386641184489
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2048,1,128,4,128,1,float16,fp8,3,2.617856025695801
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2048,1,128,4,128,1,float16,float16,3,3.3878987630208335
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2048,1,128,4,128,1,float16,fp8,7,2.731509208679199
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2048,1,128,4,128,1,float16,float16,7,3.4184694290161133
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2048,1,128,4,128,1,float16,float16,15,3.901616096496582
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2048,1,128,4,128,1,float16,fp8,15,3.355994542439779
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2048,1,128,8,128,1,float16,float16,1,0.3145599961280823
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2048,1,128,8,128,1,float16,fp8,1,0.29440534114837646
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2048,1,128,4,128,1,float16,float16,31,4.015280087788899
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2048,1,128,4,128,1,float16,fp8,31,3.350874582926432
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2048,1,128,8,128,1,float16,float16,3,0.31522132953008014
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2048,1,128,8,128,1,float16,fp8,3,0.293722669283549
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2048,1,128,8,128,1,float16,float16,7,0.31455467144648236
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,128,1,128,1,float16,float16,1,0.029343999922275543
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,128,1,128,1,float16,fp8,1,0.02526933451493581
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,128,1,128,1,float16,float16,3,0.031061333914597828
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,128,1,128,1,float16,fp8,3,0.025263999899228413
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2048,1,128,8,128,1,float16,float16,15,0.31520533561706543
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2048,1,128,8,128,1,float16,fp8,7,0.2958079973856608
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2048,1,128,8,128,1,float16,fp8,15,0.2940533359845479
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,128,1,128,1,float16,float16,7,0.031061333914597828
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2048,1,128,8,128,1,float16,float16,31,0.3145386576652527
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2048,1,128,8,128,1,float16,fp8,31,0.2937013308207194
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,128,1,128,1,float16,fp8,7,0.025231999655564625
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,128,1,128,1,float16,float16,15,0.03515733281771342
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,128,1,128,1,float16,fp8,31,0.0314026673634847
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,128,1,128,1,float16,fp8,15,0.0314026673634847
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,128,1,128,1,float16,float16,127,0.04164800047874451
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,128,1,128,1,float16,float16,63,0.03549866626660029
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,128,1,128,1,float16,float16,31,0.03549333413441976
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,128,1,128,1,float16,fp8,63,0.0314026673634847
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,128,1,128,1,float16,fp8,127,0.03719999889532725
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,128,1,128,1,float16,float16,255,0.061754668752352394
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,128,1,128,1,float16,fp8,255,0.05563733478387197
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,128,1,128,1,float16,fp8,511,0.09044800202051799
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,128,1,128,1,float16,float16,511,0.09966933727264404
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,128,1,128,1,float16,float16,1023,0.17629865805308023
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,128,1,128,1,float16,float16,2047,0.32819199562072754
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,128,1,128,1,float16,fp8,2047,0.3039626677831014
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,128,1,128,1,float16,float16,4095,0.6340266863505045
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,128,1,128,1,float16,fp8,1023,0.1621333360671997
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,128,1,128,1,float16,fp8,4095,0.5901653369267782
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,128,2,128,1,float16,fp8,3,0.025258667767047882
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,128,2,128,1,float16,float16,3,0.03107200066248576
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,128,1,128,1,float16,float16,8191,1.2441600163777669
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,128,2,128,1,float16,fp8,7,0.02628266563018163
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,128,2,128,1,float16,fp8,1,0.02491733431816101
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,128,2,128,1,float16,float16,1,0.02935466667016347
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,128,2,128,1,float16,float16,7,0.031066666046778362
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,128,1,128,1,float16,fp8,8191,1.1654986540476482
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,128,2,128,1,float16,float16,15,0.03515733281771342
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,128,2,128,1,float16,fp8,31,0.03139200061559677
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,128,2,128,1,float16,fp8,15,0.031061333914597828
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,128,2,128,1,float16,float16,63,0.03515733281771342
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,128,2,128,1,float16,float16,31,0.03515733281771342
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,128,2,128,1,float16,fp8,63,0.03105599929889043
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,128,2,128,1,float16,float16,127,0.042319998145103455
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,128,2,128,1,float16,fp8,127,0.03721600025892258
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,128,2,128,1,float16,float16,255,0.06178133189678192
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,128,2,128,1,float16,fp8,255,0.05495466788609823
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,128,2,128,1,float16,float16,511,0.09898666540781657
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,128,2,128,1,float16,float16,2047,0.32921600341796875
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,128,2,128,1,float16,float16,1023,0.17629865805308023
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,128,2,128,1,float16,fp8,511,0.09045333663622539
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,128,2,128,1,float16,fp8,2047,0.3039626677831014
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,128,2,128,1,float16,float16,4095,0.637440005938212
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,128,2,128,1,float16,fp8,4095,0.5901653369267782
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,128,2,128,1,float16,fp8,1023,0.1621386706829071
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,128,2,128,1,float16,float16,8191,1.2900746663411458
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,128,4,128,1,float16,fp8,3,0.025258667767047882
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,128,4,128,1,float16,float16,7,0.031061333914597828
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,128,2,128,1,float16,fp8,8191,1.1695733070373535
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,128,4,128,1,float16,float16,3,0.031066666046778362
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,128,4,128,1,float16,float16,1,0.02935466667016347
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,128,4,128,1,float16,float16,15,0.03515200068553289
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,128,4,128,1,float16,fp8,1,0.024911999702453613
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,128,4,128,1,float16,fp8,7,0.02629866699377696
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,128,4,128,1,float16,fp8,15,0.03105599929889043
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,128,4,128,1,float16,float16,31,0.03515733281771342
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,128,4,128,1,float16,float16,63,0.03514666606982549
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,128,4,128,1,float16,fp8,31,0.031061333914597828
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,128,4,128,1,float16,fp8,63,0.031061333914597828
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,128,4,128,1,float16,float16,127,0.04161600023508072
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,128,4,128,1,float16,fp8,255,0.05563733478387197
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,128,4,128,1,float16,float16,511,0.09967466195424397
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,128,4,128,1,float16,fp8,127,0.037205333511034645
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,128,4,128,1,float16,fp8,511,0.09045333663622539
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,128,4,128,1,float16,float16,255,0.06178666651248932
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,128,4,128,1,float16,float16,1023,0.17663466930389404
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,128,4,128,1,float16,fp8,1023,0.16196266810099283
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,128,4,128,1,float16,float16,2047,0.328874667485555
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,128,4,128,1,float16,fp8,2047,0.30429865916570026
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,128,4,128,1,float16,float16,4095,0.6553599834442139
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,128,8,128,1,float16,float16,1,0.011946666985750198
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,128,4,128,1,float16,float16,8191,1.441450595855713
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,128,4,128,1,float16,fp8,4095,0.5918720165888468
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,128,8,128,1,float16,fp8,3,0.012634667257467905
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,128,8,128,1,float16,fp8,1,0.011610666910807291
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,128,8,128,1,float16,float16,3,0.012629333883523941
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,128,4,128,1,float16,fp8,8191,1.1746986707051594
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,128,8,128,1,float16,float16,7,0.010928000013033548
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,128,8,128,1,float16,fp8,7,0.011285333583752314
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,128,8,128,1,float16,float16,31,0.011503999431928
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,128,8,128,1,float16,float16,15,0.01228800043463707
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,128,8,128,1,float16,float16,127,0.010922666639089584
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,128,8,128,1,float16,fp8,31,0.012629333883523941
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,128,8,128,1,float16,float16,63,0.011264000087976456
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,128,8,128,1,float16,fp8,15,0.012538666526476542
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,128,8,128,1,float16,fp8,63,0.012629333883523941
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,128,8,128,1,float16,fp8,127,0.010922666639089584
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,128,8,128,1,float16,float16,255,0.01228800043463707
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,128,8,128,1,float16,float16,511,0.012634667257467905
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,128,8,128,1,float16,fp8,511,0.012970666090647379
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,128,8,128,1,float16,float16,1023,0.014677333335081736
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,128,8,128,1,float16,fp8,255,0.011488000551859537
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,128,8,128,1,float16,fp8,1023,0.014677333335081736
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,128,8,128,1,float16,fp8,2047,0.02081599955757459
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,128,8,128,1,float16,float16,4095,0.03925333420435587
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,128,8,128,1,float16,float16,2047,0.022869333624839783
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,128,8,128,1,float16,fp8,4095,0.026949333647886913
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,128,8,128,1,float16,float16,8191,0.06144000093142191
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,128,8,128,1,float16,fp8,8191,0.04334400097529093
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,96,1,128,1,float16,float16,1,0.04130133241415024
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,96,1,128,1,float16,fp8,3,0.036864000062147774
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,96,1,128,1,float16,float16,3,0.04130133241415024
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,96,1,128,1,float16,fp8,7,0.037205333511034645
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,96,1,128,1,float16,fp8,1,0.03515733281771342
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,96,1,128,1,float16,float16,7,0.04334400097529093
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,96,1,128,1,float16,float16,15,0.04983466863632202
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,96,1,128,1,float16,fp8,15,0.04474666714668274
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,96,1,128,1,float16,fp8,31,0.04537599782148997
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,96,1,128,1,float16,fp8,63,0.04539733131726583
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,96,1,128,1,float16,float16,63,0.051541333397229515
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,96,1,128,1,float16,float16,31,0.04983466863632202
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,96,1,128,1,float16,float16,255,0.08876799543698628
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,96,1,128,1,float16,fp8,255,0.07850133379300435
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,96,1,128,1,float16,float16,127,0.06007466713587443
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,96,1,128,1,float16,fp8,127,0.053583999474843345
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,96,1,128,1,float16,float16,511,0.14592533310254416
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,96,1,128,1,float16,fp8,511,0.13157866398493448
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,96,1,128,1,float16,float16,1023,0.2597759962081909
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,96,1,128,1,float16,float16,2047,0.48555199305216473
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,96,1,128,1,float16,fp8,1023,0.2379146615664164
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,96,1,128,1,float16,fp8,2047,0.44970667362213135
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,96,1,128,1,float16,float16,4095,0.941210667292277
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,96,1,128,1,float16,fp8,4095,0.8765493233998617
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,96,4,128,1,float16,float16,1,0.04130133241415024
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,96,4,128,1,float16,fp8,1,0.03516799956560135
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,96,4,128,1,float16,float16,3,0.04130133241415024
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,96,4,128,1,float16,fp8,3,0.03549866626660029
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,96,4,128,1,float16,float16,7,0.04164266586303711
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,96,4,128,1,float16,fp8,7,0.037205333511034645
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,96,4,128,1,float16,float16,15,0.04949333270390829
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,96,4,128,1,float16,fp8,15,0.04437333345413208
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,96,4,128,1,float16,float16,31,0.05085866649945577
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,96,4,128,1,float16,fp8,31,0.04539733131726583
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,96,4,128,1,float16,float16,63,0.05153599878152212
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,96,4,128,1,float16,fp8,63,0.04539200166861216
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,96,4,128,1,float16,float16,127,0.06010666489601135
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,96,4,128,1,float16,fp8,127,0.05358933409055074
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,96,4,128,1,float16,float16,255,0.08874133229255676
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,96,4,128,1,float16,fp8,255,0.08021333316961925
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,96,4,128,1,float16,float16,511,0.145578662554423
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,96,4,128,1,float16,fp8,511,0.13158399860064188
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,96,4,128,1,float16,float16,1023,0.258730669816335
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,96,4,128,1,float16,fp8,1023,0.2379093368848165
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,96,4,128,1,float16,float16,2047,0.5114880005518595
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,96,4,128,1,float16,fp8,2047,0.45073068141937256
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,96,4,128,1,float16,float16,4095,1.0514933268229167
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,96,4,128,1,float16,fp8,4095,0.8850773175557455
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,96,8,128,1,float16,float16,1,0.014650666465361914
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,96,8,128,1,float16,fp8,1,0.014677333335081736
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,96,8,128,1,float16,float16,3,0.014682666709025701
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,96,8,128,1,float16,fp8,3,0.014677333335081736
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,96,8,128,1,float16,fp8,7,0.014677333335081736
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,96,8,128,1,float16,float16,15,0.014677333335081736
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,96,8,128,1,float16,fp8,15,0.014677333335081736
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,96,8,128,1,float16,float16,7,0.014677333335081736
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,96,8,128,1,float16,float16,31,0.015013333410024643
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,96,8,128,1,float16,float16,63,0.014677333335081736
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,96,8,128,1,float16,fp8,63,0.014677333335081736
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,96,8,128,1,float16,fp8,31,0.013653332988421122
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,96,8,128,1,float16,fp8,127,0.014335999886194864
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,96,8,128,1,float16,float16,127,0.014677333335081736
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,96,8,128,1,float16,fp8,255,0.014677333335081736
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,96,8,128,1,float16,float16,255,0.014906667172908783
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,96,8,128,1,float16,float16,511,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,96,8,128,1,float16,fp8,511,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,96,8,128,1,float16,float16,1023,0.020821332931518555
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,96,8,128,1,float16,float16,2047,0.03959999978542328
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,96,8,128,1,float16,fp8,1023,0.019109333554903667
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,96,8,128,1,float16,float16,4095,0.0631466656923294
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,96,8,128,1,float16,fp8,2047,0.0290133332212766
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,96,8,128,1,float16,fp8,4095,0.045738667249679565
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,96,1,128,1,float16,float16,1,0.008629333227872849
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,96,1,128,1,float16,float16,3,0.008874666566650072
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,96,1,128,1,float16,fp8,1,0.008752000207702318
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,96,1,128,1,float16,fp8,3,0.008858666444818178
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,96,1,128,1,float16,float16,7,0.008559999987483025
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,96,1,128,1,float16,fp8,7,0.00873066671192646
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,96,1,128,1,float16,float16,15,0.008890666688481966
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,96,1,128,1,float16,float16,31,0.010559999694426855
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,96,1,128,1,float16,fp8,15,0.008933333059151968
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,96,1,128,1,float16,fp8,31,0.010586666564146677
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,96,1,128,1,float16,float16,63,0.010581333190202713
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,96,1,128,1,float16,fp8,63,0.010464000205198923
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,96,1,128,1,float16,float16,127,0.010581333190202713
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,96,1,128,1,float16,fp8,127,0.010597333312034607
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,96,1,128,1,float16,float16,255,0.022869333624839783
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,96,1,128,1,float16,fp8,255,0.022533332308133442
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,96,1,128,1,float16,float16,511,0.02661866694688797
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,96,1,128,1,float16,fp8,511,0.024911999702453613
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,96,1,128,1,float16,float16,1023,0.03310933212439219
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,96,1,128,1,float16,fp8,1023,0.031061333914597828
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,96,1,128,1,float16,float16,2047,0.04744533201058706
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,96,1,128,1,float16,fp8,2047,0.04334933559099833
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,96,1,128,1,float16,fp8,4095,0.067930668592453
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,96,1,128,1,float16,float16,8191,0.12763733665148416
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,96,1,128,1,float16,float16,4095,0.07406933108965556
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,96,1,128,1,float16,fp8,8191,0.11538666486740112
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,96,1,128,1,float16,fp8,16383,0.2112906575202942
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,96,1,128,1,float16,float16,16383,0.23552000522613525
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,96,4,128,1,float16,float16,1,0.008698666468262672
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,96,4,128,1,float16,fp8,1,0.008549333239595095
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,96,4,128,1,float16,float16,3,0.00877333308259646
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,96,4,128,1,float16,fp8,3,0.008581333483258883
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,96,4,128,1,float16,float16,7,0.008672000219424566
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,96,4,128,1,float16,fp8,7,0.008778666456540426
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,96,4,128,1,float16,float16,15,0.008538666491707167
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,96,4,128,1,float16,fp8,15,0.009008000294367472
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,96,4,128,1,float16,float16,31,0.010597333312034607
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,96,4,128,1,float16,fp8,31,0.010581333190202713
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,96,4,128,1,float16,fp8,63,0.010581333190202713
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,96,4,128,1,float16,float16,127,0.010581333190202713
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,96,4,128,1,float16,fp8,127,0.010565333068370819
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,96,4,128,1,float16,float16,255,0.022869333624839783
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,96,4,128,1,float16,fp8,255,0.02222399910291036
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,96,4,128,1,float16,float16,63,0.010586666564146677
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,96,4,128,1,float16,float16,511,0.026629333694775898
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,96,4,128,1,float16,float16,1023,0.03310933212439219
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,96,4,128,1,float16,fp8,511,0.024885334074497223
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,96,4,128,1,float16,fp8,1023,0.031061333914597828
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,96,4,128,1,float16,float16,2047,0.04744000236193339
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,96,4,128,1,float16,fp8,2047,0.04334933559099833
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,96,4,128,1,float16,float16,4095,0.07406933108965556
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,96,4,128,1,float16,fp8,4095,0.0679253339767456
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,96,4,128,1,float16,float16,8191,0.1276586651802063
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,96,4,128,1,float16,fp8,8191,0.11537599563598633
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,96,4,128,1,float16,float16,16383,0.23586666584014893
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,96,4,128,1,float16,fp8,16383,0.2112906575202942
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,96,8,128,1,float16,float16,1,0.010586666564146677
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,96,8,128,1,float16,fp8,1,0.010922666639089584
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,96,8,128,1,float16,float16,3,0.01090666651725769
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,96,8,128,1,float16,fp8,3,0.010586666564146677
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,96,8,128,1,float16,fp8,7,0.010586666564146677
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,96,8,128,1,float16,float16,7,0.010656000425418219
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,96,8,128,1,float16,float16,15,0.010581333190202713
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,96,8,128,1,float16,fp8,15,0.010805333654085795
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,96,8,128,1,float16,float16,31,0.010586666564146677
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,96,8,128,1,float16,fp8,31,0.010944000134865442
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,96,8,128,1,float16,float16,63,0.010944000134865442
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,96,8,128,1,float16,fp8,63,0.010928000013033548
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,96,8,128,1,float16,float16,127,0.01080000028014183
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,96,8,128,1,float16,fp8,127,0.010922666639089584
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,96,8,128,1,float16,float16,255,0.010922666639089584
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,96,8,128,1,float16,fp8,255,0.01089599976936976
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,96,8,128,1,float16,float16,511,0.012634667257467905
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,96,8,128,1,float16,fp8,511,0.012629333883523941
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,96,8,128,1,float16,float16,1023,0.010922666639089584
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,96,8,128,1,float16,fp8,1023,0.01126933346192042
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,96,8,128,1,float16,float16,2047,0.012634667257467905
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,96,8,128,1,float16,fp8,2047,0.012629333883523941
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,96,8,128,1,float16,float16,4095,0.016719999412695568
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,96,8,128,1,float16,float16,8191,0.01877333347996076
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,96,8,128,1,float16,fp8,4095,0.016730666160583496
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,96,8,128,1,float16,fp8,8191,0.01877333347996076
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,96,8,128,1,float16,float16,16383,0.020821332931518555
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,96,1,128,1,float16,float16,1,0.014677333335081736
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,96,1,128,1,float16,fp8,1,0.016719999412695568
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,96,1,128,1,float16,float16,3,0.014671999961137772
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,96,1,128,1,float16,fp8,3,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,96,1,128,1,float16,float16,7,0.014677333335081736
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,96,1,128,1,float16,fp8,7,0.016735999534527462
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,96,1,128,1,float16,float16,15,0.014671999961137772
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,96,1,128,1,float16,fp8,15,0.016730666160583496
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,96,8,128,1,float16,fp8,16383,0.020821332931518555
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,96,1,128,1,float16,float16,31,0.016730666160583496
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,96,1,128,1,float16,fp8,31,0.018746666610240936
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,96,1,128,1,float16,fp8,63,0.022869333624839783
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,96,1,128,1,float16,float16,127,0.016719999412695568
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,96,1,128,1,float16,float16,63,0.015018666783968607
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,96,1,128,1,float16,float16,255,0.018768000106016796
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,96,1,128,1,float16,fp8,255,0.021162666380405426
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,96,1,128,1,float16,fp8,127,0.022895999252796173
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,96,1,128,1,float16,float16,511,0.024933333198229473
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,96,1,128,1,float16,fp8,511,0.024911999702453613
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,96,1,128,1,float16,float16,1023,0.03754133234421412
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,96,1,128,1,float16,float16,2047,0.06382933259010315
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,96,1,128,1,float16,float16,4095,0.11332266529401143
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,96,1,128,1,float16,fp8,2047,0.05563733478387197
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,96,1,128,1,float16,fp8,4095,0.09454933802286784
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,96,1,128,1,float16,float16,8191,0.2133386731147766
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,96,1,128,1,float16,fp8,8191,0.17234132687250772
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,96,1,128,1,float16,float16,16383,0.4140373468399048
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,96,1,128,1,float16,fp8,16383,0.3285333315531413
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,96,1,128,1,float16,fp8,1023,0.03549866626660029
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,96,4,128,1,float16,float16,1,0.012970666090647379
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,96,4,128,1,float16,fp8,1,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,96,4,128,1,float16,float16,3,0.013653332988421122
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,96,4,128,1,float16,fp8,3,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,96,4,128,1,float16,float16,7,0.014677333335081736
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,96,4,128,1,float16,fp8,7,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,96,4,128,1,float16,float16,15,0.014677333335081736
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,96,4,128,1,float16,float16,31,0.015018666783968607
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,96,4,128,1,float16,fp8,31,0.01877333347996076
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,96,4,128,1,float16,float16,63,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,96,4,128,1,float16,fp8,63,0.022874665757020313
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,96,4,128,1,float16,float16,127,0.016757333030303318
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,96,4,128,1,float16,fp8,15,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,96,4,128,1,float16,fp8,127,0.022869333624839783
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,96,4,128,1,float16,float16,255,0.01877333347996076
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,96,4,128,1,float16,float16,511,0.02491733431816101
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,96,4,128,1,float16,fp8,255,0.022869333624839783
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,96,4,128,1,float16,fp8,511,0.024933333198229473
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,96,4,128,1,float16,float16,1023,0.03755199909210205
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,96,4,128,1,float16,fp8,1023,0.03515733281771342
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,96,4,128,1,float16,float16,2047,0.06382933259010315
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,96,4,128,1,float16,fp8,2047,0.055642664432525635
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,96,4,128,1,float16,float16,4095,0.11333333452542622
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,96,4,128,1,float16,fp8,4095,0.09454933802286784
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,96,4,128,1,float16,float16,8191,0.2136746644973755
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,96,4,128,1,float16,fp8,8191,0.1723733345667521
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,96,4,128,1,float16,fp8,16383,0.32819199562072754
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,96,4,128,1,float16,float16,16383,0.41576532522837323
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,96,8,128,1,float16,float16,1,0.0106133334338665
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,96,8,128,1,float16,fp8,1,0.010634666929642359
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,96,8,128,1,float16,float16,3,0.010581333190202713
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,96,8,128,1,float16,fp8,3,0.011722666521867117
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,96,8,128,1,float16,float16,7,0.010608000059922537
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,96,8,128,1,float16,fp8,7,0.010575999816258749
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,96,8,128,1,float16,float16,15,0.010602666685978571
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,96,8,128,1,float16,fp8,15,0.010661333799362183
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,96,8,128,1,float16,float16,31,0.010581333190202713
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,96,8,128,1,float16,fp8,31,0.010586666564146677
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,96,8,128,1,float16,float16,63,0.010575999816258749
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,96,8,128,1,float16,fp8,63,0.010575999816258749
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,96,8,128,1,float16,float16,127,0.01089599976936976
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,96,8,128,1,float16,fp8,127,0.010581333190202713
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,96,8,128,1,float16,fp8,255,0.010949333508809408
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,96,8,128,1,float16,float16,255,0.010581333190202713
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,96,8,128,1,float16,fp8,511,0.012629333883523941
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,96,8,128,1,float16,float16,511,0.012970666090647379
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,96,8,128,1,float16,fp8,1023,0.012634667257467905
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,96,8,128,1,float16,float16,1023,0.01251199965675672
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,96,8,128,1,float16,float16,2047,0.016730666160583496
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,96,8,128,1,float16,fp8,2047,0.016719999412695568
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,96,8,128,1,float16,fp8,4095,0.018778666853904724
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,96,8,128,1,float16,float16,4095,0.01877333347996076
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,96,8,128,1,float16,fp8,8191,0.01911466692884763
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,96,8,128,1,float16,float16,8191,0.020821332931518555
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,96,8,128,1,float16,float16,16383,0.02491733431816101
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,96,8,128,1,float16,fp8,16383,0.022869333624839783
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,96,1,128,1,float16,float16,1,0.07748266557852428
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,96,1,128,1,float16,fp8,1,0.062122667829195656
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,96,1,128,1,float16,float16,3,0.07851199805736542
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,96,1,128,1,float16,fp8,3,0.06382399797439575
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,96,1,128,1,float16,fp8,7,0.06621333460013072
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,96,1,128,1,float16,float16,15,0.09455466270446777
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,96,1,128,1,float16,fp8,15,0.08260266482830048
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,96,1,128,1,float16,float16,31,0.09523199995358785
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,96,1,128,1,float16,fp8,31,0.0825973351796468
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,96,1,128,1,float16,float16,63,0.09659733374913533
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,96,1,128,1,float16,fp8,63,0.08328533172607422
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,96,1,128,1,float16,float16,127,0.11502933502197266
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,96,1,128,1,float16,fp8,127,0.09865599870681763
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,96,1,128,1,float16,float16,7,0.08053333560625713
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,96,1,128,1,float16,float16,255,0.17015467087427774
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,96,1,128,1,float16,fp8,255,0.15018666783968607
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,96,1,128,1,float16,fp8,511,0.2542773286501567
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,96,1,128,1,float16,float16,1023,0.5063680013020834
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,96,1,128,1,float16,float16,511,0.2828106681505839
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,96,1,128,1,float16,fp8,1023,0.46506667137145996
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,96,1,128,1,float16,float16,2047,0.9552160104115804
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,96,1,128,1,float16,fp8,2047,0.8857599894205729
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,96,4,128,1,float16,float16,1,0.07679999868075053
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,96,4,128,1,float16,fp8,1,0.062122667829195656
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,96,4,128,1,float16,float16,3,0.07815999786059062
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,96,4,128,1,float16,fp8,3,0.06382933259010315
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,96,4,128,1,float16,float16,7,0.08021333316961925
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,96,4,128,1,float16,fp8,7,0.06621866424878438
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,96,4,128,1,float16,fp8,15,0.08260266482830048
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,96,4,128,1,float16,float16,31,0.09489066402117412
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,96,4,128,1,float16,fp8,31,0.08362666765848796
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,96,4,128,1,float16,float16,63,0.09658666451772054
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,96,4,128,1,float16,float16,15,0.09489599863688152
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,96,4,128,1,float16,fp8,63,0.08430400490760803
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,96,4,128,1,float16,fp8,127,0.09899200002352397
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,96,4,128,1,float16,float16,127,0.11504000425338745
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,96,4,128,1,float16,float16,255,0.17015999555587769
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,96,4,128,1,float16,float16,511,0.28279467423756915
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,96,4,128,1,float16,fp8,255,0.15019200245539346
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,96,4,128,1,float16,fp8,511,0.25428799788157147
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,96,4,128,1,float16,float16,1023,0.5324853261311849
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,96,4,128,1,float16,fp8,1023,0.4650719960530599
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,96,4,128,1,float16,float16,2047,1.0494293371836345
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,96,4,128,1,float16,fp8,2047,0.8929279645284017
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,96,8,128,1,float16,float16,1,0.019120000302791595
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,96,8,128,1,float16,fp8,1,0.018768000106016796
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,96,8,128,1,float16,float16,3,0.01911466692884763
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,96,8,128,1,float16,fp8,3,0.01877333347996076
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,96,8,128,1,float16,float16,7,0.01911466692884763
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,96,8,128,1,float16,fp8,7,0.01877333347996076
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,96,8,128,1,float16,float16,15,0.01945066700379054
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,96,8,128,1,float16,fp8,15,0.017407999684413273
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,96,8,128,1,float16,float16,31,0.019088000059127808
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,96,8,128,1,float16,float16,63,0.01911466692884763
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,96,8,128,1,float16,fp8,31,0.0170666662355264
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,96,8,128,1,float16,fp8,63,0.01876266673207283
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,96,8,128,1,float16,float16,127,0.01877333347996076
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,96,8,128,1,float16,float16,255,0.01877333347996076
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,96,8,128,1,float16,fp8,127,0.018778666853904724
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,96,8,128,1,float16,fp8,255,0.0170666662355264
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,96,8,128,1,float16,float16,511,0.02492266645034154
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,96,8,128,1,float16,fp8,511,0.02184533327817917
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,96,8,128,1,float16,float16,1023,0.03822399924198786
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,96,8,128,1,float16,float16,2047,0.06417066852251689
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,96,8,128,1,float16,fp8,1023,0.027306665976842243
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,96,8,128,1,float16,fp8,2047,0.04573333263397217
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,96,1,128,1,float16,float16,1,0.01911466692884763
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,96,1,128,1,float16,fp8,1,0.014997333288192749
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,96,1,128,1,float16,float16,3,0.01911466692884763
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,96,1,128,1,float16,fp8,3,0.014677333335081736
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,96,1,128,1,float16,float16,7,0.019109333554903667
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,96,1,128,1,float16,fp8,7,0.015013333410024643
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,96,1,128,1,float16,float16,15,0.019130667050679524
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,96,1,128,1,float16,fp8,15,0.015263999501864115
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,96,1,128,1,float16,float16,31,0.021162666380405426
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,96,1,128,1,float16,fp8,31,0.0170666662355264
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,96,1,128,1,float16,float16,63,0.022869333624839783
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,96,1,128,1,float16,fp8,63,0.01706133286158244
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,96,1,128,1,float16,fp8,127,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,96,1,128,1,float16,float16,255,0.02697066714366277
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,96,1,128,1,float16,fp8,255,0.020821332931518555
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,96,1,128,1,float16,float16,511,0.035504000882307686
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,96,1,128,1,float16,fp8,511,0.026965332527955372
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,96,1,128,1,float16,fp8,1023,0.04334933559099833
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,96,1,128,1,float16,float16,1023,0.05563733478387197
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,96,1,128,1,float16,float16,2047,0.09250133236249287
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,96,1,128,1,float16,fp8,2047,0.0740479975938797
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,96,1,128,1,float16,float16,127,0.022869333624839783
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,96,1,128,1,float16,float16,4095,0.16810667514801025
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,96,1,128,1,float16,fp8,4095,0.13379733761151633
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,96,1,128,1,float16,float16,8191,0.31932266553243
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,96,1,128,1,float16,fp8,8191,0.25497599442799884
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,96,1,128,1,float16,float16,16383,0.6212213436762491
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,96,1,128,1,float16,fp8,16383,0.4981813430786133
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,96,4,128,1,float16,float16,1,0.01911466692884763
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,96,4,128,1,float16,fp8,1,0.014677333335081736
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,96,4,128,1,float16,float16,3,0.018768000106016796
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,96,4,128,1,float16,fp8,3,0.014682666709025701
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,96,4,128,1,float16,float16,7,0.01877333347996076
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,96,4,128,1,float16,fp8,7,0.015018666783968607
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,96,4,128,1,float16,float16,15,0.020799999435742695
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,96,4,128,1,float16,fp8,15,0.014677333335081736
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,96,4,128,1,float16,float16,31,0.022869333624839783
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,96,4,128,1,float16,fp8,31,0.0170666662355264
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,96,4,128,1,float16,float16,63,0.022869333624839783
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,96,4,128,1,float16,fp8,63,0.017055999487638474
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,96,4,128,1,float16,float16,127,0.022858666876951855
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,96,4,128,1,float16,fp8,127,0.01708799973130226
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,96,4,128,1,float16,float16,255,0.02697066714366277
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,96,4,128,1,float16,fp8,255,0.020821332931518555
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,96,4,128,1,float16,float16,511,0.03549333413441976
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,96,4,128,1,float16,fp8,511,0.027301333844661713
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,96,4,128,1,float16,float16,1023,0.05563733478387197
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,96,4,128,1,float16,fp8,1023,0.043354665239652
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,96,4,128,1,float16,float16,2047,0.09386133154233296
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,96,4,128,1,float16,fp8,2047,0.07406933108965556
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,96,4,128,1,float16,float16,4095,0.16863999764124551
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,96,4,128,1,float16,fp8,4095,0.13448533415794373
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,96,4,128,1,float16,float16,8191,0.3203413287798564
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,96,4,128,1,float16,fp8,8191,0.2563199996948242
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,96,4,128,1,float16,float16,16383,0.6401706536610922
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,96,4,128,1,float16,fp8,16383,0.5000640153884888
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,96,8,128,1,float16,float16,1,0.010586666564146677
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,96,8,128,1,float16,fp8,1,0.010586666564146677
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,96,8,128,1,float16,float16,3,0.010885333021481832
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,96,8,128,1,float16,fp8,3,0.010821333775917688
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,96,8,128,1,float16,float16,7,0.010677333921194077
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,96,8,128,1,float16,fp8,7,0.010890666395425797
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,96,8,128,1,float16,fp8,15,0.010581333190202713
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,96,8,128,1,float16,float16,31,0.010575999816258749
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,96,8,128,1,float16,fp8,31,0.010581333190202713
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,96,8,128,1,float16,float16,63,0.010597333312034607
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,96,8,128,1,float16,fp8,63,0.010581333190202713
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,96,8,128,1,float16,float16,15,0.010575999816258749
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,96,8,128,1,float16,float16,127,0.010581333190202713
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,96,8,128,1,float16,float16,255,0.010586666564146677
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,96,8,128,1,float16,fp8,127,0.010608000059922537
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,96,8,128,1,float16,float16,511,0.010922666639089584
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,96,8,128,1,float16,fp8,255,0.010575999816258749
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,96,8,128,1,float16,fp8,511,0.012629333883523941
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,96,8,128,1,float16,float16,1023,0.012975999464591345
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,96,8,128,1,float16,fp8,1023,0.012965332716703415
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,96,8,128,1,float16,float16,2047,0.016719999412695568
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,96,8,128,1,float16,fp8,2047,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,96,8,128,1,float16,float16,4095,0.01876266673207283
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,96,8,128,1,float16,fp8,4095,0.01877333347996076
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,96,8,128,1,float16,float16,8191,0.022869333624839783
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,96,8,128,1,float16,fp8,8191,0.021151999632517498
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,96,8,128,1,float16,fp8,16383,0.027306665976842243
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,96,8,128,1,float16,float16,16383,0.03993066648642222
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,96,1,128,1,float16,float16,1,0.14984533190727234
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,96,1,128,1,float16,fp8,1,0.11741866668065389
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,96,1,128,1,float16,float16,3,0.15377066532770792
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,96,1,128,1,float16,fp8,3,0.12152000268300374
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,96,1,128,1,float16,float16,7,0.15581867098808289
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,96,1,128,1,float16,fp8,7,0.12731732924779257
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,96,1,128,1,float16,float16,15,0.18466132879257202
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,96,1,128,1,float16,fp8,15,0.16008533040682474
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,96,1,128,1,float16,float16,31,0.1867093245188395
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,96,1,128,1,float16,fp8,31,0.16008533040682474
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,96,1,128,1,float16,float16,63,0.1868799924850464
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,96,1,128,1,float16,fp8,63,0.16059733430544534
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,96,1,128,1,float16,float16,127,0.22187199195226034
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,96,1,128,1,float16,fp8,127,0.19063466787338257
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,96,1,128,1,float16,float16,255,0.33191466331481934
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,96,1,128,1,float16,fp8,255,0.29338133335113525
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,96,1,128,1,float16,float16,511,0.5553493499755859
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,96,1,128,1,float16,fp8,511,0.49988265832265216
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,96,1,128,1,float16,float16,1023,0.9996000130971273
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,96,1,128,1,float16,fp8,1023,0.9186933040618896
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,96,4,128,1,float16,float16,1,0.14967466394106546
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,96,4,128,1,float16,fp8,1,0.11742400129636128
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,96,4,128,1,float16,float16,3,0.1518826683362325
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,96,4,128,1,float16,fp8,3,0.12148800492286682
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,96,4,128,1,float16,float16,7,0.15411733587582907
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,96,4,128,1,float16,fp8,7,0.12731200456619263
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,96,4,128,1,float16,float16,15,0.1843199928601583
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,96,4,128,1,float16,fp8,15,0.16009066502253214
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,96,4,128,1,float16,float16,31,0.18653867642084757
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,96,4,128,1,float16,fp8,31,0.16025599837303162
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,96,4,128,1,float16,float16,63,0.1868799924850464
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,96,4,128,1,float16,fp8,63,0.16062933206558228
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,96,4,128,1,float16,float16,127,0.22323733568191528
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,96,4,128,1,float16,fp8,127,0.19080533583958945
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,96,4,128,1,float16,float16,255,0.3319466710090637
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,96,4,128,1,float16,fp8,255,0.29371732473373413
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,96,4,128,1,float16,float16,511,0.5946079889933268
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,96,4,128,1,float16,fp8,511,0.49988265832265216
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,96,4,128,1,float16,float16,1023,1.0815253257751465
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,96,4,128,1,float16,fp8,1023,0.9255253473917643
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,96,8,128,1,float16,float16,1,0.02902399996916453
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,96,8,128,1,float16,float16,3,0.0290133332212766
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,96,8,128,1,float16,fp8,3,0.02696000039577484
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,96,8,128,1,float16,fp8,1,0.026288000245889027
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,96,8,128,1,float16,float16,7,0.0290133332212766
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,96,8,128,1,float16,fp8,7,0.025946666797002155
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,96,8,128,1,float16,float16,15,0.0290133332212766
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,96,8,128,1,float16,fp8,15,0.025941332181294758
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,96,8,128,1,float16,float16,31,0.0290133332212766
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,96,8,128,1,float16,fp8,31,0.02696000039577484
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,96,8,128,1,float16,float16,63,0.0290133332212766
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,96,8,128,1,float16,fp8,63,0.02698666602373123
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,96,8,128,1,float16,float16,127,0.02900800108909607
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,96,8,128,1,float16,float16,255,0.02932800104220708
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,96,8,128,1,float16,fp8,255,0.025258667767047882
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,96,8,128,1,float16,fp8,127,0.026965332527955372
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,96,8,128,1,float16,fp8,511,0.03310933212439219
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,96,8,128,1,float16,float16,511,0.039936001102129616
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,96,8,128,1,float16,float16,1023,0.06007466713587443
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,96,8,128,1,float16,fp8,1023,0.044031997521718345
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,256,1,96,1,128,1,float16,float16,1,0.2916799982388814
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,256,1,96,1,128,1,float16,fp8,1,0.2297226587931315
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,256,1,96,1,128,1,float16,float16,3,0.2988213300704956
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,256,1,96,1,128,1,float16,float16,7,0.303274671236674
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,256,1,96,1,128,1,float16,fp8,3,0.2362026572227478
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,256,1,96,1,128,1,float16,fp8,7,0.24643733104070029
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,256,1,96,1,128,1,float16,float16,15,0.3626720110575358
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,256,1,96,1,128,1,float16,fp8,15,0.3135146697362264
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,256,1,96,1,128,1,float16,float16,31,0.3667626778284709
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,256,1,96,1,128,1,float16,fp8,31,0.3135146697362264
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,256,1,96,1,128,1,float16,float16,63,0.368127981821696
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,256,1,96,1,128,1,float16,fp8,63,0.3155626654624939
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,256,1,96,1,128,1,float16,float16,127,0.438101331392924
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,256,1,96,1,128,1,float16,fp8,127,0.3735946814219157
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,256,1,96,1,128,1,float16,float16,255,0.6553599834442139
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,256,1,96,1,128,1,float16,fp8,255,0.5799253384272257
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,256,1,96,4,128,1,float16,float16,1,0.2916640043258667
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,256,1,96,4,128,1,float16,fp8,1,0.2290239930152893
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,256,1,96,4,128,1,float16,float16,3,0.29815467198689777
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,256,1,96,4,128,1,float16,fp8,3,0.23587199052174887
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,256,1,96,4,128,1,float16,float16,7,0.303274671236674
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,256,1,96,4,128,1,float16,fp8,7,0.2481493353843689
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,256,1,96,4,128,1,float16,float16,15,0.36266668637593585
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,256,1,96,4,128,1,float16,fp8,15,0.3141973416010539
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,256,1,96,4,128,1,float16,float16,31,0.3667626778284709
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,256,1,96,4,128,1,float16,fp8,31,0.3141973416010539
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,256,1,96,4,128,1,float16,float16,63,0.368127981821696
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,256,1,96,4,128,1,float16,fp8,63,0.3158986568450928
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,256,1,96,4,128,1,float16,float16,127,0.43878400325775146
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,256,1,96,4,128,1,float16,fp8,127,0.3739360173543294
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,256,1,96,4,128,1,float16,float16,255,0.715445359547933
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,256,1,96,4,128,1,float16,fp8,255,0.5802666743596395
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,256,1,96,8,128,1,float16,float16,1,0.04574400186538696
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,256,1,96,8,128,1,float16,fp8,1,0.04334933559099833
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,256,1,96,8,128,1,float16,float16,3,0.04573333263397217
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,256,1,96,8,128,1,float16,float16,7,0.04574400186538696
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,256,1,96,8,128,1,float16,fp8,7,0.04334933559099833
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,256,1,96,8,128,1,float16,float16,15,0.04539733131726583
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,256,1,96,8,128,1,float16,fp8,15,0.04334400097529093
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,256,1,96,8,128,1,float16,float16,31,0.04540266593297323
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,256,1,96,8,128,1,float16,fp8,3,0.042992000778516136
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,256,1,96,8,128,1,float16,fp8,31,0.04334933559099833
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,256,1,96,8,128,1,float16,fp8,63,0.043696001172065735
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,256,1,96,8,128,1,float16,float16,63,0.04539733131726583
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,256,1,96,8,128,1,float16,float16,127,0.04744000236193339
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,256,1,96,8,128,1,float16,fp8,127,0.04130133241415024
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,256,1,96,8,128,1,float16,float16,255,0.048810665806134544
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,256,1,96,8,128,1,float16,fp8,255,0.03925333420435587
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,512,1,96,1,128,1,float16,float16,1,0.5778719981511434
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,512,1,96,1,128,1,float16,fp8,3,0.4671200116475423
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,512,1,96,1,128,1,float16,float16,3,0.5922133525212606
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,512,1,96,1,128,1,float16,fp8,7,0.4848586718241374
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,512,1,96,1,128,1,float16,float16,7,0.6007466713587443
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,512,1,96,1,128,1,float16,float16,15,0.7195306619008383
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,512,1,96,1,128,1,float16,fp8,15,0.6229439973831177
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,512,1,96,1,128,1,float16,fp8,1,0.45070401827494305
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,512,1,96,1,128,1,float16,float16,31,0.7283999919891357
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,512,1,96,1,128,1,float16,fp8,31,0.6212266683578491
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,512,1,96,1,128,1,float16,float16,63,0.7294080257415771
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,512,1,96,1,128,1,float16,fp8,63,0.6253013213475546
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,512,1,96,1,128,1,float16,float16,127,0.869370698928833
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,512,1,96,1,128,1,float16,fp8,127,0.7403573195139567
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,512,1,96,4,128,1,float16,float16,1,0.5782186587651571
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,512,1,96,4,128,1,float16,fp8,1,0.45073068141937256
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,512,1,96,4,128,1,float16,float16,3,0.5922186772028605
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,512,1,96,4,128,1,float16,fp8,3,0.4667786757151286
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,512,1,96,4,128,1,float16,float16,7,0.6024479866027832
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,512,1,96,4,128,1,float16,fp8,7,0.4886186520258586
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,512,1,96,4,128,1,float16,float16,15,0.719871997833252
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,512,1,96,4,128,1,float16,fp8,15,0.6229333480199178
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,512,1,96,4,128,1,float16,float16,31,0.7280693054199219
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,512,1,96,4,128,1,float16,fp8,31,0.6225866476694742
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,512,1,96,4,128,1,float16,float16,63,0.7307893435160319
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,512,1,96,4,128,1,float16,fp8,63,0.6253226598103842
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,512,1,96,4,128,1,float16,float16,127,0.943615992863973
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,512,1,96,4,128,1,float16,fp8,127,0.7424000104268392
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,512,1,96,8,128,1,float16,float16,1,0.08260266482830048
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,512,1,96,8,128,1,float16,fp8,1,0.07714133461316426
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,512,1,96,8,128,1,float16,float16,3,0.08260266482830048
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,512,1,96,8,128,1,float16,fp8,3,0.07679999868075053
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,512,1,96,8,128,1,float16,fp8,7,0.07748266557852428
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,512,1,96,8,128,1,float16,float16,15,0.08226133386294048
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,512,1,96,8,128,1,float16,fp8,15,0.07645866771539052
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,512,1,96,8,128,1,float16,float16,7,0.08089600006739299
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,512,1,96,8,128,1,float16,float16,31,0.08191999793052673
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,512,1,96,8,128,1,float16,fp8,31,0.07646400233109792
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,512,1,96,8,128,1,float16,float16,63,0.08226133386294048
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,512,1,96,8,128,1,float16,fp8,63,0.07645866771539052
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,512,1,96,8,128,1,float16,float16,127,0.08430932958920796
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,512,1,96,8,128,1,float16,fp8,127,0.07577600081761678
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,96,1,128,1,float16,float16,1,0.014677333335081736
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,96,1,128,1,float16,float16,3,0.014666666587193808
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,96,1,128,1,float16,fp8,1,0.015018666783968607
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,96,1,128,1,float16,fp8,3,0.014655999839305878
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,96,1,128,1,float16,fp8,7,0.014677333335081736
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,96,1,128,1,float16,float16,7,0.014677333335081736
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,96,1,128,1,float16,float16,15,0.017055999487638474
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,96,1,128,1,float16,fp8,15,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,96,1,128,1,float16,float16,31,0.0170666662355264
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,96,1,128,1,float16,fp8,31,0.016730666160583496
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,96,1,128,1,float16,float16,63,0.0170666662355264
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,96,1,128,1,float16,fp8,63,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,96,1,128,1,float16,fp8,127,0.01877333347996076
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,96,1,128,1,float16,float16,127,0.020821332931518555
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,96,1,128,1,float16,float16,255,0.029029332101345062
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,96,1,128,1,float16,fp8,255,0.026965332527955372
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,96,1,128,1,float16,float16,511,0.04574400186538696
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,96,1,128,1,float16,fp8,511,0.04334933559099833
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,96,1,128,1,float16,float16,1023,0.07987200220425923
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,96,1,128,1,float16,fp8,1023,0.07406933108965556
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,96,1,128,1,float16,float16,2047,0.14592533310254416
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,96,1,128,1,float16,fp8,2047,0.13740266362826029
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,96,1,128,1,float16,float16,4095,0.2786986629168193
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,96,1,128,1,float16,fp8,4095,0.2630026737848918
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,96,1,128,1,float16,float16,8191,0.5457919836044312
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,96,1,128,1,float16,fp8,8191,0.5179733435312907
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,96,1,128,1,float16,float16,16383,1.2209440072377522
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,96,1,128,1,float16,fp8,16383,1.0917493502298992
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,96,4,128,1,float16,float16,1,0.014682666709025701
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,96,4,128,1,float16,fp8,1,0.014677333335081736
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,96,4,128,1,float16,float16,3,0.014677333335081736
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,96,4,128,1,float16,fp8,3,0.014677333335081736
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,96,4,128,1,float16,float16,7,0.014677333335081736
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,96,4,128,1,float16,fp8,7,0.014677333335081736
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,96,4,128,1,float16,float16,15,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,96,4,128,1,float16,fp8,15,0.016730666160583496
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,96,4,128,1,float16,float16,31,0.0170666662355264
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,96,4,128,1,float16,fp8,31,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,96,4,128,1,float16,float16,63,0.01706133286158244
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,96,4,128,1,float16,fp8,63,0.016719999412695568
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,96,4,128,1,float16,float16,127,0.020794666061798733
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,96,4,128,1,float16,fp8,127,0.018778666853904724
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,96,4,128,1,float16,float16,255,0.0290133332212766
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,96,4,128,1,float16,fp8,255,0.02697066714366277
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,96,4,128,1,float16,float16,511,0.04571733375390371
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,96,4,128,1,float16,fp8,511,0.04334933559099833
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,96,4,128,1,float16,float16,1023,0.07918933530648549
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,96,4,128,1,float16,fp8,1023,0.07406933108965556
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,96,4,128,1,float16,float16,2047,0.1455839971701304
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,96,4,128,1,float16,fp8,2047,0.1378986636797587
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,96,4,128,1,float16,float16,4095,0.27938133478164673
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,96,4,128,1,float16,fp8,4095,0.26368000109990436
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,96,4,128,1,float16,float16,8191,0.5488693316777548
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,96,4,128,1,float16,fp8,8191,0.5189973513285319
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,96,4,128,1,float16,float16,16383,1.5836159388224285
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,96,4,128,1,float16,fp8,16383,1.1033493677775066
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,96,8,128,1,float16,float16,1,0.010805333654085795
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,96,8,128,1,float16,fp8,1,0.010816000401973724
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,96,8,128,1,float16,float16,3,0.011605333536863327
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,96,8,128,1,float16,fp8,3,0.011493333925803503
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,96,8,128,1,float16,float16,7,0.011482667177915573
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,96,8,128,1,float16,float16,15,0.011610666910807291
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,96,8,128,1,float16,fp8,15,0.010805333654085795
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,96,8,128,1,float16,float16,31,0.01081066702802976
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,96,8,128,1,float16,fp8,31,0.010928000013033548
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,96,8,128,1,float16,float16,63,0.010591999938090643
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,96,8,128,1,float16,fp8,63,0.010634666929642359
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,96,8,128,1,float16,float16,127,0.01081066702802976
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,96,8,128,1,float16,fp8,7,0.010928000013033548
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,96,8,128,1,float16,fp8,127,0.01091733326514562
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,96,8,128,1,float16,fp8,255,0.010816000401973724
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,96,8,128,1,float16,float16,255,0.010565333068370819
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,96,8,128,1,float16,float16,511,0.012634667257467905
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,96,8,128,1,float16,fp8,511,0.012629333883523941
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,96,8,128,1,float16,float16,1023,0.014677333335081736
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,96,8,128,1,float16,fp8,1023,0.012975999464591345
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,96,8,128,1,float16,fp8,2047,0.01877333347996076
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,96,8,128,1,float16,float16,2047,0.01877333347996076
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,96,8,128,1,float16,fp8,4095,0.021183999876181286
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,96,8,128,1,float16,float16,4095,0.022863999009132385
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,96,8,128,1,float16,float16,8191,0.039594667653242745
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,96,8,128,1,float16,fp8,8191,0.027647999425729115
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,96,8,128,1,float16,float16,16383,0.060415998101234436
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,96,8,128,1,float16,fp8,16383,0.04334933559099833
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1024,1,96,1,128,1,float16,float16,1,1.1501226425170898
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1024,1,96,1,128,1,float16,fp8,1,0.8949759801228842
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1024,1,96,1,128,1,float16,float16,3,1.1784586906433105
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1024,1,96,1,128,1,float16,fp8,3,0.9292799631754557
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1024,1,96,1,128,1,float16,fp8,7,0.9603412946065267
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1024,1,96,1,128,1,float16,float16,7,1.193989356358846
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1024,1,96,1,128,1,float16,float16,15,1.4329172770182292
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1024,1,96,1,128,1,float16,fp8,15,1.2383573055267334
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1024,1,96,1,128,1,float16,fp8,31,1.2366507053375244
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1024,1,96,1,128,1,float16,float16,31,1.4511787096659343
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1024,1,96,1,128,1,float16,fp8,63,1.243834654490153
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1024,1,96,1,128,1,float16,float16,63,1.4552747408548992
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1024,1,96,4,128,1,float16,float16,1,1.1490879853566487
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1024,1,96,4,128,1,float16,fp8,1,0.893946647644043
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1024,1,96,4,128,1,float16,float16,3,1.1810133457183838
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1024,1,96,4,128,1,float16,fp8,3,0.9268906911214193
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1024,1,96,4,128,1,float16,float16,7,1.2031946976979573
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1024,1,96,4,128,1,float16,fp8,7,0.9698987007141113
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1024,1,96,4,128,1,float16,float16,15,1.4380319913228352
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1024,1,96,4,128,1,float16,fp8,15,1.2390399773915608
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1024,1,96,4,128,1,float16,float16,31,1.4923307100931804
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1024,1,96,4,128,1,float16,fp8,31,1.239034652709961
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1024,1,96,4,128,1,float16,float16,63,1.5291733741760254
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1024,1,96,4,128,1,float16,fp8,63,1.2654933134714763
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1024,1,96,8,128,1,float16,fp8,1,0.1462613344192505
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1024,1,96,8,128,1,float16,float16,1,0.15447466572125754
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1024,1,96,8,128,1,float16,float16,3,0.155648003021876
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1024,1,96,8,128,1,float16,float16,7,0.15561599532763162
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1024,1,96,8,128,1,float16,fp8,3,0.14575466513633728
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1024,1,96,8,128,1,float16,float16,15,0.1551040013631185
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1024,1,96,8,128,1,float16,fp8,7,0.14661866426467896
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1024,1,96,8,128,1,float16,fp8,15,0.1460906664530436
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1024,1,96,8,128,1,float16,float16,31,0.15600533286730447
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1024,1,96,8,128,1,float16,fp8,31,0.1460906664530436
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1024,1,96,8,128,1,float16,float16,63,0.15617600083351135
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1024,1,96,8,128,1,float16,fp8,63,0.146096001068751
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2048,1,96,1,128,1,float16,float16,1,2.415104071299235
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2048,1,96,1,128,1,float16,fp8,1,1.872554620107015
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2048,1,96,1,128,1,float16,float16,3,2.474837303161621
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2048,1,96,1,128,1,float16,fp8,3,1.930400053660075
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2048,1,96,1,128,1,float16,float16,7,2.5036800702412925
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2048,1,96,1,128,1,float16,fp8,7,2.013866742451986
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2048,1,96,1,128,1,float16,fp8,15,2.493781407674154
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2048,1,96,1,128,1,float16,float16,15,2.896725336710612
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2048,1,96,1,128,1,float16,float16,31,2.9542452494303384
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2048,1,96,1,128,1,float16,fp8,31,2.496847947438558
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2048,1,96,4,128,1,float16,float16,1,2.499242623647054
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2048,1,96,4,128,1,float16,fp8,1,1.9375786781311035
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2048,1,96,4,128,1,float16,fp8,3,1.9752960205078125
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2048,1,96,4,128,1,float16,float16,3,2.5430986086527505
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2048,1,96,4,128,1,float16,float16,7,2.569728056589762
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2048,1,96,4,128,1,float16,fp8,7,2.0613120396931968
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2048,1,96,4,128,1,float16,float16,15,2.937317212422689
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2048,1,96,4,128,1,float16,fp8,15,2.5229652722676597
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2048,1,96,4,128,1,float16,float16,31,3.021317481994629
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2048,1,96,4,128,1,float16,fp8,31,2.524325370788574
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2048,1,96,8,128,1,float16,fp8,1,0.2800640066464742
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2048,1,96,8,128,1,float16,float16,1,0.2984960079193115
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2048,1,96,8,128,1,float16,float16,3,0.29951467116673786
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2048,1,96,8,128,1,float16,fp8,3,0.2800640066464742
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2048,1,96,8,128,1,float16,float16,7,0.3008799950281779
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2048,1,96,8,128,1,float16,fp8,7,0.2797226707140605
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2048,1,96,8,128,1,float16,fp8,15,0.28037865956624347
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2048,1,96,8,128,1,float16,float16,15,0.3002079923947652
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2048,1,96,8,128,1,float16,float16,31,0.2988426685333252
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2048,1,96,8,128,1,float16,fp8,31,0.28074665864308673
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,96,1,128,1,float16,float16,1,0.02491733431816101
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,96,1,128,1,float16,fp8,1,0.02081599955757459
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,96,1,128,1,float16,float16,3,0.024911999702453613
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,96,1,128,1,float16,float16,7,0.0249493345618248
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,96,1,128,1,float16,fp8,7,0.020821332931518555
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,96,1,128,1,float16,float16,15,0.029029332101345062
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,96,1,128,1,float16,fp8,15,0.025263999899228413
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,96,1,128,1,float16,float16,31,0.0290133332212766
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,96,1,128,1,float16,fp8,3,0.020821332931518555
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,96,1,128,1,float16,fp8,31,0.025258667767047882
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,96,1,128,1,float16,float16,63,0.0290133332212766
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,96,1,128,1,float16,float16,127,0.03379199902216593
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,96,1,128,1,float16,fp8,63,0.02491733431816101
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,96,1,128,1,float16,fp8,127,0.03105599929889043
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,96,1,128,1,float16,float16,255,0.04949333270390829
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,96,1,128,1,float16,fp8,255,0.04369066655635834
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,96,1,128,1,float16,float16,511,0.08021333316961925
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,96,1,128,1,float16,fp8,511,0.0727040022611618
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,96,1,128,1,float16,float16,1023,0.13960533340771994
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,96,1,128,1,float16,fp8,1023,0.1293760041395823
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,96,1,128,1,float16,float16,2047,0.2604373296101888
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,96,1,128,1,float16,fp8,2047,0.24200532833735147
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,96,1,128,1,float16,float16,4095,0.501258651415507
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,96,1,128,1,float16,fp8,4095,0.4681386550267537
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,96,1,128,1,float16,float16,8191,0.9832106431325277
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,96,1,128,1,float16,fp8,8191,0.9231359958648682
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,96,4,128,1,float16,float16,1,0.02491733431816101
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,96,4,128,1,float16,fp8,1,0.01911466692884763
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,96,4,128,1,float16,float16,3,0.0249439999461174
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,96,4,128,1,float16,fp8,3,0.020821332931518555
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,96,4,128,1,float16,float16,7,0.02491733431816101
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,96,4,128,1,float16,fp8,7,0.020810666183630627
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,96,4,128,1,float16,float16,15,0.027306665976842243
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,96,4,128,1,float16,fp8,15,0.024933333198229473
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,96,4,128,1,float16,float16,31,0.029029332101345062
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,96,4,128,1,float16,fp8,31,0.025258667767047882
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,96,4,128,1,float16,float16,63,0.0290133332212766
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,96,4,128,1,float16,fp8,63,0.025600001215934753
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,96,4,128,1,float16,float16,127,0.03345066557327906
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,96,4,128,1,float16,fp8,127,0.031061333914597828
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,96,4,128,1,float16,float16,255,0.04949333270390829
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,96,4,128,1,float16,fp8,255,0.04539733131726583
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,96,4,128,1,float16,float16,511,0.08020266890525818
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,96,4,128,1,float16,fp8,511,0.07236266632874806
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,96,4,128,1,float16,float16,1023,0.13994666934013367
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,96,4,128,1,float16,fp8,1023,0.1293653349081675
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,96,4,128,1,float16,float16,2047,0.26043200492858887
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,96,4,128,1,float16,float16,4095,0.5128586689631144
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,96,4,128,1,float16,fp8,4095,0.4686773220698039
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,96,4,128,1,float16,float16,8191,1.1450026830037434
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,96,4,128,1,float16,fp8,2047,0.24201067288716635
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,96,4,128,1,float16,fp8,8191,0.9285973707834879
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,96,8,128,1,float16,float16,1,0.010933333386977514
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,96,8,128,1,float16,float16,3,0.010928000013033548
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,96,8,128,1,float16,fp8,1,0.010821333775917688
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,96,8,128,1,float16,fp8,3,0.010944000134865442
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,96,8,128,1,float16,float16,7,0.011247999966144562
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,96,8,128,1,float16,fp8,7,0.012629333883523941
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,96,8,128,1,float16,float16,15,0.011605333536863327
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,96,8,128,1,float16,fp8,15,0.012629333883523941
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,96,8,128,1,float16,float16,31,0.012634667257467905
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,96,8,128,1,float16,fp8,31,0.012624000509579977
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,96,8,128,1,float16,float16,63,0.012624000509579977
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,96,8,128,1,float16,float16,127,0.012645332763592402
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,96,8,128,1,float16,fp8,63,0.012629333883523941
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,96,8,128,1,float16,fp8,127,0.012629333883523941
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,96,8,128,1,float16,float16,255,0.012629333883523941
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,96,8,128,1,float16,fp8,255,0.012629333883523941
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,96,8,128,1,float16,float16,511,0.012975999464591345
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,96,8,128,1,float16,float16,1023,0.015360000232855478
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,96,8,128,1,float16,fp8,511,0.012602667013804117
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,96,8,128,1,float16,fp8,1023,0.014677333335081736
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,96,8,128,1,float16,float16,2047,0.022863999009132385
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,96,8,128,1,float16,fp8,2047,0.02081599955757459
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,96,8,128,1,float16,float16,4095,0.03923733284076055
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,96,8,128,1,float16,fp8,4095,0.027285332481066387
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,96,8,128,1,float16,float16,8191,0.060421332716941833
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,96,8,128,1,float16,fp8,8191,0.04369066655635834
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,64,1,128,1,float16,float16,1,0.02934933453798294
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,64,1,128,1,float16,float16,3,0.0310506671667099
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,64,1,128,1,float16,fp8,1,0.025253333151340485
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,64,1,128,1,float16,fp8,3,0.025258667767047882
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,64,1,128,1,float16,float16,7,0.031061333914597828
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,64,1,128,1,float16,fp8,7,0.026965332527955372
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,64,1,128,1,float16,float16,15,0.03515200068553289
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,64,1,128,1,float16,fp8,15,0.03105599929889043
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,64,1,128,1,float16,float16,31,0.03515733281771342
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,64,1,128,1,float16,fp8,31,0.031061333914597828
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,64,1,128,1,float16,float16,63,0.035504000882307686
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,64,1,128,1,float16,float16,127,0.04299733539422353
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,64,1,128,1,float16,fp8,63,0.03107200066248576
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,64,1,128,1,float16,float16,255,0.061797335743904114
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,64,1,128,1,float16,fp8,255,0.05563733478387197
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,64,1,128,1,float16,float16,511,0.10068800052007039
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,64,1,128,1,float16,fp8,511,0.09076799949010213
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,64,1,128,1,float16,float16,1023,0.17629333337148032
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,64,1,128,1,float16,fp8,1023,0.1621333360671997
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,64,1,128,1,float16,float16,2047,0.3285333315531413
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,64,1,128,1,float16,fp8,2047,0.30498133103052777
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,64,1,128,1,float16,fp8,4095,0.591866652170817
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,64,1,128,1,float16,float16,4095,0.6336853504180908
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,64,1,128,1,float16,fp8,127,0.037205333511034645
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,64,1,128,1,float16,float16,8191,1.3061119715372722
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,64,1,128,1,float16,fp8,8191,1.1661653518676758
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,64,2,128,1,float16,fp8,1,0.025274666647116344
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,64,2,128,1,float16,float16,3,0.031061333914597828
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,64,2,128,1,float16,float16,1,0.029029332101345062
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,64,2,128,1,float16,fp8,3,0.025600001215934753
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,64,2,128,1,float16,float16,7,0.031066666046778362
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,64,2,128,1,float16,float16,15,0.03515733281771342
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,64,2,128,1,float16,fp8,15,0.031397332747777305
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,64,2,128,1,float16,fp8,7,0.025263999899228413
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,64,2,128,1,float16,float16,31,0.035162667433420815
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,64,2,128,1,float16,float16,63,0.035162667433420815
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,64,2,128,1,float16,float16,127,0.041984001795450844
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,64,2,128,1,float16,fp8,63,0.031061333914597828
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,64,2,128,1,float16,float16,255,0.06178666651248932
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,64,2,128,1,float16,fp8,127,0.037205333511034645
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,64,2,128,1,float16,fp8,511,0.09045867125193278
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,64,2,128,1,float16,fp8,255,0.05428266525268555
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,64,2,128,1,float16,float16,511,0.10069333513577779
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,64,2,128,1,float16,float16,1023,0.1763040026028951
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,64,2,128,1,float16,fp8,1023,0.16214399536450705
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,64,2,128,1,float16,float16,2047,0.328874667485555
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,64,2,128,1,float16,fp8,2047,0.3039519985516866
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,64,2,128,1,float16,fp8,31,0.031397332747777305
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,64,2,128,1,float16,float16,4095,0.6615093151728312
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,64,2,128,1,float16,fp8,8191,1.1760640144348145
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,64,2,128,1,float16,float16,8191,1.4431413014729817
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,64,2,128,1,float16,fp8,4095,0.5915360053380331
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,64,4,128,1,float16,float16,1,0.012634667257467905
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,64,4,128,1,float16,float16,3,0.011237333218256632
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,64,4,128,1,float16,fp8,1,0.011498666057984034
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,64,4,128,1,float16,fp8,3,0.012629333883523941
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,64,4,128,1,float16,fp8,15,0.012629333883523941
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,64,4,128,1,float16,float16,15,0.011509332805871964
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,64,4,128,1,float16,fp8,7,0.010933333386977514
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,64,4,128,1,float16,float16,7,0.012634667257467905
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,64,4,128,1,float16,float16,31,0.011946666985750198
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,64,4,128,1,float16,fp8,31,0.012181332955757776
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,64,4,128,1,float16,float16,127,0.012298667182525
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,64,4,128,1,float16,float16,63,0.011264000087976456
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,64,4,128,1,float16,fp8,127,0.012634667257467905
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,64,4,128,1,float16,float16,255,0.011605333536863327
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,64,4,128,1,float16,float16,511,0.012965332716703415
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,64,4,128,1,float16,fp8,63,0.012629333883523941
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,64,4,128,1,float16,fp8,255,0.01268799975514412
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,64,4,128,1,float16,fp8,511,0.012624000509579977
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,64,4,128,1,float16,float16,1023,0.01570133368174235
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,64,4,128,1,float16,float16,2047,0.022869333624839783
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,64,4,128,1,float16,fp8,1023,0.014677333335081736
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,64,4,128,1,float16,float16,4095,0.04027733455101649
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,64,4,128,1,float16,fp8,2047,0.020821332931518555
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,64,4,128,1,float16,fp8,4095,0.027322667340437572
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,64,4,128,1,float16,fp8,8191,0.04437333345413208
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,64,8,128,1,float16,float16,1,0.012970666090647379
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,64,4,128,1,float16,float16,8191,0.06178133189678192
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,64,8,128,1,float16,float16,3,0.013653332988421122
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,64,8,128,1,float16,fp8,1,0.012975999464591345
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,64,8,128,1,float16,fp8,3,0.012869333227475485
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,64,8,128,1,float16,fp8,7,0.012970666090647379
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,64,8,128,1,float16,float16,15,0.013663999736309052
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,64,8,128,1,float16,float16,7,0.013199999928474426
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,64,8,128,1,float16,float16,31,0.014346666634082794
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,64,8,128,1,float16,fp8,15,0.012975999464591345
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,64,8,128,1,float16,fp8,31,0.012970666090647379
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,64,8,128,1,float16,fp8,63,0.012624000509579977
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,64,8,128,1,float16,float16,63,0.01331199953953425
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,64,8,128,1,float16,float16,127,0.013653332988421122
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,64,8,128,1,float16,fp8,127,0.012975999464591345
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,64,8,128,1,float16,float16,255,0.012634667257467905
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,64,8,128,1,float16,float16,511,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,64,8,128,1,float16,fp8,255,0.012986666212479273
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,64,8,128,1,float16,float16,1023,0.019109333554903667
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,64,8,128,1,float16,fp8,1023,0.0170666662355264
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,64,8,128,1,float16,fp8,511,0.014677333335081736
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,64,8,128,1,float16,fp8,2047,0.0262773334980011
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,64,8,128,1,float16,float16,2047,0.039936001102129616
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,64,8,128,1,float16,float16,4095,0.06177600224812826
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,64,8,128,1,float16,float16,8191,0.105103999376297
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,64,8,128,1,float16,fp8,4095,0.04130133241415024
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,64,8,128,1,float16,fp8,8191,0.06280000011126201
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,64,1,128,1,float16,float16,1,0.008885333314538002
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,64,1,128,1,float16,fp8,1,0.00921066664159298
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,64,1,128,1,float16,float16,3,0.008879999940594038
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,64,1,128,1,float16,fp8,7,0.008858666444818178
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,64,1,128,1,float16,float16,7,0.008826666822036108
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,64,1,128,1,float16,fp8,3,0.008752000207702318
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,64,1,128,1,float16,fp8,15,0.010005333150426546
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,64,1,128,1,float16,float16,31,0.008874666566650072
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,64,1,128,1,float16,fp8,31,0.00879466657837232
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,64,1,128,1,float16,float16,15,0.008746666833758354
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,64,1,128,1,float16,float16,63,0.010581333190202713
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,64,1,128,1,float16,fp8,63,0.010586666564146677
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,64,1,128,1,float16,float16,127,0.010581333190202713
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,64,1,128,1,float16,float16,255,0.014677333335081736
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,64,1,128,1,float16,fp8,127,0.010581333190202713
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,64,1,128,1,float16,fp8,255,0.014677333335081736
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,64,1,128,1,float16,fp8,511,0.022863999009132385
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,64,1,128,1,float16,float16,511,0.022874665757020313
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,64,1,128,1,float16,float16,1023,0.027306665976842243
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,64,1,128,1,float16,float16,2047,0.03719999889532725
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,64,1,128,1,float16,fp8,1023,0.026965332527955372
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,64,1,128,1,float16,fp8,2047,0.035162667433420815
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,64,1,128,1,float16,fp8,4095,0.05156800150871277
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,64,1,128,1,float16,float16,4095,0.05563733478387197
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,64,1,128,1,float16,float16,8191,0.09113599856694539
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,64,1,128,1,float16,fp8,8191,0.08430932958920796
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,64,1,128,1,float16,float16,16383,0.1641813317934672
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,64,1,128,1,float16,fp8,16383,0.14779200156529745
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,64,2,128,1,float16,float16,1,0.00854399986565113
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,64,2,128,1,float16,fp8,1,0.008789333204428354
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,64,1,128,1,float16,fp8,32767,0.2749493320782979
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,64,1,128,1,float16,float16,32767,0.3063466747601827
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,64,2,128,1,float16,fp8,3,0.008538666491707167
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,64,2,128,1,float16,float16,7,0.008778666456540426
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,64,2,128,1,float16,float16,15,0.008527999743819237
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,64,2,128,1,float16,fp8,15,0.008874666566650072
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,64,2,128,1,float16,fp8,7,0.008736000085870424
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,64,2,128,1,float16,float16,31,0.010506667196750641
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,64,2,128,1,float16,fp8,31,0.008874666566650072
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,64,2,128,1,float16,float16,63,0.010581333190202713
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,64,2,128,1,float16,fp8,63,0.010650667051474253
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,64,2,128,1,float16,fp8,127,0.010490667074918747
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,64,2,128,1,float16,float16,127,0.010581333190202713
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,64,2,128,1,float16,float16,511,0.022853332261244457
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,64,2,128,1,float16,float16,255,0.014677333335081736
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,64,2,128,1,float16,fp8,255,0.014682666709025701
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,64,2,128,1,float16,fp8,511,0.022869333624839783
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,64,2,128,1,float16,float16,3,0.008874666566650072
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,64,2,128,1,float16,float16,1023,0.02731200059254964
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,64,2,128,1,float16,fp8,1023,0.026965332527955372
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,64,2,128,1,float16,float16,2047,0.037231999138991036
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,64,2,128,1,float16,fp8,2047,0.035162667433420815
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,64,2,128,1,float16,fp8,4095,0.05154666801293691
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,64,2,128,1,float16,float16,4095,0.05563200016816457
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,64,2,128,1,float16,float16,8191,0.09249599774678548
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,64,2,128,1,float16,fp8,16383,0.1462613344192505
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,64,2,128,1,float16,fp8,8191,0.08258666594823201
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,64,2,128,1,float16,float16,16383,0.16401066382726034
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,64,4,128,1,float16,float16,1,0.010698666175206503
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,64,4,128,1,float16,fp8,1,0.010586666564146677
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,64,2,128,1,float16,fp8,32767,0.27460267146428424
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,64,4,128,1,float16,fp8,3,0.010586666564146677
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,64,4,128,1,float16,float16,3,0.010922666639089584
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,64,2,128,1,float16,float16,32767,0.3056640028953552
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,64,4,128,1,float16,float16,15,0.01081066702802976
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,64,4,128,1,float16,fp8,15,0.010901333143313726
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,64,4,128,1,float16,float16,7,0.010581333190202713
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,64,4,128,1,float16,fp8,7,0.010928000013033548
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,64,4,128,1,float16,fp8,31,0.010911999891201654
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,64,4,128,1,float16,float16,63,0.010938666760921478
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,64,4,128,1,float16,float16,127,0.010933333386977514
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,64,4,128,1,float16,fp8,63,0.010586666564146677
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,64,4,128,1,float16,float16,31,0.010586666564146677
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,64,4,128,1,float16,fp8,127,0.010837333897749582
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,64,4,128,1,float16,float16,255,0.010565333068370819
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,64,4,128,1,float16,float16,1023,0.012629333883523941
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,64,4,128,1,float16,fp8,255,0.010586666564146677
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,64,4,128,1,float16,float16,511,0.010928000013033548
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,64,4,128,1,float16,fp8,1023,0.012629333883523941
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,64,4,128,1,float16,float16,2047,0.012629333883523941
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,64,4,128,1,float16,fp8,2047,0.012629333883523941
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,64,4,128,1,float16,float16,4095,0.012949333836634954
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,64,4,128,1,float16,fp8,511,0.010922666639089584
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,64,4,128,1,float16,float16,8191,0.020821332931518555
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,64,4,128,1,float16,fp8,4095,0.01267733300725619
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,64,4,128,1,float16,fp8,8191,0.02082666630546252
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,64,4,128,1,float16,float16,16383,0.022869333624839783
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,64,4,128,1,float16,float16,32767,0.02492266645034154
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,64,4,128,1,float16,fp8,32767,0.024911999702453613
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,64,4,128,1,float16,fp8,16383,0.022869333624839783
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,64,8,128,1,float16,float16,3,0.010581333190202713
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,64,8,128,1,float16,float16,1,0.010640000303586325
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,64,8,128,1,float16,fp8,1,0.010474666953086853
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,64,8,128,1,float16,fp8,7,0.010687999427318573
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,64,8,128,1,float16,float16,15,0.010586666564146677
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,64,8,128,1,float16,fp8,15,0.010485333700974783
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,64,8,128,1,float16,float16,7,0.010581333190202713
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,64,8,128,1,float16,fp8,3,0.010581333190202713
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,64,8,128,1,float16,float16,31,0.010485333700974783
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,64,8,128,1,float16,fp8,31,0.010640000303586325
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,64,8,128,1,float16,float16,63,0.010581333190202713
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,64,8,128,1,float16,fp8,127,0.010490667074918747
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,64,8,128,1,float16,fp8,63,0.010965333630641302
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,64,8,128,1,float16,float16,127,0.010458666831254959
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,64,8,128,1,float16,float16,255,0.010581333190202713
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,64,8,128,1,float16,float16,511,0.010826667149861654
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,64,8,128,1,float16,fp8,255,0.010821333775917688
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,64,8,128,1,float16,float16,2047,0.012629333883523941
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,64,8,128,1,float16,float16,1023,0.010922666639089584
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,64,8,128,1,float16,fp8,511,0.012629333883523941
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,64,8,128,1,float16,fp8,2047,0.012634667257467905
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,64,8,128,1,float16,float16,4095,0.015013333410024643
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,64,8,128,1,float16,fp8,1023,0.012639999389648438
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,64,8,128,1,float16,fp8,4095,0.016704000532627106
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,64,8,128,1,float16,float16,8191,0.018778666853904724
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,64,8,128,1,float16,fp8,8191,0.01877333347996076
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,64,8,128,1,float16,float16,16383,0.020821332931518555
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,64,1,128,1,float16,fp8,1,0.008874666566650072
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,64,8,128,1,float16,fp8,32767,0.022863999009132385
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,64,8,128,1,float16,float16,32767,0.025258667767047882
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,64,8,128,1,float16,fp8,16383,0.0194560003777345
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,64,1,128,1,float16,float16,1,0.008879999940594038
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,64,1,128,1,float16,fp8,7,0.009722666814923286
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,64,1,128,1,float16,fp8,3,0.009888000165422758
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,64,1,128,1,float16,float16,7,0.009183999771873156
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,64,1,128,1,float16,float16,3,0.008698666468262672
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,64,1,128,1,float16,float16,15,0.008874666566650072
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,64,1,128,1,float16,float16,63,0.010586666564146677
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,64,1,128,1,float16,fp8,31,0.010634666929642359
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,64,1,128,1,float16,fp8,15,0.00890666681031386
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,64,1,128,1,float16,fp8,63,0.010575999816258749
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,64,1,128,1,float16,float16,31,0.010768000036478043
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,64,1,128,1,float16,float16,127,0.010485333700974783
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,64,1,128,1,float16,fp8,127,0.010575999816258749
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,64,1,128,1,float16,float16,511,0.026965332527955372
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,64,1,128,1,float16,float16,255,0.022869333624839783
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,64,1,128,1,float16,fp8,255,0.022869333624839783
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,64,1,128,1,float16,fp8,511,0.025258667767047882
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,64,1,128,1,float16,float16,1023,0.03310933212439219
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,64,1,128,1,float16,fp8,1023,0.031386665999889374
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,64,1,128,1,float16,float16,2047,0.047450666626294456
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,64,1,128,1,float16,float16,4095,0.0747519979874293
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,64,1,128,1,float16,fp8,2047,0.043680002291997276
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,64,1,128,1,float16,fp8,4095,0.0682666649421056
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,64,1,128,1,float16,fp8,16383,0.211626668771108
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,64,1,128,1,float16,fp8,8191,0.11706667145093282
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,64,1,128,1,float16,float16,16383,0.2379093368848165
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,64,1,128,1,float16,float16,8191,0.1293653349081675
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,64,1,128,1,float16,float16,32767,0.45311999320983887
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,64,2,128,1,float16,fp8,1,0.008879999940594038
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,64,1,128,1,float16,fp8,32767,0.4041386842727661
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,64,2,128,1,float16,float16,1,0.008869333192706108
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,64,2,128,1,float16,float16,7,0.008869333192706108
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,64,2,128,1,float16,fp8,7,0.008762666955590248
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,64,2,128,1,float16,float16,3,0.008736000085870424
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,64,2,128,1,float16,fp8,15,0.008874666566650072
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,64,2,128,1,float16,fp8,3,0.008885333314538002
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,64,2,128,1,float16,float16,15,0.008879999940594038
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,64,2,128,1,float16,float16,63,0.010581333190202713
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,64,2,128,1,float16,float16,31,0.010586666564146677
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,64,2,128,1,float16,fp8,31,0.010645333677530289
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,64,2,128,1,float16,float16,127,0.010570666442314783
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,64,2,128,1,float16,float16,255,0.022869333624839783
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,64,2,128,1,float16,fp8,63,0.010597333312034607
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,64,2,128,1,float16,fp8,511,0.02491733431816101
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,64,2,128,1,float16,float16,511,0.02628266563018163
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,64,2,128,1,float16,fp8,255,0.022863999009132385
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,64,2,128,1,float16,fp8,127,0.010586666564146677
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,64,2,128,1,float16,float16,1023,0.03345066557327906
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,64,2,128,1,float16,fp8,1023,0.031066666046778362
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,64,2,128,1,float16,float16,2047,0.04744000236193339
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,64,2,128,1,float16,fp8,2047,0.04334933559099833
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,64,2,128,1,float16,float16,8191,0.12935466567675272
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,64,2,128,1,float16,float16,4095,0.07441600163777669
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,64,2,128,1,float16,fp8,4095,0.0682666649421056
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,64,2,128,1,float16,fp8,8191,0.116047998269399
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,64,2,128,1,float16,float16,32767,0.4538026650746663
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,64,2,128,1,float16,fp8,16383,0.21128533283869425
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,64,2,128,1,float16,float16,16383,0.23620800177256265
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,64,4,128,1,float16,float16,1,0.010581333190202713
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,64,4,128,1,float16,fp8,1,0.010474666953086853
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,64,2,128,1,float16,fp8,32767,0.4041386842727661
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,64,4,128,1,float16,float16,3,0.010586666564146677
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,64,4,128,1,float16,float16,7,0.010640000303586325
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,64,4,128,1,float16,fp8,7,0.010549332946538925
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,64,4,128,1,float16,fp8,3,0.010586666564146677
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,64,4,128,1,float16,fp8,15,0.010581333190202713
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,64,4,128,1,float16,fp8,31,0.010805333654085795
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,64,4,128,1,float16,float16,31,0.01080000028014183
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,64,4,128,1,float16,float16,15,0.010581333190202713
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,64,4,128,1,float16,float16,127,0.010821333775917688
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,64,4,128,1,float16,float16,63,0.010821333775917688
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,64,4,128,1,float16,fp8,127,0.010874666273593903
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,64,4,128,1,float16,fp8,63,0.010581333190202713
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,64,4,128,1,float16,fp8,255,0.010890666395425797
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,64,4,128,1,float16,float16,255,0.010597333312034607
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,64,4,128,1,float16,fp8,1023,0.012639999389648438
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,64,4,128,1,float16,fp8,511,0.012597333639860153
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,64,4,128,1,float16,float16,511,0.012624000509579977
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,64,4,128,1,float16,float16,1023,0.01090666651725769
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,64,4,128,1,float16,float16,4095,0.018751999984184902
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,64,4,128,1,float16,fp8,4095,0.018437333405017853
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,64,4,128,1,float16,fp8,8191,0.01911466692884763
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,64,4,128,1,float16,fp8,2047,0.012970666090647379
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,64,4,128,1,float16,float16,8191,0.02081599955757459
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,64,4,128,1,float16,float16,16383,0.022863999009132385
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,64,4,128,1,float16,fp8,16383,0.02184533327817917
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,64,8,128,1,float16,fp8,1,0.010581333190202713
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,64,4,128,1,float16,float16,32767,0.026965332527955372
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,64,8,128,1,float16,fp8,3,0.010832000523805618
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,64,4,128,1,float16,fp8,32767,0.02491733431816101
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,64,8,128,1,float16,float16,3,0.010586666564146677
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,64,8,128,1,float16,float16,1,0.010586666564146677
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,64,8,128,1,float16,fp8,7,0.010575999816258749
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,64,4,128,1,float16,float16,2047,0.012629333883523941
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,64,8,128,1,float16,fp8,15,0.010634666929642359
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,64,8,128,1,float16,float16,7,0.010581333190202713
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,64,8,128,1,float16,fp8,31,0.010496000448862711
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,64,8,128,1,float16,float16,15,0.010581333190202713
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,64,8,128,1,float16,float16,31,0.010565333068370819
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,64,8,128,1,float16,float16,63,0.010591999938090643
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,64,8,128,1,float16,fp8,63,0.01091733326514562
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,64,8,128,1,float16,fp8,127,0.010485333700974783
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,64,8,128,1,float16,float16,127,0.010581333190202713
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,64,8,128,1,float16,fp8,511,0.012629333883523941
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,64,8,128,1,float16,float16,1023,0.01080000028014183
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,64,8,128,1,float16,float16,511,0.010586666564146677
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,64,8,128,1,float16,float16,255,0.010581333190202713
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,64,8,128,1,float16,fp8,1023,0.012629333883523941
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,64,8,128,1,float16,float16,2047,0.015018666783968607
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,64,8,128,1,float16,fp8,2047,0.014677333335081736
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,64,8,128,1,float16,float16,4095,0.01674666628241539
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,64,8,128,1,float16,fp8,255,0.010575999816258749
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,64,8,128,1,float16,fp8,8191,0.018794666975736618
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,64,8,128,1,float16,fp8,4095,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,64,8,128,1,float16,float16,8191,0.01911466692884763
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,64,8,128,1,float16,float16,16383,0.02491733431816101
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,64,1,128,1,float16,fp8,1,0.043354665239652
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,64,1,128,1,float16,float16,1,0.05495466788609823
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,64,8,128,1,float16,fp8,16383,0.02252800017595291
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,64,1,128,1,float16,float16,3,0.05563733478387197
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,64,1,128,1,float16,fp8,3,0.04539733131726583
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,64,8,128,1,float16,float16,32767,0.03925333420435587
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,64,8,128,1,float16,fp8,32767,0.025578667720158894
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,64,1,128,1,float16,float16,7,0.0576853354771932
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,64,1,128,1,float16,float16,15,0.06553066770235698
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,64,1,128,1,float16,fp8,7,0.04744533201058706
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,64,1,128,1,float16,fp8,15,0.057664001981417336
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,64,1,128,1,float16,float16,31,0.06587733328342438
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,64,1,128,1,float16,fp8,31,0.0580266664425532
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,64,1,128,1,float16,float16,63,0.06621866424878438
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,64,1,128,1,float16,float16,127,0.07850666840871175
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,64,1,128,1,float16,fp8,63,0.0576800008614858
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,64,1,128,1,float16,fp8,127,0.0679253339767456
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,64,1,128,1,float16,float16,255,0.1153706709543864
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,64,1,128,1,float16,fp8,511,0.17287466923395792
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,64,1,128,1,float16,float16,511,0.19097065925598145
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,64,1,128,1,float16,float16,1023,0.3421866496404012
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,64,1,128,1,float16,fp8,1023,0.31349867582321167
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,64,1,128,1,float16,fp8,255,0.10274666547775269
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,64,1,128,1,float16,fp8,2047,0.5966506799062093
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,64,2,128,1,float16,float16,1,0.05392533540725708
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,64,1,128,1,float16,float16,4095,1.3189120292663574
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,64,1,128,1,float16,float16,2047,0.6432426770528158
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,64,1,128,1,float16,fp8,4095,1.1593386332194011
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,64,2,128,1,float16,fp8,1,0.04334933559099833
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,64,2,128,1,float16,float16,3,0.05564799904823303
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,64,2,128,1,float16,fp8,3,0.04539733131726583
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,64,2,128,1,float16,float16,7,0.0576853354771932
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,64,2,128,1,float16,float16,15,0.06587733328342438
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,64,2,128,1,float16,fp8,15,0.057674666245778404
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,64,2,128,1,float16,fp8,7,0.04610133171081543
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,64,2,128,1,float16,fp8,31,0.0576853354771932
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,64,2,128,1,float16,float16,63,0.06621866424878438
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,64,2,128,1,float16,float16,31,0.06587733328342438
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,64,2,128,1,float16,fp8,63,0.0580266664425532
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,64,2,128,1,float16,float16,255,0.11707733074824016
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,64,2,128,1,float16,fp8,127,0.067930668592453
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,64,2,128,1,float16,float16,127,0.0784800002972285
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,64,2,128,1,float16,fp8,255,0.10308266679445903
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,64,2,128,1,float16,float16,511,0.1925119956334432
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,64,2,128,1,float16,fp8,511,0.1723733345667521
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,64,2,128,1,float16,float16,1023,0.3421866496404012
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,64,2,128,1,float16,float16,4095,1.404586633046468
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,64,2,128,1,float16,fp8,2047,0.5959680080413818
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,64,2,128,1,float16,fp8,4095,1.1665066878000896
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,64,2,128,1,float16,fp8,1023,0.31351999441782635
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,64,4,128,1,float16,float16,1,0.015018666783968607
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,64,2,128,1,float16,float16,2047,0.6772212982177734
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,64,4,128,1,float16,fp8,1,0.014677333335081736
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,64,4,128,1,float16,float16,3,0.014677333335081736
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,64,4,128,1,float16,fp8,3,0.014677333335081736
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,64,4,128,1,float16,fp8,7,0.014677333335081736
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,64,4,128,1,float16,fp8,15,0.014661333213249842
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,64,4,128,1,float16,float16,7,0.014677333335081736
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,64,4,128,1,float16,float16,15,0.014671999961137772
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,64,4,128,1,float16,fp8,31,0.014677333335081736
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,64,4,128,1,float16,float16,31,0.014682666709025701
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,64,4,128,1,float16,float16,63,0.015018666783968607
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,64,4,128,1,float16,fp8,63,0.014671999961137772
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,64,4,128,1,float16,float16,255,0.015018666783968607
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,64,4,128,1,float16,fp8,255,0.014677333335081736
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,64,4,128,1,float16,float16,127,0.014661333213249842
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,64,4,128,1,float16,float16,511,0.017055999487638474
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,64,4,128,1,float16,fp8,127,0.014677333335081736
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,64,4,128,1,float16,fp8,511,0.016719999412695568
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,64,4,128,1,float16,float16,1023,0.021168000996112823
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,64,4,128,1,float16,fp8,1023,0.01877333347996076
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,64,4,128,1,float16,fp8,2047,0.029359998802344005
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,64,4,128,1,float16,fp8,4095,0.045738667249679565
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,64,4,128,1,float16,float16,2047,0.04130133241415024
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,64,4,128,1,float16,float16,4095,0.06382933259010315
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,64,8,128,1,float16,float16,1,0.016735999534527462
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,64,8,128,1,float16,float16,3,0.01706133286158244
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,64,8,128,1,float16,fp8,1,0.015040000279744467
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,64,8,128,1,float16,fp8,3,0.015018666783968607
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,64,8,128,1,float16,float16,15,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,64,8,128,1,float16,float16,7,0.0170666662355264
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,64,8,128,1,float16,fp8,7,0.015024000157912573
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,64,8,128,1,float16,fp8,15,0.014997333288192749
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,64,8,128,1,float16,fp8,31,0.014698666830857595
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,64,8,128,1,float16,float16,31,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,64,8,128,1,float16,float16,63,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,64,8,128,1,float16,fp8,127,0.014677333335081736
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,64,8,128,1,float16,fp8,63,0.015381333728631338
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,64,8,128,1,float16,fp8,511,0.01877333347996076
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,64,8,128,1,float16,float16,511,0.020821332931518555
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,64,8,128,1,float16,float16,255,0.016714667280515034
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,64,8,128,1,float16,float16,127,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,64,8,128,1,float16,float16,1023,0.03583999971548716
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,64,8,128,1,float16,fp8,1023,0.02491733431816101
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,64,8,128,1,float16,float16,2047,0.06178133189678192
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,64,8,128,1,float16,fp8,2047,0.04164266586303711
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,64,8,128,1,float16,float16,4095,0.10513066252072652
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,64,1,128,1,float16,float16,1,0.014671999961137772
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,64,1,128,1,float16,float16,7,0.014677333335081736
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,64,1,128,1,float16,float16,3,0.014677333335081736
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,64,1,128,1,float16,fp8,3,0.010821333775917688
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,64,1,128,1,float16,fp8,1,0.010586666564146677
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,64,1,128,1,float16,float16,15,0.014671999961137772
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,64,1,128,1,float16,fp8,15,0.010575999816258749
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,64,8,128,1,float16,fp8,4095,0.06418133278687795
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,64,1,128,1,float16,float16,31,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,64,1,128,1,float16,fp8,7,0.010239999741315842
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,64,1,128,1,float16,fp8,31,0.012629333883523941
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,64,1,128,1,float16,float16,63,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,64,1,128,1,float16,fp8,63,0.012624000509579977
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,64,8,128,1,float16,fp8,255,0.01504533365368843
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,64,1,128,1,float16,fp8,127,0.012629333883523941
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,64,1,128,1,float16,fp8,255,0.014677333335081736
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,64,1,128,1,float16,float16,255,0.01877333347996076
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,64,1,128,1,float16,float16,511,0.025263999899228413
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,64,1,128,1,float16,fp8,511,0.0194560003777345
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,64,1,128,1,float16,fp8,1023,0.031061333914597828
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,64,1,128,1,float16,float16,2047,0.06382399797439575
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,64,1,128,1,float16,fp8,2047,0.051541333397229515
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,64,1,128,1,float16,float16,4095,0.11502400040626526
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,64,1,128,1,float16,float16,127,0.016719999412695568
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,64,1,128,1,float16,fp8,4095,0.09250133236249287
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,64,1,128,1,float16,float16,1023,0.03754666695992152
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,64,1,128,1,float16,float16,8191,0.21538132429122925
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,64,1,128,1,float16,fp8,8191,0.1742560068766276
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,64,1,128,1,float16,fp8,16383,0.33877865473429364
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,64,1,128,1,float16,float16,16383,0.4163999954859416
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,64,1,128,1,float16,fp8,32767,0.7113386789957682
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,64,2,128,1,float16,float16,3,0.014671999961137772
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,64,1,128,1,float16,float16,32767,0.8210720221201578
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,64,2,128,1,float16,fp8,3,0.010474666953086853
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,64,2,128,1,float16,fp8,1,0.010581333190202713
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,64,2,128,1,float16,float16,1,0.014677333335081736
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,64,2,128,1,float16,float16,7,0.014677333335081736
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,64,2,128,1,float16,float16,15,0.014661333213249842
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,64,2,128,1,float16,fp8,7,0.009658666948477427
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,64,2,128,1,float16,float16,31,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,64,2,128,1,float16,fp8,15,0.010581333190202713
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,64,2,128,1,float16,fp8,31,0.012629333883523941
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,64,2,128,1,float16,float16,255,0.01877333347996076
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,64,2,128,1,float16,fp8,127,0.012629333883523941
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,64,2,128,1,float16,float16,127,0.016719999412695568
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,64,2,128,1,float16,fp8,63,0.012629333883523941
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,64,2,128,1,float16,float16,63,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,64,2,128,1,float16,fp8,511,0.019141333798567455
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,64,2,128,1,float16,float16,1023,0.03754666695992152
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,64,2,128,1,float16,fp8,1023,0.031061333914597828
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,64,2,128,1,float16,float16,511,0.02491733431816101
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,64,2,128,1,float16,fp8,255,0.015109332899252573
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,64,2,128,1,float16,float16,2047,0.06382933259010315
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,64,2,128,1,float16,float16,4095,0.11502933502197266
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,64,2,128,1,float16,fp8,4095,0.09250133236249287
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,64,2,128,1,float16,float16,8191,0.21469332774480185
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,64,2,128,1,float16,fp8,2047,0.05154666801293691
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,64,2,128,1,float16,fp8,16383,0.3377546469370524
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,64,2,128,1,float16,fp8,8191,0.17425066232681274
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,64,2,128,1,float16,float16,32767,0.8369440237681071
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,64,2,128,1,float16,float16,16383,0.4164266586303711
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,64,4,128,1,float16,float16,1,0.0107893335322539
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,64,2,128,1,float16,fp8,32767,0.7106560071309408
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,64,4,128,1,float16,fp8,1,0.010575999816258749
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,64,4,128,1,float16,float16,3,0.010586666564146677
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,64,4,128,1,float16,float16,7,0.010474666953086853
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,64,4,128,1,float16,fp8,3,0.011264000087976456
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,64,4,128,1,float16,fp8,15,0.010581333190202713
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,64,4,128,1,float16,fp8,7,0.010581333190202713
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,64,4,128,1,float16,float16,63,0.010586666564146677
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,64,4,128,1,float16,float16,127,0.010581333190202713
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,64,4,128,1,float16,fp8,127,0.010784000158309937
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,64,4,128,1,float16,fp8,63,0.011605333536863327
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,64,4,128,1,float16,float16,31,0.010816000401973724
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,64,4,128,1,float16,fp8,31,0.010575999816258749
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,64,4,128,1,float16,float16,255,0.010656000425418219
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,64,4,128,1,float16,float16,15,0.010575999816258749
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,64,4,128,1,float16,fp8,255,0.010591999938090643
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,64,4,128,1,float16,float16,511,0.010922666639089584
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,64,4,128,1,float16,fp8,511,0.012629333883523941
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,64,4,128,1,float16,float16,2047,0.01706133286158244
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,64,4,128,1,float16,fp8,1023,0.012629333883523941
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,64,4,128,1,float16,float16,4095,0.019109333554903667
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,64,4,128,1,float16,float16,1023,0.012629333883523941
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,64,4,128,1,float16,fp8,4095,0.01911466692884763
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,64,4,128,1,float16,float16,8191,0.022869333624839783
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,64,4,128,1,float16,fp8,8191,0.020821332931518555
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,64,4,128,1,float16,fp8,2047,0.018768000106016796
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,64,4,128,1,float16,fp8,16383,0.024933333198229473
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,64,4,128,1,float16,float16,16383,0.026965332527955372
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,64,8,128,1,float16,float16,1,0.010581333190202713
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,64,8,128,1,float16,fp8,1,0.010490667074918747
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,64,4,128,1,float16,fp8,32767,0.02935466667016347
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,64,4,128,1,float16,float16,32767,0.04369066655635834
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,64,8,128,1,float16,fp8,3,0.010922666639089584
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,64,8,128,1,float16,fp8,7,0.010581333190202713
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,64,8,128,1,float16,float16,15,0.010490667074918747
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,64,8,128,1,float16,float16,3,0.010581333190202713
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,64,8,128,1,float16,float16,7,0.010581333190202713
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,64,8,128,1,float16,fp8,31,0.010922666639089584
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,64,8,128,1,float16,float16,63,0.010559999694426855
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,64,8,128,1,float16,fp8,63,0.010672000547250112
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,64,8,128,1,float16,fp8,15,0.010928000013033548
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,64,8,128,1,float16,float16,31,0.010602666685978571
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,64,8,128,1,float16,float16,255,0.010586666564146677
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,64,8,128,1,float16,fp8,127,0.010586666564146677
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,64,8,128,1,float16,fp8,255,0.010581333190202713
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,64,8,128,1,float16,float16,127,0.010586666564146677
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,64,8,128,1,float16,float16,511,0.010832000523805618
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,64,8,128,1,float16,fp8,1023,0.012629333883523941
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,64,8,128,1,float16,fp8,511,0.012634667257467905
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,64,8,128,1,float16,float16,1023,0.012629333883523941
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,64,8,128,1,float16,fp8,2047,0.014677333335081736
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,64,8,128,1,float16,fp8,4095,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,64,8,128,1,float16,float16,8191,0.022869333624839783
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,64,8,128,1,float16,float16,2047,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,64,8,128,1,float16,float16,4095,0.01706133286158244
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,64,8,128,1,float16,fp8,8191,0.0194560003777345
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,64,8,128,1,float16,float16,32767,0.06109866499900818
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,64,8,128,1,float16,fp8,16383,0.02491733431816101
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,64,8,128,1,float16,fp8,32767,0.039936001102129616
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,64,1,128,1,float16,float16,1,0.10274666547775269
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,64,1,128,1,float16,fp8,3,0.08431466420491536
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,64,1,128,1,float16,float16,3,0.10478933652242024
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,64,8,128,1,float16,float16,16383,0.03925866633653641
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,64,1,128,1,float16,fp8,1,0.08055999875068665
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,64,1,128,1,float16,float16,7,0.10683733224868774
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,64,1,128,1,float16,fp8,7,0.08772266904513042
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,64,1,128,1,float16,fp8,15,0.10890133182207744
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,64,1,128,1,float16,float16,31,0.1256106694539388
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,64,1,128,1,float16,float16,15,0.12526399890581766
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,64,1,128,1,float16,float16,127,0.1502133309841156
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,64,1,128,1,float16,float16,63,0.1269760032494863
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,64,1,128,1,float16,fp8,31,0.10923199852307637
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,64,1,128,1,float16,fp8,127,0.1293706695238749
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,64,1,128,1,float16,float16,255,0.2239146629969279
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,64,1,128,1,float16,float16,511,0.3735893170038859
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,64,1,128,1,float16,fp8,63,0.10922666390736897
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,64,1,128,1,float16,fp8,255,0.19917333126068115
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,64,1,128,1,float16,fp8,1023,0.6171306769053141
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,64,1,128,1,float16,fp8,511,0.3363786538441976
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,64,1,128,1,float16,float16,1023,0.6727840105692545
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,64,2,128,1,float16,float16,3,0.1037493348121643
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,64,1,128,1,float16,fp8,2047,1.1764106750488281
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,64,1,128,1,float16,float16,2047,1.3253973325093586
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,64,2,128,1,float16,fp8,1,0.08021866778532664
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,64,2,128,1,float16,float16,7,0.106495996316274
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,64,2,128,1,float16,float16,1,0.10103999574979146
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,64,2,128,1,float16,float16,15,0.1252959966659546
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,64,2,128,1,float16,fp8,3,0.08329066634178162
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,64,2,128,1,float16,fp8,15,0.1088853379090627
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,64,2,128,1,float16,fp8,31,0.1088800032933553
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,64,2,128,1,float16,float16,127,0.15035200119018555
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,64,2,128,1,float16,fp8,63,0.10922666390736897
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,64,2,128,1,float16,float16,63,0.12731200456619263
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,64,2,128,1,float16,float16,31,0.1256106694539388
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,64,2,128,1,float16,float16,255,0.22354666392008463
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,64,2,128,1,float16,float16,511,0.3735733429590861
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,64,2,128,1,float16,fp8,255,0.19901333252588907
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,64,2,128,1,float16,fp8,511,0.33638401826222736
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,64,2,128,1,float16,float16,1023,0.7099626859029134
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,64,2,128,1,float16,fp8,127,0.12936000029246011
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,64,2,128,1,float16,fp8,1023,0.6167840162913004
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,64,2,128,1,float16,float16,2047,1.3742079734802246
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,64,2,128,1,float16,fp8,2047,1.1827200253804524
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,64,4,128,1,float16,float16,1,0.019120000302791595
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,64,4,128,1,float16,fp8,1,0.01877333347996076
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,64,4,128,1,float16,float16,3,0.01911466692884763
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,64,4,128,1,float16,fp8,7,0.018426666657129925
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,64,4,128,1,float16,float16,15,0.02014933278163274
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,64,4,128,1,float16,float16,7,0.019797333826621372
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,64,4,128,1,float16,float16,31,0.0194560003777345
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,64,4,128,1,float16,fp8,15,0.01877333347996076
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,64,4,128,1,float16,fp8,31,0.018805333723624546
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,64,4,128,1,float16,fp8,3,0.018768000106016796
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,64,2,128,1,float16,fp8,7,0.08669867118199666
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,64,4,128,1,float16,float16,63,0.01911466692884763
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,64,4,128,1,float16,fp8,63,0.018768000106016796
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,64,4,128,1,float16,fp8,127,0.018794666975736618
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,64,4,128,1,float16,fp8,255,0.017045332739750545
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,64,4,128,1,float16,float16,255,0.019120000302791595
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,64,4,128,1,float16,float16,127,0.020821332931518555
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,64,4,128,1,float16,float16,1023,0.03754666695992152
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,64,4,128,1,float16,fp8,1023,0.027306665976842243
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,64,4,128,1,float16,float16,511,0.024911999702453613
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,64,4,128,1,float16,fp8,2047,0.04710400104522705
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,64,4,128,1,float16,float16,2047,0.0641653339068095
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,64,8,128,1,float16,fp8,3,0.02082666630546252
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,64,8,128,1,float16,float16,3,0.023210667073726654
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,64,8,128,1,float16,float16,1,0.023210667073726654
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,64,8,128,1,float16,fp8,1,0.020821332931518555
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,64,4,128,1,float16,fp8,511,0.021168000996112823
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,64,8,128,1,float16,float16,31,0.022869333624839783
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,64,8,128,1,float16,fp8,15,0.02082666630546252
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,64,8,128,1,float16,fp8,7,0.020821332931518555
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,64,8,128,1,float16,float16,15,0.023205332458019257
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,64,8,128,1,float16,fp8,31,0.020821332931518555
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,64,8,128,1,float16,float16,7,0.023546665906906128
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,64,8,128,1,float16,float16,63,0.023210667073726654
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,64,8,128,1,float16,float16,127,0.022874665757020313
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,64,8,128,1,float16,fp8,127,0.02082666630546252
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,64,8,128,1,float16,fp8,63,0.020479999482631683
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,64,8,128,1,float16,float16,255,0.02422933280467987
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,64,8,128,1,float16,fp8,255,0.020479999482631683
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,64,8,128,1,float16,float16,511,0.0365226666132609
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,64,8,128,1,float16,fp8,1023,0.038917332887649536
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,64,8,128,1,float16,float16,2047,0.09935466448465984
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,64,8,128,1,float16,float16,1023,0.056320001681645714
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,64,8,128,1,float16,fp8,511,0.025605333348115284
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,256,1,64,1,128,1,float16,fp8,1,0.15581867098808289
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,256,1,64,1,128,1,float16,float16,3,0.20121600230534872
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,64,8,128,1,float16,fp8,2047,0.06144000093142191
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,256,1,64,1,128,1,float16,fp8,3,0.16059733430544534
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,256,1,64,1,128,1,float16,float16,1,0.19712533553441366
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,256,1,64,1,128,1,float16,float16,7,0.20565332969029745
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,256,1,64,1,128,1,float16,fp8,7,0.16742400328318277
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,256,1,64,1,128,1,float16,float16,31,0.2461013396581014
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,256,1,64,1,128,1,float16,fp8,15,0.21130667130152384
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,256,1,64,1,128,1,float16,float16,63,0.24780799945195517
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,256,1,64,1,128,1,float16,float16,15,0.24405866861343384
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,256,1,64,1,128,1,float16,float16,127,0.2950826684633891
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,256,1,64,1,128,1,float16,fp8,63,0.21333332856496176
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,256,1,64,1,128,1,float16,fp8,31,0.21127466360727945
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,256,1,64,1,128,1,float16,float16,255,0.4398080110549927
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,256,1,64,1,128,1,float16,float16,511,0.7369386355082194
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,256,1,64,1,128,1,float16,fp8,255,0.3896266619364421
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,256,1,64,1,128,1,float16,fp8,127,0.25222400824228924
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,256,1,64,2,128,1,float16,float16,1,0.1971199909845988
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,256,1,64,2,128,1,float16,fp8,3,0.15991999705632529
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,256,1,64,2,128,1,float16,float16,7,0.20511466264724731
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,256,1,64,2,128,1,float16,fp8,1,0.15582399566968283
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,256,1,64,2,128,1,float16,fp8,7,0.168287992477417
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,256,1,64,1,128,1,float16,fp8,511,0.6615093151728312
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,256,1,64,2,128,1,float16,fp8,15,0.211626668771108
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,256,1,64,2,128,1,float16,float16,3,0.20122132698694864
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,256,1,64,2,128,1,float16,float16,15,0.244053324063619
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,256,1,64,2,128,1,float16,float16,31,0.2461013396581014
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,256,1,64,2,128,1,float16,fp8,31,0.21128533283869425
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,256,1,64,2,128,1,float16,fp8,127,0.25224000215530396
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,256,1,64,2,128,1,float16,float16,63,0.24780799945195517
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,256,1,64,2,128,1,float16,float16,127,0.29474133253097534
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,256,1,64,2,128,1,float16,float16,255,0.4404960076014201
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,256,1,64,2,128,1,float16,fp8,255,0.389631986618042
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,256,1,64,2,128,1,float16,float16,511,0.7889973322550455
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,256,1,64,4,128,1,float16,float16,1,0.029359998802344005
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,256,1,64,2,128,1,float16,fp8,511,0.6608320077260336
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,256,1,64,2,128,1,float16,fp8,63,0.21333332856496176
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,256,1,64,4,128,1,float16,fp8,1,0.026975999275843304
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,256,1,64,4,128,1,float16,float16,15,0.0290133332212766
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,256,1,64,4,128,1,float16,fp8,15,0.026965332527955372
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,256,1,64,4,128,1,float16,fp8,7,0.026965332527955372
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,256,1,64,4,128,1,float16,float16,7,0.029018667836983997
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,256,1,64,4,128,1,float16,float16,3,0.0290133332212766
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,256,1,64,4,128,1,float16,fp8,3,0.027306665976842243
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,256,1,64,4,128,1,float16,float16,63,0.02899733434120814
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,256,1,64,4,128,1,float16,fp8,63,0.027306665976842243
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,256,1,64,4,128,1,float16,float16,31,0.030037333567937214
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,256,1,64,4,128,1,float16,float16,127,0.0290133332212766
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,256,1,64,4,128,1,float16,fp8,31,0.02696000039577484
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,256,1,64,4,128,1,float16,fp8,127,0.02697066714366277
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,256,1,64,4,128,1,float16,float16,511,0.039936001102129616
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,256,1,64,4,128,1,float16,fp8,511,0.03513066718975703
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,256,1,64,4,128,1,float16,float16,255,0.029359998802344005
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,256,1,64,4,128,1,float16,fp8,255,0.026965332527955372
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,256,1,64,8,128,1,float16,fp8,1,0.031061333914597828
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,256,1,64,8,128,1,float16,float16,1,0.037205333511034645
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,256,1,64,8,128,1,float16,float16,7,0.03754666695992152
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,256,1,64,8,128,1,float16,fp8,7,0.03107200066248576
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,256,1,64,8,128,1,float16,fp8,3,0.031061333914597828
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,256,1,64,8,128,1,float16,float16,3,0.03754133234421412
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,256,1,64,8,128,1,float16,float16,31,0.037205333511034645
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,256,1,64,8,128,1,float16,float16,63,0.03754666695992152
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,256,1,64,8,128,1,float16,fp8,63,0.0314026673634847
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,256,1,64,8,128,1,float16,float16,15,0.03755199909210205
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,256,1,64,8,128,1,float16,fp8,15,0.0314026673634847
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,256,1,64,8,128,1,float16,fp8,31,0.031061333914597828
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,256,1,64,8,128,1,float16,float16,127,0.037861332297325134
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,256,1,64,8,128,1,float16,fp8,127,0.0310506671667099
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,256,1,64,8,128,1,float16,float16,255,0.03925866633653641
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,256,1,64,8,128,1,float16,fp8,255,0.031066666046778362
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,256,1,64,8,128,1,float16,float16,511,0.060080001751581825
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,256,1,64,8,128,1,float16,fp8,511,0.04471466441949209
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,512,1,64,1,128,1,float16,fp8,7,0.32819199562072754
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,512,1,64,1,128,1,float16,float16,15,0.48179201285044354
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,512,1,64,1,128,1,float16,float16,1,0.38758401075998944
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,512,1,64,1,128,1,float16,float16,3,0.3973120053609212
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,512,1,64,1,128,1,float16,fp8,1,0.3036160071690877
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,512,1,64,1,128,1,float16,float16,7,0.40414400895436603
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,512,1,64,1,128,1,float16,fp8,3,0.3141973416010539
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,512,1,64,1,128,1,float16,fp8,31,0.4164213339487712
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,512,1,64,1,128,1,float16,float16,31,0.4872479836146037
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,512,1,64,1,128,1,float16,fp8,15,0.41811732451121014
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,512,1,64,1,128,1,float16,fp8,127,0.49612267812093097
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,512,1,64,1,128,1,float16,fp8,63,0.42001068592071533
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,512,1,64,1,128,1,float16,float16,127,0.583679993947347
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,512,1,64,1,128,1,float16,float16,63,0.4896426598230998
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,512,1,64,1,128,1,float16,float16,255,0.8714240392049154
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,512,1,64,1,128,1,float16,fp8,255,0.7703893184661865
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,512,1,64,2,128,1,float16,float16,1,0.38758401075998944
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,512,1,64,2,128,1,float16,fp8,1,0.30395734310150146
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,512,1,64,2,128,1,float16,float16,3,0.3959466616312663
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,512,1,64,2,128,1,float16,fp8,15,0.41813333829243976
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,512,1,64,2,128,1,float16,float16,31,0.48760000864664715
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,512,1,64,2,128,1,float16,fp8,3,0.3141813278198242
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,512,1,64,2,128,1,float16,float16,7,0.4041386842727661
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,512,1,64,2,128,1,float16,float16,15,0.48179201285044354
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,512,1,64,2,128,1,float16,fp8,7,0.3281866709391276
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,512,1,64,2,128,1,float16,fp8,63,0.42001068592071533
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,512,1,64,2,128,1,float16,float16,63,0.4896479845046997
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,512,1,64,2,128,1,float16,fp8,31,0.41779200236002606
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,512,1,64,2,128,1,float16,float16,127,0.5829973220825195
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,512,1,64,2,128,1,float16,fp8,127,0.49612800280253094
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,512,1,64,4,128,1,float16,fp8,1,0.045738667249679565
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,512,1,64,2,128,1,float16,float16,255,0.9507839679718018
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,512,1,64,2,128,1,float16,fp8,255,0.7707200050354004
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,512,1,64,4,128,1,float16,float16,3,0.047781333327293396
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,512,1,64,4,128,1,float16,float16,7,0.04915733138720194
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,512,1,64,4,128,1,float16,float16,1,0.04778666794300079
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,512,1,64,4,128,1,float16,fp8,3,0.04573333263397217
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,512,1,64,4,128,1,float16,float16,15,0.04778666794300079
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,512,1,64,4,128,1,float16,fp8,7,0.045738667249679565
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,512,1,64,4,128,1,float16,fp8,15,0.045738667249679565
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,512,1,64,4,128,1,float16,fp8,31,0.04539733131726583
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,512,1,64,4,128,1,float16,float16,63,0.04949333270390829
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,512,1,64,4,128,1,float16,float16,31,0.04778666794300079
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,512,1,64,4,128,1,float16,fp8,127,0.04540266593297323
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,512,1,64,4,128,1,float16,float16,127,0.04980800052483877
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,512,1,64,4,128,1,float16,fp8,63,0.04573333263397217
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,512,1,64,8,128,1,float16,fp8,1,0.052560001611709595
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,512,1,64,8,128,1,float16,fp8,3,0.051541333397229515
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,512,1,64,4,128,1,float16,float16,255,0.04983466863632202
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,512,1,64,8,128,1,float16,float16,1,0.06621866424878438
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,512,1,64,4,128,1,float16,fp8,255,0.04367466767628988
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,512,1,64,8,128,1,float16,float16,3,0.06553600231806438
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,512,1,64,8,128,1,float16,float16,15,0.06656000018119812
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,512,1,64,8,128,1,float16,fp8,15,0.05189333359400431
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,512,1,64,8,128,1,float16,float16,7,0.06621866424878438
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,512,1,64,8,128,1,float16,fp8,7,0.05189866820971171
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,512,1,64,8,128,1,float16,fp8,31,0.05188799897829691
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,512,1,64,8,128,1,float16,float16,31,0.06519466638565063
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,512,1,64,8,128,1,float16,float16,63,0.06485333542029063
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,512,1,64,8,128,1,float16,float16,127,0.06621866424878438
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,512,1,64,8,128,1,float16,fp8,127,0.051882664362589516
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,512,1,64,8,128,1,float16,fp8,63,0.05187733471393585
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,64,1,128,1,float16,float16,1,0.014677333335081736
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,64,1,128,1,float16,fp8,3,0.015018666783968607
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,64,1,128,1,float16,float16,7,0.014005333185195923
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,512,1,64,8,128,1,float16,float16,255,0.0679253339767456
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,64,1,128,1,float16,fp8,1,0.015002666662136713
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,512,1,64,8,128,1,float16,fp8,255,0.05358933409055074
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,64,1,128,1,float16,float16,3,0.014677333335081736
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,64,1,128,1,float16,fp8,15,0.016757333030303318
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,64,1,128,1,float16,fp8,7,0.016063999384641647
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,64,1,128,1,float16,float16,31,0.014671999961137772
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,64,1,128,1,float16,float16,15,0.01469333345691363
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,64,1,128,1,float16,fp8,127,0.020848001043001812
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,64,1,128,1,float16,float16,63,0.014677333335081736
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,64,1,128,1,float16,fp8,31,0.02081599955757459
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,64,1,128,1,float16,float16,127,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,64,1,128,1,float16,fp8,255,0.023210667073726654
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,64,1,128,1,float16,fp8,63,0.020821332931518555
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,64,1,128,1,float16,float16,511,0.035504000882307686
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,64,1,128,1,float16,float16,255,0.023215999205907185
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,64,1,128,1,float16,fp8,511,0.03515200068553289
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,64,1,128,1,float16,fp8,2047,0.09455999732017517
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,64,1,128,1,float16,float16,1023,0.06178133189678192
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,64,1,128,1,float16,fp8,4095,0.17493333419164023
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,64,1,128,1,float16,fp8,8191,0.3357013463973999
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,64,1,128,1,float16,float16,2047,0.110944002866745
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,64,1,128,1,float16,fp8,1023,0.054234668612480164
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,64,1,128,1,float16,float16,4095,0.2112906575202942
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,64,1,128,1,float16,float16,16383,0.8265386422475179
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,64,1,128,1,float16,float16,8191,0.4119946559270223
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,64,2,128,1,float16,fp8,3,0.015013333410024643
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,64,1,128,1,float16,fp8,32767,1.4161920547485352
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,64,1,128,1,float16,fp8,16383,0.6567253271738688
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,64,2,128,1,float16,float16,1,0.013994666437307993
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,64,2,128,1,float16,fp8,1,0.015013333410024643
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,64,1,128,1,float16,float16,32767,2.8957014083862305
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,64,2,128,1,float16,float16,7,0.014671999961137772
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,64,2,128,1,float16,float16,3,0.014736000448465347
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,64,2,128,1,float16,fp8,7,0.015018666783968607
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,64,2,128,1,float16,float16,15,0.014677333335081736
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,64,2,128,1,float16,fp8,127,0.02081599955757459
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,64,2,128,1,float16,fp8,15,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,64,2,128,1,float16,float16,31,0.014677333335081736
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,64,2,128,1,float16,float16,255,0.023578666150569916
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,64,2,128,1,float16,fp8,63,0.02081599955757459
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,64,2,128,1,float16,fp8,31,0.020810666183630627
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,64,2,128,1,float16,float16,63,0.014677333335081736
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,64,2,128,1,float16,float16,127,0.016730666160583496
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,64,2,128,1,float16,fp8,255,0.023215999205907185
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,64,2,128,1,float16,float16,1023,0.06178666651248932
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,64,2,128,1,float16,fp8,1023,0.05529599885145823
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,64,2,128,1,float16,float16,511,0.03618666778008143
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,64,2,128,1,float16,fp8,511,0.03515733281771342
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,64,2,128,1,float16,fp8,4095,0.17425066232681274
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,64,2,128,1,float16,float16,8191,0.42308799425760907
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,64,2,128,1,float16,float16,2047,0.1109333336353302
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,64,2,128,1,float16,fp8,2047,0.09455466270446777
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,64,2,128,1,float16,float16,4095,0.21366933981577554
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,64,2,128,1,float16,fp8,8191,0.33604268232981366
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,64,2,128,1,float16,fp8,32767,1.4634613990783691
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,64,2,128,1,float16,float16,32767,3.722410519917806
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,64,2,128,1,float16,float16,16383,0.8454826672871908
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,64,2,128,1,float16,fp8,16383,0.6574026743570963
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,64,4,128,1,float16,fp8,1,0.010586666564146677
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,64,4,128,1,float16,float16,1,0.010581333190202713
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,64,4,128,1,float16,float16,3,0.010794666906197866
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,64,4,128,1,float16,float16,7,0.010591999938090643
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,64,4,128,1,float16,fp8,3,0.010954666882753372
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,64,4,128,1,float16,fp8,7,0.011247999966144562
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,64,4,128,1,float16,float16,15,0.010645333677530289
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,64,4,128,1,float16,fp8,63,0.010618666807810465
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,64,4,128,1,float16,fp8,15,0.010933333386977514
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,64,4,128,1,float16,float16,31,0.010581333190202713
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,64,4,128,1,float16,fp8,127,0.011264000087976456
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,64,4,128,1,float16,fp8,31,0.010928000013033548
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,64,4,128,1,float16,float16,127,0.010586666564146677
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,64,4,128,1,float16,fp8,255,0.010837333897749582
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,64,4,128,1,float16,float16,1023,0.012970666090647379
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,64,4,128,1,float16,float16,63,0.010746666540702185
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,64,4,128,1,float16,float16,2047,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,64,4,128,1,float16,float16,255,0.010922666639089584
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,64,4,128,1,float16,fp8,511,0.012624000509579977
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,64,4,128,1,float16,float16,511,0.012634667257467905
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,64,4,128,1,float16,fp8,1023,0.012629333883523941
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,64,4,128,1,float16,fp8,2047,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,64,4,128,1,float16,float16,4095,0.01877333347996076
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,64,4,128,1,float16,float16,8191,0.022874665757020313
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,64,8,128,1,float16,float16,1,0.010922666639089584
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,64,4,128,1,float16,fp8,32767,0.04369066655635834
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,64,4,128,1,float16,fp8,8191,0.021162666380405426
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,64,4,128,1,float16,fp8,16383,0.027306665976842243
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,64,4,128,1,float16,float16,16383,0.03925333420435587
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,64,4,128,1,float16,float16,32767,0.06075733403364817
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,64,4,128,1,float16,fp8,4095,0.019461333751678467
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,64,8,128,1,float16,float16,3,0.010928000013033548
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,64,8,128,1,float16,fp8,1,0.010581333190202713
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,64,8,128,1,float16,fp8,3,0.010586666564146677
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,64,8,128,1,float16,fp8,31,0.011264000087976456
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,64,8,128,1,float16,float16,7,0.010922666639089584
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,64,8,128,1,float16,fp8,7,0.010581333190202713
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,64,8,128,1,float16,float16,63,0.010586666564146677
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,64,8,128,1,float16,float16,15,0.010581333190202713
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,64,8,128,1,float16,fp8,63,0.010645333677530289
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,64,8,128,1,float16,fp8,15,0.010634666929642359
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,64,8,128,1,float16,float16,127,0.010922666639089584
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,64,8,128,1,float16,fp8,255,0.010586666564146677
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,64,8,128,1,float16,float16,31,0.010928000013033548
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,64,8,128,1,float16,fp8,511,0.012522666404644648
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,64,8,128,1,float16,fp8,127,0.010821333775917688
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,64,8,128,1,float16,float16,255,0.010581333190202713
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,64,8,128,1,float16,fp8,1023,0.012629333883523941
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,64,8,128,1,float16,float16,511,0.010922666639089584
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,64,8,128,1,float16,float16,1023,0.012970666090647379
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,64,8,128,1,float16,float16,2047,0.0170666662355264
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,64,8,128,1,float16,float16,4095,0.020821332931518555
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,64,8,128,1,float16,fp8,8191,0.024933333198229473
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,64,8,128,1,float16,fp8,4095,0.020810666183630627
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,64,8,128,1,float16,float16,8191,0.039247999588648476
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,64,8,128,1,float16,fp8,32767,0.06007466713587443
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,64,8,128,1,float16,float16,16383,0.059392000238100685
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,64,8,128,1,float16,fp8,2047,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,64,8,128,1,float16,float16,32767,0.10171733299891154
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,64,8,128,1,float16,fp8,16383,0.039605334401130676
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1024,1,64,1,128,1,float16,float16,1,0.7683413028717041
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1024,1,64,1,128,1,float16,fp8,3,0.6246399879455566
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1024,1,64,1,128,1,float16,fp8,1,0.6007413466771444
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1024,1,64,1,128,1,float16,float16,3,0.7869386672973633
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1024,1,64,1,128,1,float16,float16,7,0.8029812971750895
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1024,1,64,1,128,1,float16,float16,15,0.9589760303497314
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1024,1,64,1,128,1,float16,fp8,15,0.8296159903208414
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1024,1,64,1,128,1,float16,fp8,7,0.6463146607081095
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1024,1,64,1,128,1,float16,float16,31,0.96888534228007
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1024,1,64,1,128,1,float16,fp8,31,0.8296106656392416
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1024,1,64,1,128,1,float16,float16,63,0.973311980565389
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1024,1,64,1,128,1,float16,fp8,63,0.8337013721466064
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1024,1,64,2,128,1,float16,float16,3,0.7869439919789633
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1024,1,64,1,128,1,float16,float16,127,1.1801546414693196
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1024,1,64,1,128,1,float16,fp8,127,0.986624002456665
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1024,1,64,2,128,1,float16,float16,1,0.7686826388041178
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1024,1,64,2,128,1,float16,fp8,1,0.6007466713587443
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1024,1,64,2,128,1,float16,fp8,3,0.6239626804987589
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1024,1,64,2,128,1,float16,float16,15,0.9582880338033041
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1024,1,64,2,128,1,float16,float16,7,0.8023200035095215
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1024,1,64,2,128,1,float16,fp8,7,0.6490506728490194
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1024,1,64,2,128,1,float16,fp8,63,0.8350719610850016
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1024,1,64,2,128,1,float16,fp8,15,0.8292693297068278
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1024,1,64,2,128,1,float16,float16,31,0.9695413112640381
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1024,1,64,2,128,1,float16,fp8,31,0.8282399972279867
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1024,1,64,2,128,1,float16,float16,63,0.9821866353352865
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1024,1,64,2,128,1,float16,float16,127,1.2544000148773193
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1024,1,64,4,128,1,float16,fp8,1,0.08258666594823201
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1024,1,64,4,128,1,float16,float16,1,0.08874666690826416
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1024,1,64,2,128,1,float16,fp8,127,0.9941279888153076
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1024,1,64,4,128,1,float16,float16,3,0.08874666690826416
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1024,1,64,4,128,1,float16,fp8,3,0.08089066545168559
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1024,1,64,4,128,1,float16,float16,15,0.08840533097585042
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1024,1,64,4,128,1,float16,float16,7,0.08875200152397156
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1024,1,64,4,128,1,float16,fp8,15,0.08123733103275299
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1024,1,64,4,128,1,float16,fp8,7,0.08055999875068665
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1024,1,64,4,128,1,float16,float16,31,0.08806399504343669
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1024,1,64,4,128,1,float16,fp8,31,0.08055466910203297
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1024,1,64,4,128,1,float16,float16,63,0.08874666690826416
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1024,1,64,4,128,1,float16,float16,127,0.0897706647713979
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1024,1,64,4,128,1,float16,fp8,63,0.08226133386294048
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1024,1,64,8,128,1,float16,float16,1,0.1225386659304301
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1024,1,64,4,128,1,float16,fp8,127,0.08055466910203297
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1024,1,64,8,128,1,float16,fp8,1,0.0942133367061615
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1024,1,64,8,128,1,float16,fp8,7,0.09454400340716045
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1024,1,64,8,128,1,float16,float16,3,0.12117333213488261
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1024,1,64,8,128,1,float16,fp8,3,0.09386666615804036
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1024,1,64,8,128,1,float16,float16,7,0.12186132868131001
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1024,1,64,8,128,1,float16,float16,15,0.1225333313147227
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1024,1,64,8,128,1,float16,fp8,15,0.09454933802286784
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1024,1,64,8,128,1,float16,float16,63,0.12151466806729634
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1024,1,64,8,128,1,float16,float16,31,0.12152533729871114
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1024,1,64,8,128,1,float16,fp8,31,0.09352533022562663
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1024,1,64,8,128,1,float16,fp8,63,0.0942080020904541
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1024,1,64,8,128,1,float16,float16,127,0.12151466806729634
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1024,1,64,8,128,1,float16,fp8,127,0.09386666615804036
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2048,1,64,1,128,1,float16,fp8,1,1.198741356531779
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2048,1,64,1,128,1,float16,float16,1,1.534287929534912
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2048,1,64,1,128,1,float16,float16,3,1.5778080622355144
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2048,1,64,1,128,1,float16,fp8,3,1.246890703837077
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2048,1,64,1,128,1,float16,fp8,7,1.2941653728485107
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2048,1,64,1,128,1,float16,float16,7,1.609557310740153
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2048,1,64,1,128,1,float16,float16,15,1.9143679936726887
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2048,1,64,1,128,1,float16,fp8,15,1.6501760482788086
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2048,1,64,1,128,1,float16,float16,31,1.9631786346435547
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2048,1,64,1,128,1,float16,fp8,31,1.6542773246765137
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2048,1,64,1,128,1,float16,float16,63,2.0159145991007485
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2048,1,64,1,128,1,float16,fp8,63,1.6733706792195637
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2048,1,64,2,128,1,float16,float16,1,1.5424853960673015
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2048,1,64,2,128,1,float16,float16,7,1.6293493906656902
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2048,1,64,2,128,1,float16,fp8,1,1.2001279989878337
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2048,1,64,2,128,1,float16,fp8,3,1.244165341059367
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2048,1,64,2,128,1,float16,float16,3,1.597610632578532
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2048,1,64,2,128,1,float16,fp8,7,1.3085066477457683
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2048,1,64,2,128,1,float16,float16,15,1.9327999750773113
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2048,1,64,2,128,1,float16,fp8,31,1.6668960253397624
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2048,1,64,2,128,1,float16,fp8,15,1.6559839248657227
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2048,1,64,2,128,1,float16,float16,31,2.007045269012451
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2048,1,64,4,128,1,float16,float16,1,0.16435733437538147
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2048,1,64,4,128,1,float16,fp8,1,0.1527466674645742
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2048,1,64,4,128,1,float16,float16,3,0.16313599546750387
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2048,1,64,2,128,1,float16,float16,63,2.0381013552347818
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2048,1,64,2,128,1,float16,fp8,63,1.6926719347635906
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2048,1,64,4,128,1,float16,fp8,3,0.15411200126012167
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2048,1,64,4,128,1,float16,float16,31,0.1634719967842102
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2048,1,64,4,128,1,float16,float16,7,0.16331733266512552
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2048,1,64,4,128,1,float16,fp8,7,0.15377066532770792
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2048,1,64,4,128,1,float16,fp8,15,0.1532693306605021
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2048,1,64,4,128,1,float16,float16,15,0.1641866664091746
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2048,1,64,4,128,1,float16,float16,63,0.1641759971777598
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2048,1,64,4,128,1,float16,fp8,31,0.15291733543078104
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2048,1,64,4,128,1,float16,fp8,63,0.1539466679096222
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2048,1,64,8,128,1,float16,float16,1,0.2283573349316915
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2048,1,64,8,128,1,float16,fp8,1,0.17442133029301962
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2048,1,64,8,128,1,float16,float16,3,0.22937599817911783
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2048,1,64,8,128,1,float16,fp8,3,0.17476266622543335
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2048,1,64,8,128,1,float16,float16,7,0.22869332631429037
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2048,1,64,8,128,1,float16,fp8,7,0.17474132776260376
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2048,1,64,8,128,1,float16,float16,15,0.23072532812754312
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2048,1,64,8,128,1,float16,fp8,15,0.17595734198888144
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,64,1,128,1,float16,fp8,1,0.015029333531856537
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,64,1,128,1,float16,float16,1,0.01911466692884763
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,64,1,128,1,float16,float16,3,0.019130667050679524
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2048,1,64,8,128,1,float16,float16,31,0.2283626596132914
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,64,1,128,1,float16,fp8,3,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,64,1,128,1,float16,float16,7,0.019120000302791595
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,64,1,128,1,float16,float16,31,0.021168000996112823
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2048,1,64,8,128,1,float16,fp8,31,0.17425066232681274
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,64,1,128,1,float16,fp8,15,0.01877333347996076
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,64,1,128,1,float16,fp8,7,0.01498666654030482
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2048,1,64,8,128,1,float16,float16,63,0.2290346622467041
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,64,1,128,1,float16,float16,15,0.02080533280968666
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2048,1,64,8,128,1,float16,fp8,63,0.17493865887324014
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,64,1,128,1,float16,float16,63,0.021162666380405426
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,64,1,128,1,float16,fp8,31,0.018778666853904724
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,64,1,128,1,float16,float16,127,0.024890666206677754
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,64,1,128,1,float16,fp8,127,0.021151999632517498
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,64,1,128,1,float16,fp8,63,0.018768000106016796
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,64,1,128,1,float16,fp8,511,0.04949333270390829
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,64,1,128,1,float16,float16,255,0.03515733281771342
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,64,1,128,1,float16,fp8,255,0.031061333914597828
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,64,1,128,1,float16,fp8,1023,0.08635733524958293
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,64,1,128,1,float16,float16,511,0.05600533386071523
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,64,1,128,1,float16,float16,1023,0.09659733374913533
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,64,1,128,1,float16,float16,4095,0.3401333491007487
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,64,1,128,1,float16,fp8,8191,0.6007466713587443
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,64,1,128,1,float16,float16,2047,0.17680533727010092
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,64,1,128,1,float16,fp8,2047,0.15871999661127725
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,64,1,128,1,float16,fp8,4095,0.30565865834554035
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,64,2,128,1,float16,float16,1,0.01911466692884763
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,64,1,128,1,float16,float16,8191,0.6655999819437662
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,64,1,128,1,float16,fp8,16383,1.3085013230641682
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,64,2,128,1,float16,fp8,1,0.015008000036080679
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,64,1,128,1,float16,float16,16383,1.6803785959879558
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,64,2,128,1,float16,fp8,7,0.015360000232855478
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,64,2,128,1,float16,float16,3,0.019098666807015736
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,64,2,128,1,float16,float16,31,0.021162666380405426
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,64,2,128,1,float16,fp8,3,0.015018666783968607
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,64,2,128,1,float16,float16,7,0.019120000302791595
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,64,2,128,1,float16,fp8,15,0.018800000349680584
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,64,2,128,1,float16,float16,15,0.02081599955757459
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,64,2,128,1,float16,float16,63,0.021503999829292297
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,64,2,128,1,float16,fp8,31,0.01877333347996076
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,64,2,128,1,float16,fp8,63,0.018789333601792652
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,64,2,128,1,float16,float16,127,0.02491733431816101
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,64,2,128,1,float16,fp8,511,0.04949333270390829
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,64,2,128,1,float16,fp8,255,0.03105599929889043
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,64,2,128,1,float16,fp8,127,0.021168000996112823
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,64,2,128,1,float16,float16,255,0.03513599932193756
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,64,2,128,1,float16,float16,511,0.05563733478387197
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,64,2,128,1,float16,fp8,2047,0.16008533040682474
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,64,2,128,1,float16,float16,2047,0.1764693260192871
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,64,2,128,1,float16,fp8,1023,0.08635733524958293
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,64,2,128,1,float16,float16,1023,0.09659733374913533
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,64,2,128,1,float16,float16,4095,0.34219201405843097
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,64,2,128,1,float16,fp8,4095,0.30566932757695514
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,64,2,128,1,float16,float16,16383,1.9875839551289876
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,64,2,128,1,float16,float16,8191,0.7133813699086508
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,64,2,128,1,float16,fp8,16383,1.3373440106709797
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,64,2,128,1,float16,fp8,8191,0.6034773190816244
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,64,4,128,1,float16,float16,1,0.010928000013033548
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,64,4,128,1,float16,float16,3,0.01091733326514562
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,64,4,128,1,float16,float16,7,0.010842667271693548
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,64,4,128,1,float16,float16,15,0.012613333761692047
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,64,4,128,1,float16,fp8,1,0.010922666639089584
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,64,4,128,1,float16,float16,31,0.010928000013033548
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,64,4,128,1,float16,fp8,7,0.01126933346192042
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,64,4,128,1,float16,fp8,15,0.011258666714032492
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,64,4,128,1,float16,float16,63,0.010565333068370819
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,64,4,128,1,float16,fp8,31,0.011237333218256632
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,64,4,128,1,float16,fp8,3,0.012645332763592402
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,64,4,128,1,float16,fp8,127,0.010597333312034607
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,64,4,128,1,float16,float16,511,0.012661332885424295
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,64,4,128,1,float16,fp8,63,0.012629333883523941
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,64,4,128,1,float16,float16,127,0.010784000158309937
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,64,4,128,1,float16,fp8,255,0.012629333883523941
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,64,4,128,1,float16,float16,255,0.01129066695769628
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,64,4,128,1,float16,float16,2047,0.01877333347996076
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,64,4,128,1,float16,fp8,511,0.012629333883523941
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,64,4,128,1,float16,fp8,1023,0.012858666479587555
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,64,4,128,1,float16,float16,1023,0.014698666830857595
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,64,4,128,1,float16,fp8,2047,0.01877333347996076
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,64,4,128,1,float16,fp8,8191,0.026965332527955372
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,64,4,128,1,float16,float16,4095,0.022869333624839783
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,64,4,128,1,float16,fp8,4095,0.021173333128293354
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,64,4,128,1,float16,float16,8191,0.039594667653242745
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,64,4,128,1,float16,fp8,16383,0.04334933559099833
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,64,8,128,1,float16,fp8,1,0.01228800043463707
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,64,4,128,1,float16,float16,16383,0.06144533554712931
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,64,8,128,1,float16,float16,3,0.010821333775917688
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,64,8,128,1,float16,float16,1,0.012624000509579977
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,64,8,128,1,float16,fp8,15,0.012629333883523941
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,64,8,128,1,float16,fp8,31,0.010847999403874079
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,64,8,128,1,float16,float16,15,0.011231999844312668
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,64,8,128,1,float16,fp8,3,0.011237333218256632
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,64,8,128,1,float16,float16,7,0.012549333274364471
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,64,8,128,1,float16,fp8,7,0.010922666639089584
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,64,8,128,1,float16,float16,31,0.010928000013033548
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,64,8,128,1,float16,fp8,63,0.010922666639089584
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,64,8,128,1,float16,float16,127,0.011936000237862269
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,64,8,128,1,float16,float16,63,0.010928000013033548
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,64,8,128,1,float16,fp8,127,0.012629333883523941
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,64,8,128,1,float16,float16,1023,0.014650666465361914
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,64,8,128,1,float16,fp8,1023,0.014677333335081736
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,64,8,128,1,float16,fp8,255,0.012624000509579977
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,64,8,128,1,float16,float16,255,0.01126933346192042
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,64,8,128,1,float16,fp8,511,0.012629333883523941
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,64,8,128,1,float16,fp8,2047,0.01911466692884763
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,64,8,128,1,float16,float16,2047,0.020821332931518555
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,64,8,128,1,float16,fp8,4095,0.025258667767047882
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,64,8,128,1,float16,float16,4095,0.038575999438762665
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,64,8,128,1,float16,float16,511,0.012527999778588613
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,64,8,128,1,float16,float16,8191,0.06007466713587443
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,64,8,128,1,float16,fp8,8191,0.039936001102129616
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,32,1,128,1,float16,fp8,1,0.015018666783968607
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,32,1,128,1,float16,float16,3,0.01911466692884763
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,64,8,128,1,float16,float16,16383,0.10308266679445903
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,64,8,128,1,float16,fp8,16383,0.062122667829195656
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,32,1,128,1,float16,float16,1,0.018768000106016796
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,32,1,128,1,float16,float16,15,0.02081599955757459
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,32,1,128,1,float16,fp8,15,0.01877333347996076
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,32,1,128,1,float16,float16,7,0.01911466692884763
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,32,1,128,1,float16,fp8,31,0.01877333347996076
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,32,1,128,1,float16,fp8,7,0.015008000036080679
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,32,1,128,1,float16,float16,31,0.021183999876181286
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,32,1,128,1,float16,fp8,63,0.01877333347996076
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,32,1,128,1,float16,fp8,3,0.01498666654030482
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,32,1,128,1,float16,float16,127,0.02491733431816101
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,32,1,128,1,float16,float16,63,0.021141332884629566
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,32,1,128,1,float16,float16,255,0.035162667433420815
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,32,1,128,1,float16,fp8,511,0.04949333270390829
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,32,1,128,1,float16,fp8,1023,0.08635733524958293
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,32,1,128,1,float16,fp8,127,0.021162666380405426
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,32,1,128,1,float16,fp8,255,0.031061333914597828
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,32,1,128,1,float16,float16,511,0.05563733478387197
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,32,1,128,1,float16,float16,2047,0.1768266757329305
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,32,1,128,1,float16,fp8,2047,0.15838399529457092
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,32,1,128,1,float16,float16,4095,0.34013867378234863
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,32,1,128,1,float16,fp8,4095,0.30532266696294147
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,32,1,128,1,float16,float16,1023,0.09660266836484273
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,32,1,128,1,float16,float16,8191,0.7236266930898031
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,32,2,128,1,float16,float16,1,0.010591999938090643
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,32,2,128,1,float16,fp8,1,0.011498666057984034
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,32,2,128,1,float16,float16,3,0.01089599976936976
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,32,1,128,1,float16,fp8,8191,0.6017760038375854
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,32,1,128,1,float16,float16,16383,1.9838293393452961
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,32,2,128,1,float16,float16,15,0.010901333143313726
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,32,2,128,1,float16,fp8,7,0.010928000013033548
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,32,2,128,1,float16,fp8,15,0.010922666639089584
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,32,1,128,1,float16,fp8,16383,1.3707946141560872
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,32,2,128,1,float16,float16,7,0.012629333883523941
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,32,2,128,1,float16,fp8,3,0.010575999816258749
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,32,2,128,1,float16,float16,31,0.01081066702802976
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,32,2,128,1,float16,float16,63,0.010816000401973724
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,32,2,128,1,float16,float16,127,0.010922666639089584
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,32,2,128,1,float16,fp8,63,0.011264000087976456
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,32,2,128,1,float16,fp8,31,0.010922666639089584
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,32,2,128,1,float16,fp8,511,0.012624000509579977
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,32,2,128,1,float16,float16,1023,0.014666666587193808
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,32,2,128,1,float16,fp8,1023,0.01471466695268949
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,32,2,128,1,float16,float16,255,0.010922666639089584
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,32,2,128,1,float16,float16,511,0.012629333883523941
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,32,2,128,1,float16,fp8,127,0.010901333143313726
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,32,2,128,1,float16,fp8,2047,0.01877333347996076
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,32,2,128,1,float16,float16,4095,0.022869333624839783
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,32,2,128,1,float16,fp8,4095,0.021162666380405426
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,32,2,128,1,float16,float16,2047,0.01877333347996076
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,32,2,128,1,float16,fp8,255,0.01089599976936976
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,32,2,128,1,float16,float16,8191,0.039594667653242745
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,32,4,128,1,float16,float16,1,0.012602667013804117
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,32,2,128,1,float16,fp8,8191,0.027306665976842243
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,32,2,128,1,float16,fp8,16383,0.04334933559099833
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,32,4,128,1,float16,fp8,1,0.012629333883523941
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,32,2,128,1,float16,float16,16383,0.06007466713587443
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,32,4,128,1,float16,fp8,7,0.012597333639860153
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,32,4,128,1,float16,float16,3,0.011605333536863327
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,32,4,128,1,float16,fp8,15,0.011130666981140772
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,32,4,128,1,float16,float16,31,0.011957333733638128
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,32,4,128,1,float16,float16,7,0.010928000013033548
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,32,4,128,1,float16,float16,15,0.010634666929642359
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,32,4,128,1,float16,float16,63,0.010826667149861654
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,32,4,128,1,float16,float16,127,0.012693333129088083
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,32,4,128,1,float16,fp8,31,0.010933333386977514
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,32,4,128,1,float16,fp8,127,0.01249066616098086
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,32,4,128,1,float16,fp8,3,0.01091733326514562
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,32,4,128,1,float16,fp8,63,0.010581333190202713
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,32,4,128,1,float16,float16,511,0.012629333883523941
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,32,4,128,1,float16,float16,255,0.010586666564146677
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,32,4,128,1,float16,fp8,511,0.012709333250919977
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,32,4,128,1,float16,fp8,1023,0.014677333335081736
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,32,4,128,1,float16,fp8,255,0.01091733326514562
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,32,4,128,1,float16,float16,4095,0.03924266745646795
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,32,4,128,1,float16,fp8,2047,0.01877333347996076
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,32,4,128,1,float16,float16,8191,0.06142933170000712
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,32,4,128,1,float16,fp8,8191,0.039594667653242745
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,32,4,128,1,float16,fp8,4095,0.02491733431816101
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,32,4,128,1,float16,float16,1023,0.014671999961137772
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,32,4,128,1,float16,fp8,16383,0.062122667829195656
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,32,4,128,1,float16,float16,2047,0.021168000996112823
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,32,8,128,1,float16,fp8,1,0.012975999464591345
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,32,4,128,1,float16,float16,16383,0.10272533694903056
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,32,8,128,1,float16,float16,3,0.012629333883523941
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,32,8,128,1,float16,float16,1,0.012629333883523941
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,32,8,128,1,float16,fp8,3,0.012629333883523941
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,32,8,128,1,float16,fp8,7,0.012970666090647379
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,32,8,128,1,float16,fp8,15,0.012719999998807907
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,32,8,128,1,float16,fp8,31,0.012624000509579977
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,32,8,128,1,float16,float16,7,0.013658666362365087
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,32,8,128,1,float16,float16,31,0.012970666090647379
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,32,8,128,1,float16,float16,127,0.013653332988421122
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,32,8,128,1,float16,fp8,127,0.012608000387748083
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,32,8,128,1,float16,float16,63,0.012629333883523941
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,32,8,128,1,float16,fp8,63,0.012629333883523941
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,32,8,128,1,float16,float16,255,0.012634667257467905
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,32,8,128,1,float16,float16,15,0.012847999731699625
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,32,8,128,1,float16,fp8,511,0.014997333288192749
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,32,8,128,1,float16,float16,1023,0.01911466692884763
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,32,8,128,1,float16,fp8,255,0.012624000509579977
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,32,8,128,1,float16,float16,511,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,32,8,128,1,float16,fp8,1023,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,32,8,128,1,float16,float16,2047,0.039594667653242745
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,32,8,128,1,float16,fp8,2047,0.0266239990790685
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,32,8,128,1,float16,fp8,4095,0.04164266586303711
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,32,8,128,1,float16,float16,8191,0.10442666212717693
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,32,1,128,1,float16,fp8,1,0.008527999743819237
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,32,1,128,1,float16,float16,3,0.008799999952316284
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,32,1,128,1,float16,float16,1,0.00877333308259646
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,32,1,128,1,float16,fp8,3,0.008874666566650072
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,32,8,128,1,float16,fp8,8191,0.06417066852251689
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,32,8,128,1,float16,float16,4095,0.06178133189678192
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,32,8,128,1,float16,fp8,16383,0.10752000411351521
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,32,8,128,1,float16,float16,16383,0.19165333112080893
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,32,1,128,1,float16,fp8,7,0.008869333192706108
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,32,1,128,1,float16,float16,7,0.00877333308259646
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,32,1,128,1,float16,fp8,15,0.00850133349498113
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,32,1,128,1,float16,float16,15,0.008874666566650072
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,32,1,128,1,float16,float16,127,0.010586666564146677
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,32,1,128,1,float16,fp8,127,0.010586666564146677
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,32,1,128,1,float16,float16,31,0.010608000059922537
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,32,1,128,1,float16,float16,255,0.013994666437307993
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,32,1,128,1,float16,float16,63,0.00878399983048439
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,32,1,128,1,float16,fp8,255,0.014677333335081736
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,32,1,128,1,float16,fp8,63,0.010575999816258749
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,32,1,128,1,float16,float16,511,0.014677333335081736
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,32,1,128,1,float16,fp8,511,0.014661333213249842
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,32,1,128,1,float16,fp8,31,0.010581333190202713
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,32,1,128,1,float16,fp8,1023,0.022869333624839783
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,32,1,128,1,float16,fp8,2047,0.0290133332212766
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,32,1,128,1,float16,fp8,4095,0.039247999588648476
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,32,1,128,1,float16,float16,1023,0.022869333624839783
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,32,1,128,1,float16,float16,8191,0.06178133189678192
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,32,1,128,1,float16,float16,2047,0.029018667836983997
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,32,1,128,1,float16,float16,4095,0.041306667029857635
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,32,1,128,1,float16,float16,16383,0.10478400190671285
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,32,1,128,1,float16,float16,32767,0.1909760038057963
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,32,1,128,1,float16,fp8,16383,0.09454933802286784
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,32,2,128,1,float16,fp8,1,0.010816000401973724
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,32,1,128,1,float16,fp8,8191,0.0576800008614858
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,32,2,128,1,float16,float16,3,0.010949333508809408
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,32,2,128,1,float16,float16,1,0.010586666564146677
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,32,1,128,1,float16,float16,65535,0.36300798257191974
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,32,2,128,1,float16,float16,7,0.010826667149861654
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,32,1,128,1,float16,fp8,32767,0.17269867658615112
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,32,1,128,1,float16,fp8,65535,0.32546132802963257
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,32,2,128,1,float16,fp8,3,0.011264000087976456
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,32,2,128,1,float16,float16,15,0.010565333068370819
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,32,2,128,1,float16,fp8,31,0.010933333386977514
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,32,2,128,1,float16,float16,127,0.010581333190202713
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,32,2,128,1,float16,fp8,15,0.010581333190202713
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,32,2,128,1,float16,float16,31,0.010581333190202713
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,32,2,128,1,float16,fp8,127,0.010591999938090643
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,32,2,128,1,float16,fp8,63,0.010581333190202713
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,32,2,128,1,float16,float16,255,0.01081066702802976
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,32,2,128,1,float16,float16,63,0.010581333190202713
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,32,2,128,1,float16,fp8,255,0.010922666639089584
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,32,2,128,1,float16,fp8,7,0.01126933346192042
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,32,2,128,1,float16,float16,1023,0.010928000013033548
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,32,2,128,1,float16,float16,2047,0.012517333030700684
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,32,2,128,1,float16,float16,511,0.01091733326514562
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,32,2,128,1,float16,fp8,511,0.012629333883523941
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,32,2,128,1,float16,fp8,1023,0.012629333883523941
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,32,2,128,1,float16,float16,4095,0.012975999464591345
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,32,2,128,1,float16,float16,8191,0.01877333347996076
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,32,2,128,1,float16,fp8,2047,0.012629333883523941
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,32,2,128,1,float16,fp8,8191,0.0191040001809597
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,32,4,128,1,float16,float16,1,0.010640000303586325
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,32,2,128,1,float16,fp8,16383,0.026965332527955372
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,32,2,128,1,float16,float16,16383,0.026965332527955372
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,32,2,128,1,float16,fp8,32767,0.030720000465710957
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,32,2,128,1,float16,float16,32767,0.029706666866938274
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,32,2,128,1,float16,fp8,4095,0.014671999961137772
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,32,2,128,1,float16,float16,65535,0.031744000812371574
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,32,4,128,1,float16,fp8,1,0.010581333190202713
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,32,2,128,1,float16,fp8,65535,0.0314026673634847
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,32,4,128,1,float16,float16,7,0.010586666564146677
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,32,4,128,1,float16,fp8,15,0.010922666639089584
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,32,4,128,1,float16,float16,3,0.010581333190202713
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,32,4,128,1,float16,fp8,7,0.0106133334338665
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,32,4,128,1,float16,fp8,3,0.010586666564146677
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,32,4,128,1,float16,float16,31,0.010629333555698395
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,32,4,128,1,float16,fp8,63,0.010575999816258749
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,32,4,128,1,float16,float16,63,0.010586666564146677
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,32,4,128,1,float16,fp8,255,0.010581333190202713
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,32,4,128,1,float16,float16,15,0.010597333312034607
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,32,4,128,1,float16,fp8,31,0.010586666564146677
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,32,4,128,1,float16,float16,127,0.010586666564146677
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,32,4,128,1,float16,fp8,127,0.010650667051474253
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,32,4,128,1,float16,float16,255,0.010581333190202713
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,32,4,128,1,float16,float16,511,0.010922666639089584
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,32,4,128,1,float16,fp8,511,0.010922666639089584
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,32,4,128,1,float16,float16,1023,0.010575999816258749
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,32,4,128,1,float16,float16,8191,0.01878400022784869
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,32,4,128,1,float16,fp8,8191,0.01877333347996076
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,32,4,128,1,float16,float16,4095,0.012629333883523941
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,32,4,128,1,float16,fp8,2047,0.012624000509579977
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,32,4,128,1,float16,float16,16383,0.021130666136741638
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,32,4,128,1,float16,fp8,4095,0.012629333883523941
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,32,4,128,1,float16,float16,2047,0.011600000162919363
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,32,4,128,1,float16,fp8,16383,0.02117866774400075
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,32,4,128,1,float16,fp8,1023,0.010922666639089584
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,32,4,128,1,float16,float16,32767,0.02491733431816101
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,32,4,128,1,float16,fp8,65535,0.02492266645034154
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,32,8,128,1,float16,float16,3,0.010581333190202713
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,32,8,128,1,float16,float16,1,0.010480000327030817
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,32,4,128,1,float16,fp8,32767,0.022869333624839783
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,32,4,128,1,float16,float16,65535,0.027306665976842243
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,32,8,128,1,float16,fp8,1,0.010559999694426855
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,32,8,128,1,float16,float16,15,0.010591999938090643
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,32,8,128,1,float16,fp8,7,0.010575999816258749
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,32,8,128,1,float16,float16,7,0.010581333190202713
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,32,8,128,1,float16,fp8,15,0.010581333190202713
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,32,8,128,1,float16,float16,31,0.010581333190202713
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,32,8,128,1,float16,fp8,31,0.010901333143313726
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,32,8,128,1,float16,fp8,63,0.010597333312034607
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,32,8,128,1,float16,fp8,3,0.010597333312034607
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,32,8,128,1,float16,float16,127,0.010687999427318573
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,32,8,128,1,float16,float16,63,0.010581333190202713
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,32,8,128,1,float16,fp8,511,0.010922666639089584
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,32,8,128,1,float16,fp8,1023,0.010634666929642359
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,32,8,128,1,float16,fp8,255,0.010581333190202713
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,32,8,128,1,float16,float16,255,0.010821333775917688
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,32,8,128,1,float16,fp8,127,0.010645333677530289
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,32,8,128,1,float16,float16,511,0.010586666564146677
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,32,8,128,1,float16,float16,4095,0.015024000157912573
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,32,8,128,1,float16,fp8,2047,0.012645332763592402
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,32,8,128,1,float16,float16,2047,0.010826667149861654
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,32,8,128,1,float16,float16,16383,0.020821332931518555
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,32,8,128,1,float16,fp8,4095,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,32,8,128,1,float16,float16,8191,0.01877333347996076
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,32,8,128,1,float16,float16,1023,0.010570666442314783
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,32,8,128,1,float16,fp8,8191,0.01877333347996076
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,32,8,128,1,float16,fp8,16383,0.019093333433071773
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,32,8,128,1,float16,float16,32767,0.02489600082238515
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,32,1,128,1,float16,fp8,3,0.0085333331177632
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,32,1,128,1,float16,float16,1,0.0085333331177632
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,32,1,128,1,float16,float16,7,0.008778666456540426
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,32,1,128,1,float16,float16,3,0.008645333349704742
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,32,8,128,1,float16,float16,65535,0.039264000952243805
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,32,8,128,1,float16,fp8,32767,0.022522665560245514
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,32,8,128,1,float16,fp8,65535,0.026965332527955372
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,32,1,128,1,float16,fp8,7,0.00854399986565113
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,32,1,128,1,float16,fp8,15,0.008650666723648706
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,32,1,128,1,float16,fp8,1,0.008538666491707167
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,32,1,128,1,float16,float16,31,0.009898666913310686
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,32,1,128,1,float16,float16,15,0.008752000207702318
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,32,1,128,1,float16,float16,127,0.010591999938090643
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,32,1,128,1,float16,fp8,31,0.009904000287254652
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,32,1,128,1,float16,fp8,127,0.010581333190202713
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,32,1,128,1,float16,fp8,63,0.010586666564146677
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,32,1,128,1,float16,float16,63,0.010586666564146677
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,32,1,128,1,float16,fp8,511,0.022858666876951855
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,32,1,128,1,float16,fp8,255,0.014677333335081736
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,32,1,128,1,float16,fp8,1023,0.02696000039577484
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,32,1,128,1,float16,float16,511,0.023205332458019257
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,32,1,128,1,float16,float16,1023,0.027306665976842243
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,32,1,128,1,float16,float16,4095,0.05565333366394043
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,32,1,128,1,float16,float16,2047,0.037205333511034645
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,32,1,128,1,float16,fp8,4095,0.051541333397229515
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,32,1,128,1,float16,float16,8191,0.0921493371327718
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,32,1,128,1,float16,fp8,2047,0.03513066718975703
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,32,1,128,1,float16,float16,255,0.014682666709025701
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,32,1,128,1,float16,fp8,16383,0.14779733618100485
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,32,1,128,1,float16,float16,16383,0.16247466206550598
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,32,1,128,1,float16,fp8,8191,0.08260266482830048
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,32,1,128,1,float16,fp8,32767,0.27665066719055176
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,32,2,128,1,float16,fp8,1,0.010586666564146677
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,32,1,128,1,float16,float16,65535,0.5922133525212606
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,32,1,128,1,float16,float16,32767,0.30668266614278156
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,32,2,128,1,float16,float16,3,0.010922666639089584
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,32,1,128,1,float16,fp8,65535,0.5290666818618774
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,32,2,128,1,float16,fp8,7,0.010559999694426855
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,32,2,128,1,float16,fp8,3,0.010586666564146677
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,32,2,128,1,float16,float16,15,0.010922666639089584
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,32,2,128,1,float16,float16,7,0.010586666564146677
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,32,2,128,1,float16,float16,1,0.011231999844312668
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,32,2,128,1,float16,float16,31,0.010597333312034607
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,32,2,128,1,float16,fp8,15,0.010559999694426855
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,32,2,128,1,float16,float16,63,0.010485333700974783
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,32,2,128,1,float16,float16,127,0.010581333190202713
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,32,2,128,1,float16,fp8,63,0.010597333312034607
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,32,2,128,1,float16,fp8,127,0.010832000523805618
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,32,2,128,1,float16,fp8,255,0.010922666639089584
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,32,2,128,1,float16,fp8,31,0.010586666564146677
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,32,2,128,1,float16,fp8,511,0.010837333897749582
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,32,2,128,1,float16,float16,1023,0.010922666639089584
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,32,2,128,1,float16,float16,511,0.011946666985750198
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,32,2,128,1,float16,float16,255,0.010591999938090643
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,32,2,128,1,float16,fp8,4095,0.014698666830857595
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,32,2,128,1,float16,fp8,2047,0.012965332716703415
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,32,2,128,1,float16,fp8,1023,0.011247999966144562
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,32,2,128,1,float16,float16,8191,0.020842666427294414
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,32,2,128,1,float16,float16,2047,0.012629333883523941
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,32,2,128,1,float16,float16,4095,0.014677333335081736
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,32,2,128,1,float16,fp8,16383,0.023215999205907185
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,32,2,128,1,float16,fp8,8191,0.020821332931518555
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,32,2,128,1,float16,float16,32767,0.025258667767047882
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,32,2,128,1,float16,float16,16383,0.02422400067249934
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,32,2,128,1,float16,float16,65535,0.028330666323502857
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,32,2,128,1,float16,fp8,32767,0.024911999702453613
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,32,4,128,1,float16,fp8,1,0.010586666564146677
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,32,2,128,1,float16,fp8,65535,0.027290667096773785
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,32,4,128,1,float16,float16,1,0.010570666442314783
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,32,4,128,1,float16,float16,3,0.010581333190202713
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,32,4,128,1,float16,fp8,15,0.010581333190202713
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,32,4,128,1,float16,fp8,3,0.010591999938090643
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,32,4,128,1,float16,fp8,7,0.010928000013033548
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,32,4,128,1,float16,float16,15,0.010464000205198923
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,32,4,128,1,float16,float16,7,0.010581333190202713
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,32,4,128,1,float16,float16,31,0.010581333190202713
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,32,4,128,1,float16,fp8,63,0.010591999938090643
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,32,4,128,1,float16,fp8,31,0.010911999891201654
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,32,4,128,1,float16,float16,63,0.010581333190202713
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,32,4,128,1,float16,fp8,127,0.010928000013033548
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,32,4,128,1,float16,float16,127,0.010586666564146677
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,32,4,128,1,float16,fp8,511,0.010826667149861654
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,32,4,128,1,float16,float16,255,0.010608000059922537
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,32,4,128,1,float16,fp8,255,0.01090666651725769
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,32,4,128,1,float16,float16,2047,0.012629333883523941
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,32,4,128,1,float16,float16,511,0.010581333190202713
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,32,4,128,1,float16,fp8,2047,0.012629333883523941
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,32,4,128,1,float16,float16,1023,0.011258666714032492
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,32,4,128,1,float16,fp8,1023,0.010922666639089584
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,32,4,128,1,float16,float16,4095,0.016730666160583496
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,32,4,128,1,float16,fp8,8191,0.01877333347996076
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,32,4,128,1,float16,fp8,4095,0.016730666160583496
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,32,4,128,1,float16,fp8,16383,0.020479999482631683
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,32,4,128,1,float16,float16,16383,0.02184533327817917
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,32,4,128,1,float16,fp8,32767,0.022869333624839783
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,32,4,128,1,float16,float16,8191,0.019088000059127808
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,32,4,128,1,float16,float16,65535,0.04130133241415024
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,32,4,128,1,float16,float16,32767,0.02491733431816101
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,32,4,128,1,float16,fp8,65535,0.027301333844661713
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,32,8,128,1,float16,fp8,1,0.010581333190202713
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,32,8,128,1,float16,float16,1,0.010581333190202713
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,32,8,128,1,float16,fp8,7,0.010581333190202713
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,32,8,128,1,float16,fp8,15,0.01091733326514562
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,32,8,128,1,float16,float16,3,0.010575999816258749
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,32,8,128,1,float16,float16,31,0.010640000303586325
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,32,8,128,1,float16,fp8,3,0.010645333677530289
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,32,8,128,1,float16,float16,15,0.010575999816258749
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,32,8,128,1,float16,float16,63,0.010581333190202713
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,32,8,128,1,float16,float16,7,0.01055466632048289
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,32,8,128,1,float16,fp8,63,0.010928000013033548
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,32,8,128,1,float16,fp8,31,0.010784000158309937
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,32,8,128,1,float16,fp8,127,0.010586666564146677
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,32,8,128,1,float16,float16,511,0.010821333775917688
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,32,8,128,1,float16,float16,127,0.010581333190202713
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,32,8,128,1,float16,float16,255,0.010581333190202713
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,32,8,128,1,float16,float16,1023,0.011621333658695221
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,32,8,128,1,float16,fp8,255,0.010586666564146677
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,32,8,128,1,float16,float16,2047,0.015018666783968607
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,32,8,128,1,float16,fp8,2047,0.014677333335081736
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,32,8,128,1,float16,fp8,1023,0.01091733326514562
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,32,8,128,1,float16,fp8,511,0.01128000020980835
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,32,8,128,1,float16,float16,8191,0.01911466692884763
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,32,8,128,1,float16,fp8,4095,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,32,8,128,1,float16,float16,4095,0.01877333347996076
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,32,8,128,1,float16,float16,16383,0.024256000916163128
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,32,8,128,1,float16,fp8,16383,0.020821332931518555
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,32,8,128,1,float16,fp8,8191,0.01877333347996076
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,32,1,128,1,float16,fp8,1,0.025237334271272022
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,32,8,128,1,float16,float16,32767,0.04027199993530909
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,32,8,128,1,float16,float16,65535,0.0580266664425532
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,32,1,128,1,float16,float16,3,0.031061333914597828
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,32,8,128,1,float16,fp8,32767,0.025263999899228413
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,32,1,128,1,float16,float16,1,0.029696000119050343
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,32,8,128,1,float16,fp8,65535,0.03926933308442434
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,32,1,128,1,float16,float16,7,0.031066666046778362
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,32,1,128,1,float16,fp8,15,0.031061333914597828
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,32,1,128,1,float16,float16,15,0.03515733281771342
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,32,1,128,1,float16,fp8,7,0.026975999275843304
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,32,1,128,1,float16,fp8,3,0.025455998877684276
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,32,1,128,1,float16,fp8,63,0.0314026673634847
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,32,1,128,1,float16,fp8,31,0.0314026673634847
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,32,1,128,1,float16,float16,127,0.04197333256403605
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,32,1,128,1,float16,fp8,127,0.03719999889532725
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,32,1,128,1,float16,float16,31,0.03515200068553289
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,32,1,128,1,float16,float16,63,0.03515733281771342
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,32,1,128,1,float16,float16,511,0.10069333513577779
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,32,1,128,1,float16,float16,255,0.06178133189678192
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,32,1,128,1,float16,float16,1023,0.17667200167973837
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,32,1,128,1,float16,fp8,1023,0.1621333360671997
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,32,1,128,1,float16,fp8,511,0.09045333663622539
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,32,1,128,1,float16,fp8,255,0.055642664432525635
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,32,1,128,1,float16,float16,2047,0.3278613289197286
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,32,1,128,1,float16,fp8,2047,0.30395734310150146
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,32,1,128,1,float16,float16,4095,0.6487040122350057
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,32,1,128,1,float16,float16,8191,1.4281439781188965
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,32,1,128,1,float16,fp8,8191,1.1743573347727458
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,32,1,128,1,float16,fp8,4095,0.5918720165888468
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,32,2,128,1,float16,float16,1,0.012650666137536367
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,32,2,128,1,float16,fp8,1,0.012634667257467905
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,32,2,128,1,float16,float16,7,0.012629333883523941
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,32,2,128,1,float16,fp8,7,0.012629333883523941
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,32,2,128,1,float16,float16,15,0.011946666985750198
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,32,2,128,1,float16,float16,3,0.012709333250919977
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,32,2,128,1,float16,fp8,3,0.012629333883523941
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,32,2,128,1,float16,float16,31,0.012629333883523941
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,32,2,128,1,float16,fp8,15,0.012629333883523941
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,32,2,128,1,float16,float16,63,0.012624000509579977
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,32,2,128,1,float16,fp8,63,0.012629333883523941
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,32,2,128,1,float16,float16,127,0.011600000162919363
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,32,2,128,1,float16,fp8,31,0.012639999389648438
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,32,2,128,1,float16,fp8,127,0.012517333030700684
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,32,2,128,1,float16,float16,255,0.012629333883523941
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,32,2,128,1,float16,fp8,511,0.012624000509579977
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,32,2,128,1,float16,fp8,255,0.012629333883523941
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,32,2,128,1,float16,float16,1023,0.015360000232855478
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,32,2,128,1,float16,fp8,1023,0.014682666709025701
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,32,2,128,1,float16,float16,511,0.012970666090647379
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,32,4,128,1,float16,float16,1,0.012965332716703415
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,32,2,128,1,float16,fp8,4095,0.027301333844661713
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,32,2,128,1,float16,fp8,2047,0.020821332931518555
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,32,2,128,1,float16,float16,2047,0.023215999205907185
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,32,2,128,1,float16,fp8,8191,0.04539733131726583
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,32,2,128,1,float16,float16,4095,0.039936001102129616
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,32,2,128,1,float16,float16,8191,0.06075199941794077
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,32,4,128,1,float16,fp8,1,0.012970666090647379
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,32,4,128,1,float16,float16,3,0.013653332988421122
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,32,4,128,1,float16,fp8,3,0.012714666624863943
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,32,4,128,1,float16,float16,15,0.013994666437307993
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,32,4,128,1,float16,float16,7,0.012970666090647379
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,32,4,128,1,float16,fp8,7,0.012970666090647379
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,32,4,128,1,float16,float16,63,0.0143306665122509
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,32,4,128,1,float16,fp8,15,0.013066666821638743
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,32,4,128,1,float16,fp8,63,0.012629333883523941
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,32,4,128,1,float16,fp8,31,0.012602667013804117
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,32,4,128,1,float16,float16,31,0.013034666577974955
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,32,4,128,1,float16,float16,127,0.013546666751305262
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,32,4,128,1,float16,fp8,255,0.012975999464591345
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,32,4,128,1,float16,fp8,127,0.012629333883523941
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,32,4,128,1,float16,fp8,511,0.014677333335081736
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,32,4,128,1,float16,float16,2047,0.04094400008519491
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,32,4,128,1,float16,float16,1023,0.019088000059127808
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,32,4,128,1,float16,float16,511,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,32,4,128,1,float16,float16,255,0.014677333335081736
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,32,4,128,1,float16,fp8,1023,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,32,4,128,1,float16,fp8,2047,0.02696000039577484
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,32,4,128,1,float16,fp8,4095,0.04164266586303711
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,32,4,128,1,float16,float16,8191,0.10513066252072652
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,32,8,128,1,float16,fp8,3,0.015018666783968607
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,32,4,128,1,float16,float16,4095,0.062122667829195656
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,32,8,128,1,float16,float16,3,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,32,8,128,1,float16,float16,1,0.016704000532627106
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,32,4,128,1,float16,fp8,8191,0.06383466720581055
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,32,8,128,1,float16,fp8,1,0.015018666783968607
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,32,8,128,1,float16,float16,7,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,32,8,128,1,float16,fp8,7,0.015024000157912573
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,32,8,128,1,float16,float16,15,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,32,8,128,1,float16,float16,31,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,32,8,128,1,float16,fp8,127,0.014677333335081736
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,32,8,128,1,float16,float16,127,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,32,8,128,1,float16,fp8,15,0.015018666783968607
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,32,8,128,1,float16,fp8,31,0.015013333410024643
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,32,8,128,1,float16,float16,255,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,32,8,128,1,float16,float16,63,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,32,8,128,1,float16,fp8,63,0.015024000157912573
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,32,8,128,1,float16,float16,511,0.020821332931518555
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,32,8,128,1,float16,fp8,511,0.01877333347996076
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,32,8,128,1,float16,float16,2047,0.06211733321348826
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,32,8,128,1,float16,fp8,255,0.014677333335081736
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,32,8,128,1,float16,fp8,1023,0.02491733431816101
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,32,8,128,1,float16,float16,1023,0.03583999971548716
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,32,8,128,1,float16,fp8,2047,0.04130133241415024
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,32,8,128,1,float16,fp8,4095,0.06417066852251689
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,32,8,128,1,float16,float16,4095,0.10545600454012553
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,32,8,128,1,float16,float16,8191,0.19114132722218832
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,32,8,128,1,float16,fp8,8191,0.10923199852307637
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,32,1,128,1,float16,float16,1,0.00884799969693025
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,32,1,128,1,float16,float16,7,0.008885333314538002
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,32,1,128,1,float16,fp8,3,0.0085333331177632
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,32,1,128,1,float16,fp8,1,0.0085333331177632
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,32,1,128,1,float16,float16,3,0.008698666468262672
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,32,1,128,1,float16,float16,15,0.008767999708652496
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,32,1,128,1,float16,fp8,15,0.008767999708652496
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,32,1,128,1,float16,fp8,31,0.0106133334338665
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,32,1,128,1,float16,fp8,7,0.008874666566650072
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,32,1,128,1,float16,float16,31,0.010586666564146677
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,32,1,128,1,float16,fp8,127,0.010581333190202713
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,32,1,128,1,float16,fp8,255,0.022869333624839783
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,32,1,128,1,float16,float16,63,0.01044800008336703
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,32,1,128,1,float16,float16,255,0.022869333624839783
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,32,1,128,1,float16,fp8,63,0.010581333190202713
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,32,1,128,1,float16,float16,127,0.010581333190202713
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,32,1,128,1,float16,fp8,511,0.024911999702453613
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,32,1,128,1,float16,float16,1023,0.03345066557327906
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,32,1,128,1,float16,fp8,1023,0.0314026673634847
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,32,1,128,1,float16,fp8,2047,0.04369066655635834
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,32,1,128,1,float16,float16,511,0.0266239990790685
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,32,1,128,1,float16,float16,2047,0.04744533201058706
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,32,1,128,1,float16,float16,4095,0.07441066702206929
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,32,1,128,1,float16,fp8,4095,0.068271999557813
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,32,1,128,1,float16,fp8,8191,0.11707199613253276
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,32,1,128,1,float16,float16,16383,0.2379093368848165
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,32,1,128,1,float16,fp8,16383,0.21128000815709433
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,32,1,128,1,float16,float16,8191,0.1293653349081675
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,32,2,128,1,float16,float16,1,0.010586666564146677
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,32,2,128,1,float16,fp8,1,0.010832000523805618
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,32,1,128,1,float16,float16,32767,0.45311999320983887
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,32,2,128,1,float16,float16,3,0.010559999694426855
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,32,1,128,1,float16,fp8,32767,0.4041386842727661
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,32,2,128,1,float16,fp8,3,0.010922666639089584
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,32,1,128,1,float16,float16,65535,0.8881440162658691
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,32,2,128,1,float16,fp8,7,0.010842667271693548
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,32,2,128,1,float16,float16,7,0.01062400018175443
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,32,2,128,1,float16,fp8,15,0.010922666639089584
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,32,1,128,1,float16,fp8,65535,0.7886239687601725
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,32,2,128,1,float16,float16,15,0.010586666564146677
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,32,2,128,1,float16,fp8,63,0.010586666564146677
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,32,2,128,1,float16,float16,127,0.010784000158309937
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,32,2,128,1,float16,fp8,127,0.010949333508809408
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,32,2,128,1,float16,fp8,31,0.010832000523805618
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,32,2,128,1,float16,float16,31,0.010581333190202713
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,32,2,128,1,float16,float16,63,0.010581333190202713
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,32,2,128,1,float16,float16,511,0.010581333190202713
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,32,2,128,1,float16,float16,1023,0.012629333883523941
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,32,2,128,1,float16,fp8,511,0.012650666137536367
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,32,2,128,1,float16,float16,255,0.010922666639089584
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,32,2,128,1,float16,fp8,1023,0.012634667257467905
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,32,2,128,1,float16,fp8,255,0.010922666639089584
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,32,2,128,1,float16,float16,2047,0.014682666709025701
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,32,2,128,1,float16,float16,4095,0.018778666853904724
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,32,2,128,1,float16,fp8,2047,0.014677333335081736
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,32,2,128,1,float16,fp8,4095,0.01911466692884763
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,32,2,128,1,float16,float16,8191,0.021173333128293354
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,32,2,128,1,float16,float16,16383,0.02458133300145467
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,32,2,128,1,float16,fp8,32767,0.025253333151340485
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,32,2,128,1,float16,fp8,8191,0.021509334444999695
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,32,4,128,1,float16,float16,1,0.010575999816258749
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,32,2,128,1,float16,fp8,16383,0.023205332458019257
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,32,2,128,1,float16,fp8,65535,0.031061333914597828
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,32,2,128,1,float16,float16,32767,0.028010666370391846
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,32,2,128,1,float16,float16,65535,0.04539733131726583
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,32,4,128,1,float16,float16,3,0.010490667074918747
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,32,4,128,1,float16,float16,7,0.01055466632048289
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,32,4,128,1,float16,fp8,1,0.010581333190202713
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,32,4,128,1,float16,float16,15,0.010581333190202713
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,32,4,128,1,float16,fp8,31,0.010591999938090643
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,32,4,128,1,float16,fp8,3,0.010901333143313726
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,32,4,128,1,float16,fp8,7,0.010928000013033548
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,32,4,128,1,float16,float16,63,0.010591999938090643
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,32,4,128,1,float16,fp8,15,0.010591999938090643
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,32,4,128,1,float16,float16,31,0.010581333190202713
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,32,4,128,1,float16,float16,127,0.010581333190202713
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,32,4,128,1,float16,fp8,63,0.010581333190202713
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,32,4,128,1,float16,fp8,127,0.010581333190202713
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,32,4,128,1,float16,float16,511,0.01091733326514562
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,32,4,128,1,float16,fp8,1023,0.010922666639089584
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,32,4,128,1,float16,float16,255,0.010581333190202713
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,32,4,128,1,float16,fp8,255,0.010618666807810465
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,32,4,128,1,float16,fp8,2047,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,32,4,128,1,float16,float16,1023,0.011616000284751257
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,32,4,128,1,float16,fp8,511,0.012629333883523941
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,32,4,128,1,float16,float16,4095,0.019120000302791595
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,32,4,128,1,float16,float16,2047,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,32,4,128,1,float16,fp8,4095,0.018805333723624546
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,32,4,128,1,float16,float16,16383,0.025600001215934753
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,32,4,128,1,float16,float16,8191,0.02081599955757459
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,32,4,128,1,float16,fp8,8191,0.02081599955757459
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,32,4,128,1,float16,fp8,16383,0.022869333624839783
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,32,4,128,1,float16,fp8,32767,0.027306665976842243
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,32,8,128,1,float16,float16,1,0.010832000523805618
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,32,4,128,1,float16,float16,65535,0.06075733403364817
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,32,4,128,1,float16,fp8,65535,0.04164266586303711
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,32,4,128,1,float16,float16,32767,0.04130133241415024
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,32,8,128,1,float16,fp8,1,0.010565333068370819
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,32,8,128,1,float16,float16,3,0.010581333190202713
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,32,8,128,1,float16,fp8,7,0.010575999816258749
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,32,8,128,1,float16,fp8,15,0.0106133334338665
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,32,8,128,1,float16,fp8,3,0.010581333190202713
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,32,8,128,1,float16,float16,7,0.010922666639089584
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,32,8,128,1,float16,float16,15,0.010922666639089584
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,32,8,128,1,float16,float16,31,0.010581333190202713
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,32,8,128,1,float16,fp8,63,0.010677333921194077
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,32,8,128,1,float16,float16,127,0.010586666564146677
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,32,8,128,1,float16,fp8,31,0.010575999816258749
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,32,8,128,1,float16,float16,63,0.010586666564146677
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,32,8,128,1,float16,fp8,127,0.010581333190202713
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,32,8,128,1,float16,fp8,511,0.012634667257467905
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,32,8,128,1,float16,fp8,255,0.010922666639089584
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,32,8,128,1,float16,float16,1023,0.012624000509579977
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,32,8,128,1,float16,float16,255,0.010581333190202713
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,32,8,128,1,float16,float16,511,0.010821333775917688
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,32,8,128,1,float16,fp8,2047,0.014677333335081736
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,32,8,128,1,float16,float16,4095,0.01877333347996076
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,32,8,128,1,float16,fp8,1023,0.012645332763592402
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,32,8,128,1,float16,float16,8191,0.022863999009132385
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,32,8,128,1,float16,float16,2047,0.015024000157912573
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,32,8,128,1,float16,fp8,4095,0.0170666662355264
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,32,8,128,1,float16,fp8,8191,0.01911466692884763
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,32,8,128,1,float16,float16,16383,0.03857066730658213
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,32,8,128,1,float16,float16,32767,0.061093335350354515
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,32,8,128,1,float16,fp8,16383,0.02491733431816101
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,32,8,128,1,float16,float16,65535,0.10273067156473796
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,32,1,128,1,float16,float16,1,0.05563733478387197
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,32,8,128,1,float16,fp8,32767,0.039594667653242745
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,32,1,128,1,float16,fp8,1,0.043712000052134194
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,32,8,128,1,float16,fp8,65535,0.06005333364009857
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,32,1,128,1,float16,fp8,3,0.04539200166861216
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,32,1,128,1,float16,float16,7,0.055973331133524575
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,32,1,128,1,float16,float16,3,0.05596266686916351
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,32,1,128,1,float16,fp8,7,0.04744533201058706
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,32,1,128,1,float16,float16,15,0.06587733328342438
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,32,1,128,1,float16,float16,63,0.06621866424878438
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,32,1,128,1,float16,fp8,15,0.0576853354771932
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,32,1,128,1,float16,float16,31,0.06587733328342438
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,32,1,128,1,float16,fp8,127,0.0679253339767456
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,32,1,128,1,float16,fp8,63,0.05769066512584686
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,32,1,128,1,float16,fp8,31,0.0580213318268458
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,32,1,128,1,float16,float16,127,0.07851199805736542
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,32,1,128,1,float16,fp8,255,0.10274666547775269
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,32,1,128,1,float16,float16,255,0.11707733074824016
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,32,1,128,1,float16,float16,1023,0.3421866496404012
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,32,1,128,1,float16,float16,511,0.19098132848739624
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,32,1,128,1,float16,fp8,511,0.1723733345667521
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,32,1,128,1,float16,fp8,1023,0.3135146697362264
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,32,1,128,1,float16,float16,2047,0.6683306694030762
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,32,1,128,1,float16,float16,4095,1.4066346486409504
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,32,2,128,1,float16,float16,1,0.015013333410024643
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,32,1,128,1,float16,fp8,2047,0.5959733327229818
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,32,1,128,1,float16,fp8,4095,1.170261303583781
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,32,2,128,1,float16,fp8,3,0.014677333335081736
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,32,2,128,1,float16,fp8,1,0.014682666709025701
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,32,2,128,1,float16,float16,3,0.015018666783968607
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,32,2,128,1,float16,fp8,7,0.014688000082969666
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,32,2,128,1,float16,float16,15,0.014671999961137772
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,32,2,128,1,float16,fp8,31,0.014682666709025701
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,32,2,128,1,float16,float16,7,0.014682666709025701
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,32,2,128,1,float16,fp8,63,0.014671999961137772
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,32,2,128,1,float16,fp8,15,0.014677333335081736
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,32,2,128,1,float16,float16,63,0.014677333335081736
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,32,2,128,1,float16,float16,31,0.015018666783968607
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,32,2,128,1,float16,float16,127,0.015018666783968607
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,32,2,128,1,float16,fp8,127,0.014682666709025701
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,32,2,128,1,float16,float16,255,0.014677333335081736
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,32,2,128,1,float16,float16,511,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,32,2,128,1,float16,float16,2047,0.04164266586303711
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,32,2,128,1,float16,fp8,255,0.014655999839305878
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,32,2,128,1,float16,fp8,2047,0.029018667836983997
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,32,2,128,1,float16,float16,1023,0.02082666630546252
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,32,2,128,1,float16,fp8,511,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,32,2,128,1,float16,fp8,1023,0.019285333653291065
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,32,2,128,1,float16,float16,4095,0.06417066852251689
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,32,4,128,1,float16,float16,1,0.01706133286158244
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,32,4,128,1,float16,float16,3,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,32,2,128,1,float16,fp8,4095,0.04607999821503957
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,32,4,128,1,float16,fp8,3,0.015013333410024643
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,32,4,128,1,float16,fp8,1,0.015018666783968607
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,32,4,128,1,float16,float16,7,0.016735999534527462
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,32,4,128,1,float16,fp8,7,0.015018666783968607
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,32,4,128,1,float16,float16,15,0.0170666662355264
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,32,4,128,1,float16,fp8,15,0.015018666783968607
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,32,4,128,1,float16,float16,31,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,32,4,128,1,float16,fp8,31,0.015018666783968607
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,32,4,128,1,float16,float16,63,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,32,4,128,1,float16,fp8,255,0.014677333335081736
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,32,4,128,1,float16,float16,255,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,32,4,128,1,float16,float16,511,0.020821332931518555
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,32,4,128,1,float16,fp8,63,0.01504533365368843
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,32,4,128,1,float16,fp8,127,0.01504533365368843
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,32,4,128,1,float16,float16,127,0.016757333030303318
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,32,4,128,1,float16,fp8,511,0.01874133323629697
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,32,4,128,1,float16,fp8,1023,0.02491733431816101
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,32,4,128,1,float16,fp8,2047,0.04164800047874451
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,32,4,128,1,float16,float16,2047,0.06278400123119354
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,32,4,128,1,float16,float16,1023,0.03549866626660029
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,32,8,128,1,float16,fp8,1,0.020821332931518555
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,32,8,128,1,float16,float16,1,0.022869333624839783
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,32,4,128,1,float16,float16,4095,0.10683733224868774
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,32,8,128,1,float16,float16,3,0.022869333624839783
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,32,4,128,1,float16,fp8,4095,0.06553066770235698
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,32,8,128,1,float16,fp8,3,0.020821332931518555
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,32,8,128,1,float16,float16,7,0.023210667073726654
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,32,8,128,1,float16,float16,15,0.023210667073726654
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,32,8,128,1,float16,fp8,7,0.01877333347996076
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,32,8,128,1,float16,fp8,31,0.019109333554903667
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,32,8,128,1,float16,fp8,63,0.020821332931518555
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,32,8,128,1,float16,float16,127,0.022869333624839783
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,32,8,128,1,float16,fp8,15,0.020821332931518555
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,32,8,128,1,float16,float16,31,0.023242667317390442
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,32,8,128,1,float16,float16,63,0.022885332504908245
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,32,8,128,1,float16,fp8,127,0.020853333175182343
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,32,8,128,1,float16,fp8,255,0.018789333601792652
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,32,8,128,1,float16,float16,511,0.03515733281771342
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,32,8,128,1,float16,float16,255,0.022858666876951855
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,32,8,128,1,float16,fp8,511,0.02628266563018163
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,32,8,128,1,float16,fp8,2047,0.062122667829195656
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,32,8,128,1,float16,float16,1023,0.05563733478387197
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,32,8,128,1,float16,float16,4095,0.1793760061264038
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,32,8,128,1,float16,fp8,1023,0.037205333511034645
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,32,8,128,1,float16,float16,2047,0.09898666540781657
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,32,8,128,1,float16,fp8,4095,0.10104533036549886
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,256,1,32,1,128,1,float16,fp8,1,0.08055466910203297
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,256,1,32,1,128,1,float16,float16,1,0.1013813316822052
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,256,1,32,1,128,1,float16,float16,3,0.10476266344388326
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,256,1,32,1,128,1,float16,float16,7,0.10684800148010254
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,256,1,32,1,128,1,float16,fp8,15,0.10923199852307637
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,256,1,32,1,128,1,float16,float16,31,0.12563199798266092
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,256,1,32,1,128,1,float16,fp8,31,0.10889066259066264
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,256,1,32,1,128,1,float16,fp8,3,0.08428800106048584
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,256,1,32,1,128,1,float16,fp8,7,0.08739200234413147
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,256,1,32,1,128,1,float16,float16,15,0.12526933352152506
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,256,1,32,1,128,1,float16,fp8,63,0.10924800237019856
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,256,1,32,1,128,1,float16,float16,63,0.12731732924779257
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,256,1,32,1,128,1,float16,float16,127,0.15018666783968607
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,256,1,32,1,128,1,float16,fp8,127,0.1293653349081675
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,256,1,32,1,128,1,float16,float16,255,0.22357332706451416
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,256,1,32,1,128,1,float16,fp8,255,0.19916266202926636
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,256,1,32,1,128,1,float16,float16,511,0.3735893170038859
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,256,1,32,1,128,1,float16,fp8,511,0.33638401826222736
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,256,1,32,1,128,1,float16,float16,1023,0.7113440036773682
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,256,1,32,2,128,1,float16,float16,1,0.019050666441520054
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,256,1,32,2,128,1,float16,fp8,1,0.01877333347996076
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,256,1,32,1,128,1,float16,fp8,1023,0.6171306769053141
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,256,1,32,2,128,1,float16,float16,3,0.02081599955757459
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,256,1,32,2,128,1,float16,float16,15,0.020138667275508244
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,256,1,32,2,128,1,float16,fp8,15,0.018768000106016796
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,256,1,32,2,128,1,float16,float16,31,0.01911466692884763
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,256,1,32,2,128,1,float16,fp8,3,0.01877333347996076
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,256,1,32,2,128,1,float16,float16,7,0.01911466692884763
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,256,1,32,2,128,1,float16,fp8,7,0.017749333133300144
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,256,1,32,2,128,1,float16,fp8,31,0.018778666853904724
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,256,1,32,2,128,1,float16,fp8,63,0.018778666853904724
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,256,1,32,2,128,1,float16,float16,127,0.020138667275508244
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,256,1,32,2,128,1,float16,fp8,127,0.018090666582187016
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,256,1,32,2,128,1,float16,float16,63,0.020831999679406483
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,256,1,32,2,128,1,float16,fp8,511,0.022869333624839783
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,256,1,32,2,128,1,float16,float16,255,0.01911466692884763
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,256,1,32,2,128,1,float16,fp8,255,0.017071999609470367
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,256,1,32,2,128,1,float16,float16,511,0.023546665906906128
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,256,1,32,2,128,1,float16,float16,1023,0.03789333254098892
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,256,1,32,4,128,1,float16,float16,1,0.022869333624839783
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,256,1,32,2,128,1,float16,fp8,1023,0.0290133332212766
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,256,1,32,4,128,1,float16,fp8,1,0.020821332931518555
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,256,1,32,4,128,1,float16,float16,15,0.023210667073726654
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,256,1,32,4,128,1,float16,float16,3,0.023232000569502514
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,256,1,32,4,128,1,float16,fp8,15,0.020794666061798733
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,256,1,32,4,128,1,float16,float16,31,0.02491733431816101
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,256,1,32,4,128,1,float16,fp8,3,0.020821332931518555
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,256,1,32,4,128,1,float16,fp8,7,0.020821332931518555
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,256,1,32,4,128,1,float16,float16,7,0.023205332458019257
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,256,1,32,4,128,1,float16,fp8,31,0.020821332931518555
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,256,1,32,4,128,1,float16,float16,63,0.023210667073726654
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,256,1,32,4,128,1,float16,float16,127,0.022869333624839783
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,256,1,32,4,128,1,float16,fp8,127,0.02081599955757459
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,256,1,32,4,128,1,float16,fp8,63,0.020831999679406483
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,256,1,32,4,128,1,float16,float16,255,0.02422400067249934
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,256,1,32,4,128,1,float16,fp8,255,0.020821332931518555
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,256,1,32,4,128,1,float16,float16,511,0.03583999971548716
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,256,1,32,4,128,1,float16,float16,1023,0.05597866574923197
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,256,1,32,4,128,1,float16,fp8,1023,0.037205333511034645
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,256,1,32,8,128,1,float16,float16,1,0.03618133316437403
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,256,1,32,4,128,1,float16,fp8,511,0.026965332527955372
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,256,1,32,8,128,1,float16,fp8,1,0.0290133332212766
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,256,1,32,8,128,1,float16,fp8,7,0.02934933453798294
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,256,1,32,8,128,1,float16,fp8,15,0.029018667836983997
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,256,1,32,8,128,1,float16,float16,15,0.03515733281771342
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,256,1,32,8,128,1,float16,float16,3,0.03549866626660029
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,256,1,32,8,128,1,float16,fp8,3,0.0290133332212766
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,256,1,32,8,128,1,float16,float16,31,0.035162667433420815
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,256,1,32,8,128,1,float16,float16,7,0.03515733281771342
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,256,1,32,8,128,1,float16,fp8,31,0.02900800108909607
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,256,1,32,8,128,1,float16,float16,63,0.03515733281771342
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,256,1,32,8,128,1,float16,fp8,63,0.029002666473388672
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,256,1,32,8,128,1,float16,float16,127,0.03515733281771342
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,256,1,32,8,128,1,float16,fp8,127,0.02903999884923299
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,256,1,32,8,128,1,float16,float16,255,0.03754666695992152
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,256,1,32,8,128,1,float16,fp8,511,0.04232533276081085
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,256,1,32,8,128,1,float16,float16,1023,0.09762133161226909
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,256,1,32,8,128,1,float16,float16,511,0.0580320010582606
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,256,1,32,8,128,1,float16,fp8,1023,0.06178133189678192
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,512,1,32,1,128,1,float16,float16,1,0.19694934288660684
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,256,1,32,8,128,1,float16,fp8,255,0.029029332101345062
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,512,1,32,1,128,1,float16,float16,7,0.20533865690231323
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,512,1,32,1,128,1,float16,fp8,3,0.1604320009549459
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,512,1,32,1,128,1,float16,float16,3,0.20121600230534872
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,512,1,32,1,128,1,float16,fp8,1,0.15615999698638916
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,512,1,32,1,128,1,float16,float16,15,0.24439465999603271
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,512,1,32,1,128,1,float16,fp8,15,0.2126506765683492
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,512,1,32,1,128,1,float16,fp8,7,0.16878400246302286
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,512,1,32,1,128,1,float16,fp8,31,0.21163199345270792
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,512,1,32,1,128,1,float16,float16,31,0.2464159925778707
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,512,1,32,1,128,1,float16,fp8,127,0.25224532683690387
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,512,1,32,1,128,1,float16,float16,63,0.24813334147135416
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,512,1,32,1,128,1,float16,float16,255,0.44049068291982013
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,512,1,32,1,128,1,float16,fp8,63,0.2133386731147766
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,512,1,32,1,128,1,float16,float16,127,0.29474133253097534
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,512,1,32,1,128,1,float16,fp8,255,0.3896373510360718
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,512,1,32,1,128,1,float16,float16,511,0.785749355951945
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,512,1,32,2,128,1,float16,float16,1,0.0290133332212766
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,512,1,32,2,128,1,float16,float16,3,0.0290133332212766
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,512,1,32,2,128,1,float16,fp8,1,0.027306665976842243
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,512,1,32,1,128,1,float16,fp8,511,0.6614773273468018
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,512,1,32,2,128,1,float16,float16,15,0.02937600016593933
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,512,1,32,2,128,1,float16,fp8,3,0.027306665976842243
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,512,1,32,2,128,1,float16,fp8,7,0.027306665976842243
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,512,1,32,2,128,1,float16,float16,7,0.029018667836983997
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,512,1,32,2,128,1,float16,fp8,15,0.026965332527955372
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,512,1,32,2,128,1,float16,float16,31,0.0290133332212766
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,512,1,32,2,128,1,float16,fp8,63,0.027322667340437572
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,512,1,32,2,128,1,float16,fp8,31,0.02731200059254964
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,512,1,32,2,128,1,float16,fp8,255,0.025258667767047882
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,512,1,32,2,128,1,float16,fp8,127,0.026965332527955372
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,512,1,32,2,128,1,float16,float16,63,0.0290133332212766
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,512,1,32,2,128,1,float16,float16,127,0.029722665747006733
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,512,1,32,2,128,1,float16,float16,255,0.02900800108909607
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,512,1,32,2,128,1,float16,fp8,511,0.03480533262093862
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,512,1,32,4,128,1,float16,fp8,1,0.031066666046778362
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,512,1,32,2,128,1,float16,float16,511,0.03958933303753535
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,512,1,32,4,128,1,float16,float16,1,0.03754666695992152
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,512,1,32,4,128,1,float16,float16,3,0.037205333511034645
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,512,1,32,4,128,1,float16,fp8,3,0.031066666046778362
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,512,1,32,4,128,1,float16,float16,7,0.037205333511034645
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,512,1,32,4,128,1,float16,fp8,7,0.031066666046778362
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,512,1,32,4,128,1,float16,float16,15,0.03754133234421412
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,512,1,32,4,128,1,float16,fp8,15,0.031061333914597828
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,512,1,32,4,128,1,float16,float16,31,0.03754133234421412
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,512,1,32,4,128,1,float16,fp8,31,0.03105599929889043
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,512,1,32,4,128,1,float16,float16,63,0.037205333511034645
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,512,1,32,4,128,1,float16,float16,255,0.039594667653242745
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,512,1,32,4,128,1,float16,fp8,255,0.031061333914597828
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,512,1,32,4,128,1,float16,fp8,127,0.0310506671667099
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,512,1,32,4,128,1,float16,fp8,63,0.031045332551002502
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,512,1,32,4,128,1,float16,float16,127,0.037205333511034645
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,512,1,32,8,128,1,float16,fp8,1,0.04778666794300079
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,512,1,32,8,128,1,float16,float16,3,0.062122667829195656
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,512,1,32,8,128,1,float16,float16,1,0.060415998101234436
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,512,1,32,8,128,1,float16,fp8,3,0.04778666794300079
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,512,1,32,4,128,1,float16,fp8,511,0.044031997521718345
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,512,1,32,4,128,1,float16,float16,511,0.060080001751581825
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,512,1,32,8,128,1,float16,float16,7,0.06109866499900818
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,512,1,32,8,128,1,float16,fp8,7,0.04742933313051859
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,512,1,32,8,128,1,float16,float16,15,0.06006933252016703
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,512,1,32,8,128,1,float16,fp8,15,0.047797332207361855
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,512,1,32,8,128,1,float16,fp8,31,0.04778666794300079
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,512,1,32,8,128,1,float16,float16,31,0.060080001751581825
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,512,1,32,8,128,1,float16,float16,63,0.06043200194835663
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,512,1,32,8,128,1,float16,fp8,63,0.048469334840774536
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,512,1,32,8,128,1,float16,float16,255,0.062122667829195656
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,512,1,32,8,128,1,float16,float16,127,0.062122667829195656
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,512,1,32,8,128,1,float16,fp8,255,0.05085866649945577
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,512,1,32,8,128,1,float16,fp8,127,0.04744533201058706
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,32,1,128,1,float16,float16,3,0.014677333335081736
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,512,1,32,8,128,1,float16,float16,511,0.10171733299891154
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,32,1,128,1,float16,float16,7,0.014677333335081736
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,32,1,128,1,float16,float16,1,0.014682666709025701
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,32,1,128,1,float16,fp8,1,0.010581333190202713
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,512,1,32,8,128,1,float16,fp8,511,0.07236266632874806
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,32,1,128,1,float16,fp8,7,0.01022933361430963
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,32,1,128,1,float16,fp8,3,0.00984533317387104
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,32,1,128,1,float16,float16,15,0.014671999961137772
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,32,1,128,1,float16,float16,31,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,32,1,128,1,float16,float16,127,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,32,1,128,1,float16,fp8,31,0.012629333883523941
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,32,1,128,1,float16,float16,63,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,32,1,128,1,float16,fp8,15,0.010581333190202713
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,32,1,128,1,float16,float16,255,0.018810667097568512
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,32,1,128,1,float16,fp8,63,0.012624000509579977
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,32,1,128,1,float16,fp8,255,0.014671999961137772
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,32,1,128,1,float16,fp8,127,0.012533333152532578
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,32,1,128,1,float16,float16,511,0.025274666647116344
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,32,1,128,1,float16,float16,2047,0.06382933259010315
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,32,1,128,1,float16,float16,4095,0.11502933502197266
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,32,1,128,1,float16,fp8,511,0.01911466692884763
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,32,1,128,1,float16,float16,1023,0.03925333420435587
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,32,1,128,1,float16,fp8,1023,0.031061333914597828
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,32,1,128,1,float16,fp8,2047,0.05154666801293691
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,32,1,128,1,float16,float16,8191,0.21538132429122925
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,32,1,128,1,float16,fp8,8191,0.17442133029301962
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,32,1,128,1,float16,float16,16383,0.41794665654500324
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,32,1,128,1,float16,fp8,4095,0.09248000383377075
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,32,2,128,1,float16,float16,1,0.010581333190202713
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,32,1,128,1,float16,fp8,16383,0.3380959828694661
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,32,2,128,1,float16,fp8,1,0.010591999938090643
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,32,1,128,1,float16,float16,32767,0.8427519798278809
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,32,1,128,1,float16,fp8,32767,0.712719996770223
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,32,1,128,1,float16,fp8,65535,1.6300427118937175
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,32,1,128,1,float16,float16,65535,2.2717439333597818
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,32,2,128,1,float16,fp8,3,0.011605333536863327
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,32,2,128,1,float16,fp8,7,0.010586666564146677
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,32,2,128,1,float16,float16,3,0.010586666564146677
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,32,2,128,1,float16,float16,7,0.010645333677530289
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,32,2,128,1,float16,float16,15,0.010581333190202713
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,32,2,128,1,float16,float16,63,0.010922666639089584
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,32,2,128,1,float16,fp8,15,0.010944000134865442
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,32,2,128,1,float16,float16,31,0.010928000013033548
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,32,2,128,1,float16,fp8,63,0.010832000523805618
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,32,2,128,1,float16,fp8,31,0.010826667149861654
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,32,2,128,1,float16,float16,127,0.010677333921194077
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,32,2,128,1,float16,fp8,255,0.010922666639089584
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,32,2,128,1,float16,fp8,127,0.010640000303586325
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,32,2,128,1,float16,fp8,511,0.012629333883523941
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,32,2,128,1,float16,float16,255,0.010581333190202713
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,32,2,128,1,float16,float16,1023,0.012629333883523941
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,32,2,128,1,float16,float16,511,0.012629333883523941
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,32,2,128,1,float16,fp8,2047,0.01877333347996076
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,32,2,128,1,float16,float16,2047,0.01877333347996076
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,32,2,128,1,float16,fp8,1023,0.012629333883523941
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,32,2,128,1,float16,fp8,4095,0.02048533285657565
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,32,2,128,1,float16,fp8,8191,0.02081599955757459
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,32,2,128,1,float16,float16,4095,0.019797333826621372
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,32,2,128,1,float16,float16,8191,0.022863999009132385
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,32,2,128,1,float16,fp8,16383,0.02491733431816101
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,32,2,128,1,float16,float16,16383,0.026965332527955372
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,32,4,128,1,float16,float16,3,0.010922666639089584
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,32,4,128,1,float16,float16,1,0.010581333190202713
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,32,2,128,1,float16,fp8,32767,0.02935466667016347
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,32,2,128,1,float16,fp8,65535,0.04471466441949209
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,32,2,128,1,float16,float16,65535,0.062122667829195656
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,32,4,128,1,float16,fp8,1,0.010645333677530289
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,32,2,128,1,float16,float16,32767,0.04334933559099833
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,32,4,128,1,float16,float16,7,0.010922666639089584
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,32,4,128,1,float16,fp8,3,0.010586666564146677
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,32,4,128,1,float16,fp8,15,0.010778666784365972
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,32,4,128,1,float16,fp8,7,0.010933333386977514
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,32,4,128,1,float16,fp8,31,0.010581333190202713
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,32,4,128,1,float16,float16,31,0.010581333190202713
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,32,4,128,1,float16,float16,15,0.010938666760921478
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,32,4,128,1,float16,fp8,63,0.01091733326514562
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,32,4,128,1,float16,fp8,127,0.010677333921194077
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,32,4,128,1,float16,float16,63,0.01091733326514562
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,32,4,128,1,float16,float16,127,0.010581333190202713
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,32,4,128,1,float16,float16,1023,0.012624000509579977
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,32,4,128,1,float16,fp8,255,0.010581333190202713
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,32,4,128,1,float16,float16,255,0.010586666564146677
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,32,4,128,1,float16,fp8,1023,0.012650666137536367
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,32,4,128,1,float16,float16,511,0.010922666639089584
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,32,4,128,1,float16,fp8,511,0.012629333883523941
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,32,4,128,1,float16,float16,4095,0.017071999609470367
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,32,4,128,1,float16,fp8,2047,0.015018666783968607
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,32,4,128,1,float16,float16,2047,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,32,4,128,1,float16,fp8,8191,0.02049066623051961
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,32,4,128,1,float16,fp8,4095,0.016730666160583496
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,32,4,128,1,float16,float16,8191,0.022869333624839783
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,32,4,128,1,float16,float16,16383,0.03925866633653641
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,32,4,128,1,float16,fp8,16383,0.024911999702453613
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,32,4,128,1,float16,float16,32767,0.05973333120346069
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,32,8,128,1,float16,float16,1,0.010581333190202713
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,32,4,128,1,float16,fp8,32767,0.03925333420435587
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,32,4,128,1,float16,fp8,65535,0.06144000093142191
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,32,8,128,1,float16,float16,7,0.010581333190202713
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,32,8,128,1,float16,fp8,1,0.010922666639089584
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,32,8,128,1,float16,fp8,7,0.010581333190202713
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,32,8,128,1,float16,float16,3,0.010629333555698395
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,32,4,128,1,float16,float16,65535,0.10172266761461894
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,32,8,128,1,float16,fp8,3,0.010928000013033548
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,32,8,128,1,float16,fp8,15,0.010928000013033548
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,32,8,128,1,float16,float16,31,0.010922666639089584
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,32,8,128,1,float16,float16,15,0.0107893335322539
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,32,8,128,1,float16,float16,63,0.010575999816258749
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,32,8,128,1,float16,fp8,31,0.010922666639089584
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,32,8,128,1,float16,float16,255,0.010821333775917688
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,32,8,128,1,float16,fp8,255,0.011264000087976456
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,32,8,128,1,float16,fp8,63,0.010645333677530289
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,32,8,128,1,float16,fp8,127,0.010922666639089584
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,32,8,128,1,float16,fp8,511,0.012629333883523941
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,32,8,128,1,float16,float16,511,0.012624000509579977
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,32,8,128,1,float16,float16,127,0.010944000134865442
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,32,8,128,1,float16,fp8,1023,0.012629333883523941
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,32,8,128,1,float16,float16,2047,0.01775466650724411
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,32,8,128,1,float16,float16,1023,0.012970666090647379
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,32,8,128,1,float16,fp8,4095,0.01911466692884763
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,32,8,128,1,float16,float16,16383,0.05973333120346069
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,32,8,128,1,float16,fp8,2047,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,32,8,128,1,float16,float16,4095,0.021157334248224895
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,32,8,128,1,float16,fp8,16383,0.039594667653242745
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,32,8,128,1,float16,float16,8191,0.03924266745646795
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,32,8,128,1,float16,fp8,8191,0.02492266645034154
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,32,8,128,1,float16,float16,32767,0.10240532954533894
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,32,8,128,1,float16,fp8,32767,0.06007466713587443
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,32,8,128,1,float16,float16,65535,0.18552533785502115
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1024,1,32,1,128,1,float16,fp8,1,0.30395734310150146
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,32,8,128,1,float16,fp8,65535,0.10172266761461894
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1024,1,32,1,128,1,float16,float16,1,0.38758933544158936
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1024,1,32,1,128,1,float16,float16,3,0.39798935254414874
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1024,1,32,1,128,1,float16,fp8,7,0.33058132727940875
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1024,1,32,1,128,1,float16,fp8,3,0.3142026662826538
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1024,1,32,1,128,1,float16,float16,7,0.405839999516805
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1024,1,32,1,128,1,float16,float16,15,0.48213334878285724
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1024,1,32,1,128,1,float16,fp8,15,0.4184746742248535
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1024,1,32,1,128,1,float16,float16,31,0.4875893195470174
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1024,1,32,1,128,1,float16,fp8,31,0.4184746742248535
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1024,1,32,1,128,1,float16,fp8,63,0.42001068592071533
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1024,1,32,1,128,1,float16,float16,63,0.4899786710739136
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1024,1,32,1,128,1,float16,fp8,127,0.49646933873494464
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1024,1,32,1,128,1,float16,float16,255,0.9606773058573405
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1024,1,32,1,128,1,float16,fp8,255,0.7703893184661865
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1024,1,32,1,128,1,float16,float16,127,0.5822879870732626
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1024,1,32,2,128,1,float16,float16,1,0.0481279989083608
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1024,1,32,2,128,1,float16,fp8,7,0.04539733131726583
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1024,1,32,2,128,1,float16,fp8,1,0.04539733131726583
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1024,1,32,2,128,1,float16,float16,15,0.04778666794300079
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1024,1,32,2,128,1,float16,float16,3,0.0481279989083608
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1024,1,32,2,128,1,float16,float16,7,0.04744533201058706
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1024,1,32,2,128,1,float16,fp8,3,0.04539733131726583
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1024,1,32,2,128,1,float16,float16,31,0.048469334840774536
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1024,1,32,2,128,1,float16,fp8,31,0.04539733131726583
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1024,1,32,2,128,1,float16,fp8,63,0.04539200166861216
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1024,1,32,2,128,1,float16,fp8,15,0.04540266593297323
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1024,1,32,2,128,1,float16,float16,63,0.04779199759165446
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1024,1,32,2,128,1,float16,float16,127,0.04951466619968414
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1024,1,32,2,128,1,float16,fp8,127,0.043706665436426796
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1024,1,32,4,128,1,float16,float16,1,0.06622399886449178
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1024,1,32,2,128,1,float16,float16,255,0.051541333397229515
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1024,1,32,4,128,1,float16,fp8,1,0.051541333397229515
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1024,1,32,2,128,1,float16,fp8,255,0.04333333174387614
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1024,1,32,4,128,1,float16,fp8,3,0.051541333397229515
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1024,1,32,4,128,1,float16,float16,15,0.06656000018119812
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1024,1,32,4,128,1,float16,float16,3,0.06587733328342438
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1024,1,32,4,128,1,float16,float16,7,0.06587733328342438
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1024,1,32,4,128,1,float16,fp8,7,0.05153599878152212
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1024,1,32,4,128,1,float16,float16,31,0.06485866506894429
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1024,1,32,4,128,1,float16,fp8,15,0.052229334910710655
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1024,1,32,4,128,1,float16,fp8,31,0.051882664362589516
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1024,1,32,4,128,1,float16,fp8,63,0.05189333359400431
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1024,1,32,4,128,1,float16,float16,63,0.06519466638565063
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1024,1,32,4,128,1,float16,fp8,127,0.05187733471393585
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1024,1,32,4,128,1,float16,fp8,255,0.053247998158137
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1024,1,32,4,128,1,float16,float16,255,0.06656000018119812
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1024,1,32,8,128,1,float16,float16,1,0.11401599645614624
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1024,1,32,4,128,1,float16,float16,127,0.06690133114655812
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1024,1,32,8,128,1,float16,float16,3,0.11502933502197266
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1024,1,32,8,128,1,float16,fp8,1,0.08601599931716919
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1024,1,32,8,128,1,float16,fp8,3,0.08532800277074178
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1024,1,32,8,128,1,float16,float16,7,0.11332266529401143
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1024,1,32,8,128,1,float16,fp8,15,0.08564266562461853
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1024,1,32,8,128,1,float16,fp8,7,0.08533333738644917
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1024,1,32,8,128,1,float16,fp8,31,0.08635733524958293
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1024,1,32,8,128,1,float16,float16,15,0.11229866743087769
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1024,1,32,8,128,1,float16,float16,31,0.11332266529401143
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1024,1,32,8,128,1,float16,fp8,63,0.08603733777999878
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1024,1,32,8,128,1,float16,fp8,127,0.08635733524958293
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1024,1,32,8,128,1,float16,float16,127,0.11332266529401143
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1024,1,32,8,128,1,float16,float16,255,0.11332266529401143
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1024,1,32,8,128,1,float16,fp8,255,0.08737599849700928
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1024,1,32,8,128,1,float16,float16,63,0.1129813293615977
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2048,1,32,1,128,1,float16,float16,1,0.768671989440918
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2048,1,32,1,128,1,float16,fp8,1,0.6023253202438354
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2048,1,32,1,128,1,float16,float16,3,0.7917226950327555
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2048,1,32,1,128,1,float16,fp8,3,0.6236053307851156
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2048,1,32,1,128,1,float16,fp8,7,0.6545066833496094
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2048,1,32,1,128,1,float16,float16,7,0.8063999811808268
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2048,1,32,1,128,1,float16,float16,15,0.9610239664713541
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2048,1,32,1,128,1,float16,fp8,15,0.8326826890309652
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2048,1,32,1,128,1,float16,float16,31,0.9695786635080973
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2048,1,32,1,128,1,float16,fp8,31,0.8313226699829102
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2048,1,32,1,128,1,float16,float16,63,0.9842346509297689
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2048,1,32,1,128,1,float16,fp8,63,0.8340480327606201
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2048,1,32,1,128,1,float16,float16,127,1.262074629465739
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2048,1,32,2,128,1,float16,float16,1,0.09045333663622539
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2048,1,32,1,128,1,float16,fp8,127,0.992415984471639
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2048,1,32,2,128,1,float16,fp8,1,0.08191999793052673
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2048,1,32,2,128,1,float16,float16,3,0.08872532844543457
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2048,1,32,2,128,1,float16,float16,7,0.08841066559155782
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2048,1,32,2,128,1,float16,fp8,3,0.08224533498287201
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2048,1,32,2,128,1,float16,fp8,7,0.08021333316961925
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2048,1,32,2,128,1,float16,float16,15,0.08840533097585042
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2048,1,32,2,128,1,float16,fp8,15,0.08226666847864787
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2048,1,32,2,128,1,float16,float16,31,0.08840533097585042
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2048,1,32,2,128,1,float16,fp8,31,0.08055466910203297
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2048,1,32,2,128,1,float16,float16,63,0.08878399928410848
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2048,1,32,2,128,1,float16,fp8,63,0.08125866452852885
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2048,1,32,4,128,1,float16,float16,1,0.12083199620246887
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2048,1,32,4,128,1,float16,fp8,1,0.09454933802286784
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2048,1,32,2,128,1,float16,float16,127,0.09045333663622539
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2048,1,32,2,128,1,float16,fp8,127,0.08054933448632558
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2048,1,32,4,128,1,float16,float16,3,0.12150933345158894
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2048,1,32,4,128,1,float16,fp8,3,0.09284266829490662
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2048,1,32,4,128,1,float16,float16,7,0.12151466806729634
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2048,1,32,4,128,1,float16,fp8,7,0.09250666697820027
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2048,1,32,4,128,1,float16,float16,15,0.12116799751917522
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2048,1,32,4,128,1,float16,fp8,15,0.09454400340716045
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2048,1,32,4,128,1,float16,fp8,31,0.09387200077374776
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2048,1,32,4,128,1,float16,float16,31,0.12151466806729634
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2048,1,32,4,128,1,float16,float16,63,0.12117333213488261
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2048,1,32,4,128,1,float16,fp8,63,0.09386666615804036
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2048,1,32,4,128,1,float16,float16,127,0.12117333213488261
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2048,1,32,4,128,1,float16,fp8,127,0.09250133236249287
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2048,1,32,8,128,1,float16,float16,1,0.2136746644973755
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2048,1,32,8,128,1,float16,fp8,1,0.1609333356221517
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2048,1,32,8,128,1,float16,float16,3,0.21541333198547363
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2048,1,32,8,128,1,float16,fp8,3,0.15991999705632529
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2048,1,32,8,128,1,float16,float16,7,0.2146986722946167
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2048,1,32,8,128,1,float16,fp8,7,0.15990933775901794
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2048,1,32,8,128,1,float16,float16,31,0.21333332856496176
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2048,1,32,8,128,1,float16,float16,15,0.2136746644973755
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2048,1,32,8,128,1,float16,fp8,15,0.15957333644231161
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2048,1,32,8,128,1,float16,fp8,31,0.1609333356221517
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,32,1,128,1,float16,float16,1,0.014671999961137772
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,32,1,128,1,float16,float16,3,0.01469333345691363
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,32,1,128,1,float16,fp8,1,0.015018666783968607
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2048,1,32,8,128,1,float16,float16,63,0.21504000822703043
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,32,1,128,1,float16,fp8,3,0.015013333410024643
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,32,1,128,1,float16,float16,7,0.014677333335081736
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,32,1,128,1,float16,fp8,7,0.015018666783968607
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,32,1,128,1,float16,float16,15,0.014677333335081736
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2048,1,32,8,128,1,float16,fp8,63,0.16008533040682474
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,32,1,128,1,float16,float16,31,0.014671999961137772
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,32,1,128,1,float16,fp8,31,0.02083733429511388
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2048,1,32,8,128,1,float16,float16,127,0.21467200915018717
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2048,1,32,8,128,1,float16,fp8,127,0.1585493286450704
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,32,1,128,1,float16,fp8,15,0.0170666662355264
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,32,1,128,1,float16,float16,63,0.014677333335081736
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,32,1,128,1,float16,fp8,63,0.020821332931518555
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,32,1,128,1,float16,fp8,127,0.02081599955757459
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,32,1,128,1,float16,float16,255,0.023210667073726654
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,32,1,128,1,float16,float16,1023,0.06177600224812826
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,32,1,128,1,float16,fp8,1023,0.05529066423575083
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,32,1,128,1,float16,float16,2047,0.1109333336353302
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,32,1,128,1,float16,float16,127,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,32,1,128,1,float16,fp8,255,0.02491733431816101
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,32,1,128,1,float16,fp8,511,0.03515200068553289
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,32,1,128,1,float16,float16,511,0.03549866626660029
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,32,1,128,1,float16,fp8,2047,0.09454933802286784
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,32,1,128,1,float16,float16,4095,0.21128533283869425
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,32,1,128,1,float16,fp8,4095,0.17459199825922647
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,32,1,128,1,float16,float16,8191,0.41198933124542236
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,32,1,128,1,float16,float16,16383,0.836949348449707
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,32,1,128,1,float16,fp8,16383,0.6584320068359375
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,32,1,128,1,float16,fp8,32767,1.4277973175048828
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,32,2,128,1,float16,float16,1,0.010586666564146677
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,32,2,128,1,float16,float16,3,0.010682666053374609
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,32,2,128,1,float16,fp8,1,0.010778666784365972
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,32,1,128,1,float16,fp8,8191,0.3357013463973999
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,32,1,128,1,float16,float16,32767,3.705002784729004
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,32,2,128,1,float16,fp8,15,0.010591999938090643
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,32,2,128,1,float16,float16,15,0.010911999891201654
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,32,2,128,1,float16,fp8,7,0.010586666564146677
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,32,2,128,1,float16,float16,31,0.010640000303586325
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,32,2,128,1,float16,fp8,3,0.010922666639089584
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,32,2,128,1,float16,float16,7,0.010922666639089584
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,32,2,128,1,float16,fp8,63,0.010837333897749582
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,32,2,128,1,float16,float16,127,0.010565333068370819
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,32,2,128,1,float16,float16,255,0.010901333143313726
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,32,2,128,1,float16,fp8,127,0.01091733326514562
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,32,2,128,1,float16,float16,63,0.010581333190202713
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,32,2,128,1,float16,fp8,31,0.010922666639089584
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,32,2,128,1,float16,fp8,511,0.012554666648308435
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,32,2,128,1,float16,float16,1023,0.012634667257467905
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,32,2,128,1,float16,fp8,255,0.010586666564146677
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,32,2,128,1,float16,fp8,1023,0.012975999464591345
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,32,2,128,1,float16,float16,511,0.012618667135636011
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,32,2,128,1,float16,float16,2047,0.016719999412695568
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,32,2,128,1,float16,fp8,4095,0.018768000106016796
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,32,2,128,1,float16,float16,8191,0.022863999009132385
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,32,2,128,1,float16,fp8,2047,0.016693333784739178
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,32,2,128,1,float16,float16,16383,0.04130133241415024
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,32,2,128,1,float16,float16,4095,0.01877333347996076
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,32,2,128,1,float16,fp8,8191,0.020821332931518555
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,32,4,128,1,float16,float16,1,0.010581333190202713
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,32,2,128,1,float16,fp8,16383,0.02731200059254964
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,32,2,128,1,float16,fp8,32767,0.04471466441949209
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,32,2,128,1,float16,float16,32767,0.06178133189678192
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,32,4,128,1,float16,fp8,1,0.011264000087976456
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,32,4,128,1,float16,float16,3,0.010650667051474253
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,32,4,128,1,float16,fp8,7,0.010816000401973724
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,32,4,128,1,float16,fp8,15,0.01062400018175443
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,32,4,128,1,float16,float16,7,0.010581333190202713
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,32,4,128,1,float16,float16,63,0.010928000013033548
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,32,4,128,1,float16,fp8,3,0.010672000547250112
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,32,4,128,1,float16,float16,15,0.010581333190202713
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,32,4,128,1,float16,fp8,63,0.01091733326514562
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,32,4,128,1,float16,float16,31,0.010922666639089584
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,32,4,128,1,float16,fp8,31,0.011509332805871964
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,32,4,128,1,float16,float16,127,0.010480000327030817
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,32,4,128,1,float16,float16,255,0.01090666651725769
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,32,4,128,1,float16,fp8,511,0.012629333883523941
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,32,4,128,1,float16,fp8,127,0.010565333068370819
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,32,4,128,1,float16,float16,2047,0.017407999684413273
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,32,4,128,1,float16,float16,511,0.012629333883523941
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,32,4,128,1,float16,fp8,1023,0.012682666381200155
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,32,4,128,1,float16,fp8,255,0.011498666057984034
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,32,4,128,1,float16,float16,1023,0.012975999464591345
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,32,4,128,1,float16,fp8,2047,0.017050666113694508
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,32,4,128,1,float16,float16,4095,0.021850667893886566
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,32,4,128,1,float16,float16,8191,0.03925333420435587
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,32,4,128,1,float16,fp8,4095,0.01877333347996076
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,32,8,128,1,float16,float16,1,0.012645332763592402
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,32,4,128,1,float16,fp8,8191,0.02491733431816101
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,32,4,128,1,float16,float16,16383,0.060080001751581825
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,32,4,128,1,float16,fp8,32767,0.061093335350354515
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,32,8,128,1,float16,fp8,1,0.010944000134865442
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,32,4,128,1,float16,float16,32767,0.10274133086204529
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,32,4,128,1,float16,fp8,16383,0.03925866633653641
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,32,8,128,1,float16,fp8,3,0.010869332899649939
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,32,8,128,1,float16,float16,3,0.010581333190202713
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,32,8,128,1,float16,fp8,15,0.011152000476916632
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,32,8,128,1,float16,fp8,7,0.010960000256697336
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,32,8,128,1,float16,float16,15,0.010938666760921478
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,32,8,128,1,float16,float16,63,0.010586666564146677
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,32,8,128,1,float16,float16,7,0.010922666639089584
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,32,8,128,1,float16,float16,31,0.011637333780527115
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,32,8,128,1,float16,fp8,31,0.010911999891201654
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,32,8,128,1,float16,float16,127,0.010832000523805618
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,32,8,128,1,float16,fp8,63,0.010581333190202713
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,32,8,128,1,float16,fp8,255,0.010575999816258749
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,32,8,128,1,float16,float16,255,0.010922666639089584
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,32,8,128,1,float16,fp8,127,0.010933333386977514
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,32,8,128,1,float16,float16,511,0.012624000509579977
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,32,8,128,1,float16,fp8,1023,0.014335999886194864
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,32,8,128,1,float16,fp8,511,0.012645332763592402
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,32,8,128,1,float16,fp8,2047,0.01911466692884763
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,32,8,128,1,float16,float16,4095,0.03925333420435587
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,32,8,128,1,float16,float16,2047,0.020821332931518555
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,32,8,128,1,float16,float16,1023,0.014741333822409311
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,32,8,128,1,float16,fp8,8191,0.04130133241415024
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,32,8,128,1,float16,fp8,4095,0.02492800106604894
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,32,8,128,1,float16,fp8,16383,0.062463998794555664
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,32,8,128,1,float16,float16,8191,0.06041066845258077
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,32,8,128,1,float16,float16,16383,0.10342400272687276
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,16,1,128,1,float16,fp8,1,0.010826667149861654
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,16,1,128,1,float16,fp8,3,0.011258666714032492
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,32,8,128,1,float16,float16,32767,0.18824533621470133
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,16,1,128,1,float16,float16,3,0.010581333190202713
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,16,1,128,1,float16,float16,1,0.010746666540702185
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,32,8,128,1,float16,fp8,32767,0.10718400279680888
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,16,1,128,1,float16,float16,7,0.010586666564146677
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,16,1,128,1,float16,float16,15,0.010666667173306147
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,16,1,128,1,float16,fp8,7,0.010922666639089584
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,16,1,128,1,float16,fp8,15,0.010826667149861654
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,16,1,128,1,float16,float16,31,0.01126933346192042
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,16,1,128,1,float16,fp8,127,0.010640000303586325
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,16,1,128,1,float16,fp8,31,0.010640000303586325
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,16,1,128,1,float16,fp8,63,0.01091733326514562
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,16,1,128,1,float16,float16,63,0.010586666564146677
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,16,1,128,1,float16,float16,127,0.010581333190202713
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,16,1,128,1,float16,fp8,255,0.01126933346192042
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,16,1,128,1,float16,float16,255,0.010570666442314783
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,16,1,128,1,float16,fp8,511,0.012624000509579977
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,16,1,128,1,float16,float16,511,0.012629333883523941
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,16,1,128,1,float16,float16,4095,0.01912533367673556
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,16,1,128,1,float16,float16,1023,0.012629333883523941
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,16,1,128,1,float16,fp8,2047,0.0170666662355264
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,16,1,128,1,float16,fp8,1023,0.012624000509579977
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,16,1,128,1,float16,float16,2047,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,16,1,128,1,float16,fp8,4095,0.018768000106016796
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,16,1,128,1,float16,float16,8191,0.022863999009132385
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,16,1,128,1,float16,float16,16383,0.041290665666262306
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,16,1,128,1,float16,fp8,8191,0.022869333624839783
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,16,2,128,1,float16,float16,3,0.010586666564146677
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,16,1,128,1,float16,fp8,16383,0.0273333340883255
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,16,2,128,1,float16,float16,1,0.010586666564146677
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,16,2,128,1,float16,fp8,1,0.010581333190202713
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,16,2,128,1,float16,fp8,3,0.011264000087976456
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,16,1,128,1,float16,float16,32767,0.062122667829195656
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,16,1,128,1,float16,fp8,32767,0.04539733131726583
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,16,2,128,1,float16,float16,7,0.010922666639089584
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,16,2,128,1,float16,fp8,7,0.010485333700974783
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,16,2,128,1,float16,float16,31,0.010640000303586325
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,16,2,128,1,float16,fp8,31,0.01091733326514562
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,16,2,128,1,float16,float16,15,0.01081066702802976
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,16,2,128,1,float16,fp8,15,0.010842667271693548
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,16,2,128,1,float16,fp8,127,0.010656000425418219
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,16,2,128,1,float16,float16,127,0.011264000087976456
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,16,2,128,1,float16,float16,63,0.010816000401973724
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,16,2,128,1,float16,fp8,63,0.010645333677530289
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,16,2,128,1,float16,fp8,255,0.01091733326514562
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,16,2,128,1,float16,float16,255,0.010634666929642359
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,16,2,128,1,float16,float16,1023,0.012655999511480331
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,16,2,128,1,float16,float16,511,0.012629333883523941
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,16,2,128,1,float16,fp8,511,0.012655999511480331
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,16,2,128,1,float16,fp8,1023,0.012874666601419449
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,16,2,128,1,float16,float16,4095,0.020821332931518555
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,16,2,128,1,float16,float16,2047,0.01877333347996076
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,16,2,128,1,float16,fp8,2047,0.017781333376963932
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,16,2,128,1,float16,fp8,4095,0.020138667275508244
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,16,2,128,1,float16,float16,8191,0.03925333420435587
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,16,2,128,1,float16,fp8,8191,0.025258667767047882
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,16,2,128,1,float16,fp8,16383,0.04130133241415024
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,16,4,128,1,float16,float16,3,0.010837333897749582
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,16,4,128,1,float16,fp8,1,0.010901333143313726
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,16,2,128,1,float16,float16,16383,0.06075733403364817
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,16,2,128,1,float16,float16,32767,0.10205866893132527
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,16,4,128,1,float16,float16,1,0.01091733326514562
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,16,2,128,1,float16,fp8,32767,0.06108800073464712
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,16,4,128,1,float16,fp8,7,0.010826667149861654
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,16,4,128,1,float16,fp8,3,0.01126933346192042
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,16,4,128,1,float16,float16,15,0.011610666910807291
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,16,4,128,1,float16,float16,31,0.010821333775917688
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,16,4,128,1,float16,float16,7,0.011264000087976456
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,16,4,128,1,float16,fp8,15,0.012634667257467905
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,16,4,128,1,float16,fp8,31,0.01097600037852923
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,16,4,128,1,float16,float16,63,0.011978667229413986
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,16,4,128,1,float16,float16,255,0.010634666929642359
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,16,4,128,1,float16,fp8,127,0.01228800043463707
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,16,4,128,1,float16,float16,127,0.01126933346192042
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,16,4,128,1,float16,fp8,63,0.010928000013033548
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,16,4,128,1,float16,fp8,255,0.010922666639089584
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,16,4,128,1,float16,float16,1023,0.014671999961137772
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,16,4,128,1,float16,fp8,1023,0.012970666090647379
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,16,4,128,1,float16,float16,2047,0.022885332504908245
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,16,4,128,1,float16,float16,511,0.012629333883523941
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,16,4,128,1,float16,fp8,511,0.012629333883523941
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,16,4,128,1,float16,fp8,2047,0.020821332931518555
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,16,4,128,1,float16,float16,8191,0.06178133189678192
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,16,4,128,1,float16,fp8,4095,0.02491733431816101
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,16,4,128,1,float16,fp8,8191,0.04027199993530909
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,16,4,128,1,float16,float16,4095,0.039594667653242745
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,16,4,128,1,float16,float16,16383,0.1030613382657369
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,16,8,128,1,float16,float16,1,0.012975999464591345
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,16,8,128,1,float16,float16,3,0.012629333883523941
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,16,4,128,1,float16,fp8,16383,0.06351999938488007
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,16,8,128,1,float16,fp8,1,0.012629333883523941
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,16,4,128,1,float16,float16,32767,0.18773333231608072
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,16,8,128,1,float16,float16,7,0.012938667088747025
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,16,8,128,1,float16,fp8,3,0.012618667135636011
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,16,4,128,1,float16,fp8,32767,0.10820800065994263
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,16,8,128,1,float16,fp8,7,0.012869333227475485
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,16,8,128,1,float16,float16,15,0.012975999464591345
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,16,8,128,1,float16,float16,63,0.012629333883523941
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,16,8,128,1,float16,float16,31,0.012624000509579977
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,16,8,128,1,float16,float16,127,0.012879999975363413
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,16,8,128,1,float16,fp8,15,0.012629333883523941
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,16,8,128,1,float16,float16,255,0.012975999464591345
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,16,8,128,1,float16,fp8,31,0.012629333883523941
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,16,8,128,1,float16,fp8,63,0.012624000509579977
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,16,8,128,1,float16,fp8,127,0.012970666090647379
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,16,8,128,1,float16,fp8,255,0.012629333883523941
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,16,8,128,1,float16,fp8,511,0.014682666709025701
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,16,8,128,1,float16,fp8,1023,0.017407999684413273
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,16,8,128,1,float16,float16,511,0.015018666783968607
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,16,8,128,1,float16,float16,1023,0.019120000302791595
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,16,8,128,1,float16,fp8,2047,0.025258667767047882
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,16,8,128,1,float16,float16,2047,0.039594667653242745
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,16,8,128,1,float16,fp8,4095,0.041637333730856575
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,16,8,128,1,float16,float16,8191,0.10513066252072652
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,16,8,128,1,float16,fp8,8191,0.0641653339068095
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,16,8,128,1,float16,float16,4095,0.06177600224812826
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,16,8,128,1,float16,float16,16383,0.19012266397476196
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,16,1,128,1,float16,float16,1,0.010591999938090643
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,16,1,128,1,float16,fp8,1,0.010474666953086853
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,16,1,128,1,float16,float16,3,0.010922666639089584
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,16,8,128,1,float16,fp8,16383,0.10820266604423523
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,16,1,128,1,float16,fp8,3,0.010581333190202713
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,16,1,128,1,float16,fp8,7,0.010922666639089584
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,16,1,128,1,float16,float16,15,0.010591999938090643
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,16,8,128,1,float16,float16,32767,0.3619840145111084
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,16,8,128,1,float16,fp8,32767,0.19728533426920572
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,16,1,128,1,float16,fp8,15,0.010591999938090643
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,16,1,128,1,float16,float16,7,0.010581333190202713
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,16,1,128,1,float16,float16,63,0.01090666651725769
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,16,1,128,1,float16,fp8,63,0.010581333190202713
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,16,1,128,1,float16,fp8,127,0.010832000523805618
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,16,1,128,1,float16,float16,31,0.0106133334338665
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,16,1,128,1,float16,fp8,31,0.010586666564146677
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,16,1,128,1,float16,fp8,255,0.010581333190202713
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,16,1,128,1,float16,float16,127,0.010586666564146677
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,16,1,128,1,float16,float16,255,0.010586666564146677
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,16,1,128,1,float16,float16,1023,0.012634667257467905
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,16,1,128,1,float16,float16,511,0.012624000509579977
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,16,1,128,1,float16,fp8,511,0.010847999403874079
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,16,1,128,1,float16,fp8,1023,0.012655999511480331
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,16,1,128,1,float16,float16,2047,0.010928000013033548
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,16,1,128,1,float16,fp8,2047,0.012629333883523941
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,16,1,128,1,float16,float16,8191,0.018794666975736618
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,16,1,128,1,float16,fp8,8191,0.02082666630546252
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,16,1,128,1,float16,float16,16383,0.024911999702453613
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,16,1,128,1,float16,float16,4095,0.013653332988421122
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,16,1,128,1,float16,fp8,4095,0.012624000509579977
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,16,1,128,1,float16,fp8,16383,0.02491733431816101
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,16,1,128,1,float16,fp8,32767,0.03754133234421412
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,16,1,128,1,float16,float16,65535,0.04095999896526337
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,16,1,128,1,float16,float16,32767,0.03754666695992152
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,16,1,128,1,float16,fp8,65535,0.043354665239652
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,16,2,128,1,float16,float16,1,0.010581333190202713
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,16,1,128,1,float16,float16,131071,0.04744533201058706
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,16,2,128,1,float16,fp8,3,0.010581333190202713
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,16,1,128,1,float16,fp8,131071,0.04744000236193339
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,16,2,128,1,float16,fp8,1,0.010794666906197866
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,16,2,128,1,float16,float16,3,0.010570666442314783
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,16,2,128,1,float16,float16,7,0.010575999816258749
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,16,2,128,1,float16,fp8,7,0.010591999938090643
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,16,2,128,1,float16,fp8,15,0.010853332777818045
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,16,2,128,1,float16,float16,31,0.010575999816258749
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,16,2,128,1,float16,float16,15,0.010586666564146677
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,16,2,128,1,float16,float16,63,0.010826667149861654
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,16,2,128,1,float16,fp8,31,0.010586666564146677
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,16,2,128,1,float16,fp8,63,0.010581333190202713
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,16,2,128,1,float16,fp8,255,0.010581333190202713
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,16,2,128,1,float16,float16,255,0.010581333190202713
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,16,2,128,1,float16,float16,127,0.010586666564146677
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,16,2,128,1,float16,fp8,127,0.010586666564146677
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,16,2,128,1,float16,float16,511,0.010581333190202713
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,16,2,128,1,float16,fp8,511,0.012629333883523941
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,16,2,128,1,float16,float16,2047,0.012629333883523941
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,16,2,128,1,float16,fp8,2047,0.010661333799362183
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,16,2,128,1,float16,float16,1023,0.010816000401973724
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,16,2,128,1,float16,fp8,1023,0.012629333883523941
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,16,2,128,1,float16,fp8,4095,0.012629333883523941
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,16,2,128,1,float16,float16,4095,0.012661332885424295
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,16,2,128,1,float16,float16,8191,0.01877333347996076
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,16,2,128,1,float16,float16,16383,0.024911999702453613
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,16,2,128,1,float16,fp8,16383,0.02491733431816101
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,16,2,128,1,float16,fp8,8191,0.01877333347996076
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,16,2,128,1,float16,float16,32767,0.02900800108909607
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,16,2,128,1,float16,fp8,32767,0.0290133332212766
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,16,4,128,1,float16,float16,1,0.010687999427318573
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,16,2,128,1,float16,float16,65535,0.031061333914597828
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,16,4,128,1,float16,fp8,1,0.010826667149861654
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,16,2,128,1,float16,fp8,65535,0.030720000465710957
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,16,2,128,1,float16,fp8,131071,0.03345066557327906
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,16,4,128,1,float16,float16,7,0.010581333190202713
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,16,2,128,1,float16,float16,131071,0.03583999971548716
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,16,4,128,1,float16,float16,3,0.010581333190202713
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,16,4,128,1,float16,fp8,3,0.010581333190202713
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,16,4,128,1,float16,fp8,7,0.010581333190202713
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,16,4,128,1,float16,float16,15,0.010581333190202713
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,16,4,128,1,float16,float16,31,0.010581333190202713
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,16,4,128,1,float16,fp8,31,0.010469333579142889
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,16,4,128,1,float16,fp8,15,0.010640000303586325
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,16,4,128,1,float16,fp8,127,0.010586666564146677
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,16,4,128,1,float16,float16,127,0.010581333190202713
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,16,4,128,1,float16,float16,63,0.010586666564146677
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,16,4,128,1,float16,fp8,63,0.010581333190202713
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,16,4,128,1,float16,fp8,255,0.010586666564146677
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,16,4,128,1,float16,float16,255,0.010480000327030817
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,16,4,128,1,float16,float16,511,0.010581333190202713
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,16,4,128,1,float16,float16,1023,0.010575999816258749
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,16,4,128,1,float16,fp8,1023,0.010938666760921478
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,16,4,128,1,float16,float16,4095,0.012629333883523941
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,16,4,128,1,float16,fp8,2047,0.010981333752473196
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,16,4,128,1,float16,fp8,511,0.010586666564146677
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,16,4,128,1,float16,float16,2047,0.01091733326514562
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,16,4,128,1,float16,fp8,4095,0.012682666381200155
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,16,4,128,1,float16,float16,8191,0.01877333347996076
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,16,4,128,1,float16,fp8,8191,0.018768000106016796
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,16,4,128,1,float16,fp8,16383,0.02081599955757459
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,16,4,128,1,float16,float16,16383,0.021162666380405426
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,16,8,128,1,float16,float16,1,0.010586666564146677
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,16,4,128,1,float16,float16,32767,0.02317333221435547
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,16,4,128,1,float16,float16,65535,0.02696000039577484
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,16,4,128,1,float16,fp8,32767,0.022858666876951855
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,16,4,128,1,float16,fp8,65535,0.02491733431816101
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,16,4,128,1,float16,float16,131071,0.04539733131726583
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,16,8,128,1,float16,fp8,1,0.010634666929642359
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,16,8,128,1,float16,float16,3,0.010581333190202713
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,16,4,128,1,float16,fp8,131071,0.03109866629044215
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,16,8,128,1,float16,float16,15,0.010575999816258749
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,16,8,128,1,float16,fp8,31,0.010586666564146677
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,16,8,128,1,float16,fp8,3,0.010634666929642359
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,16,8,128,1,float16,float16,31,0.010581333190202713
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,16,8,128,1,float16,fp8,15,0.010832000523805618
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,16,8,128,1,float16,float16,7,0.010586666564146677
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,16,8,128,1,float16,fp8,7,0.010581333190202713
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,16,8,128,1,float16,float16,63,0.010586666564146677
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,16,8,128,1,float16,float16,127,0.010591999938090643
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,16,8,128,1,float16,float16,255,0.010575999816258749
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,16,8,128,1,float16,fp8,255,0.010682666053374609
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,16,8,128,1,float16,fp8,63,0.01080000028014183
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,16,8,128,1,float16,fp8,127,0.010581333190202713
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,16,8,128,1,float16,float16,511,0.010591999938090643
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,16,8,128,1,float16,fp8,1023,0.010922666639089584
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,16,8,128,1,float16,float16,1023,0.011616000284751257
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,16,8,128,1,float16,fp8,511,0.010922666639089584
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,16,8,128,1,float16,float16,2047,0.012629333883523941
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,16,8,128,1,float16,fp8,2047,0.012645332763592402
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,16,8,128,1,float16,float16,4095,0.016719999412695568
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,16,8,128,1,float16,fp8,8191,0.01877333347996076
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,16,8,128,1,float16,fp8,4095,0.016037333756685257
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,16,8,128,1,float16,float16,8191,0.018768000106016796
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,16,8,128,1,float16,float16,16383,0.021162666380405426
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,16,8,128,1,float16,fp8,32767,0.022874665757020313
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,16,8,128,1,float16,float16,32767,0.025253333151340485
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,16,8,128,1,float16,fp8,16383,0.01877333347996076
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,16,8,128,1,float16,fp8,65535,0.026949333647886913
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,16,1,128,1,float16,fp8,1,0.010464000205198923
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,16,8,128,1,float16,float16,65535,0.039274667700131737
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,16,1,128,1,float16,float16,1,0.010922666639089584
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,16,1,128,1,float16,float16,3,0.010581333190202713
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,16,1,128,1,float16,float16,7,0.010922666639089584
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,16,8,128,1,float16,float16,131071,0.06144000093142191
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,16,8,128,1,float16,fp8,131071,0.041984001795450844
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,16,1,128,1,float16,fp8,7,0.010485333700974783
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,16,1,128,1,float16,fp8,3,0.010586666564146677
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,16,1,128,1,float16,fp8,31,0.010944000134865442
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,16,1,128,1,float16,float16,15,0.010682666053374609
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,16,1,128,1,float16,fp8,15,0.010928000013033548
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,16,1,128,1,float16,float16,127,0.010581333190202713
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,16,1,128,1,float16,float16,31,0.010581333190202713
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,16,1,128,1,float16,fp8,63,0.010581333190202713
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,16,1,128,1,float16,float16,63,0.010586666564146677
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,16,1,128,1,float16,float16,255,0.010922666639089584
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,16,1,128,1,float16,fp8,127,0.010645333677530289
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,16,1,128,1,float16,fp8,511,0.010928000013033548
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,16,1,128,1,float16,float16,511,0.012634667257467905
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,16,1,128,1,float16,fp8,255,0.011642667154471079
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,16,1,128,1,float16,fp8,2047,0.01331199953953425
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,16,1,128,1,float16,float16,1023,0.010922666639089584
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,16,1,128,1,float16,fp8,1023,0.012629333883523941
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,16,1,128,1,float16,float16,2047,0.012624000509579977
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,16,1,128,1,float16,fp8,4095,0.014698666830857595
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,16,1,128,1,float16,float16,4095,0.014677333335081736
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,16,1,128,1,float16,float16,16383,0.02693866689999898
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,16,1,128,1,float16,float16,8191,0.02082666630546252
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,16,1,128,1,float16,fp8,8191,0.02083733429511388
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,16,1,128,1,float16,fp8,16383,0.026965332527955372
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,16,1,128,1,float16,float16,32767,0.030037333567937214
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,16,1,128,1,float16,float16,65535,0.032773333291212715
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,16,1,128,1,float16,fp8,32767,0.03070933371782303
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,16,1,128,1,float16,fp8,65535,0.031744000812371574
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,16,1,128,1,float16,float16,131071,0.03719999889532725
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,16,2,128,1,float16,fp8,1,0.010581333190202713
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,16,2,128,1,float16,float16,1,0.01081066702802976
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,16,2,128,1,float16,fp8,3,0.010928000013033548
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,16,1,128,1,float16,fp8,131071,0.03721066564321518
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,16,2,128,1,float16,float16,3,0.010581333190202713
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,16,2,128,1,float16,float16,7,0.010586666564146677
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,16,2,128,1,float16,fp8,31,0.010826667149861654
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,16,2,128,1,float16,float16,15,0.010581333190202713
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,16,2,128,1,float16,fp8,15,0.010581333190202713
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,16,2,128,1,float16,float16,31,0.010682666053374609
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,16,2,128,1,float16,fp8,63,0.010586666564146677
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,16,2,128,1,float16,float16,63,0.01055466632048289
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,16,2,128,1,float16,fp8,7,0.010586666564146677
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,16,2,128,1,float16,fp8,127,0.010581333190202713
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,16,2,128,1,float16,float16,255,0.010570666442314783
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,16,2,128,1,float16,float16,127,0.010581333190202713
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,16,2,128,1,float16,float16,511,0.010816000401973724
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,16,2,128,1,float16,fp8,511,0.010922666639089584
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,16,2,128,1,float16,fp8,255,0.010581333190202713
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,16,2,128,1,float16,float16,2047,0.012634667257467905
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,16,2,128,1,float16,float16,1023,0.010586666564146677
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,16,2,128,1,float16,fp8,2047,0.012629333883523941
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,16,2,128,1,float16,fp8,1023,0.01126933346192042
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,16,2,128,1,float16,fp8,4095,0.014677333335081736
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,16,2,128,1,float16,float16,4095,0.013306666165590286
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,16,2,128,1,float16,float16,8191,0.018805333723624546
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,16,2,128,1,float16,fp8,8191,0.01911466692884763
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,16,2,128,1,float16,float16,16383,0.022869333624839783
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,16,2,128,1,float16,float16,32767,0.02492266645034154
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,16,2,128,1,float16,fp8,16383,0.022869333624839783
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,16,2,128,1,float16,fp8,32767,0.02457600086927414
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,16,4,128,1,float16,float16,1,0.010469333579142889
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,16,4,128,1,float16,fp8,1,0.010816000401973724
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,16,2,128,1,float16,fp8,65535,0.025936000049114227
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,16,2,128,1,float16,float16,65535,0.0290133332212766
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,16,4,128,1,float16,float16,7,0.010501333822806677
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,16,2,128,1,float16,float16,131071,0.04574400186538696
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,16,2,128,1,float16,fp8,131071,0.03276800115903219
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,16,4,128,1,float16,float16,3,0.010575999816258749
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,16,4,128,1,float16,fp8,3,0.010922666639089584
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,16,4,128,1,float16,fp8,7,0.010575999816258749
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,16,4,128,1,float16,float16,31,0.010490667074918747
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,16,4,128,1,float16,float16,15,0.010437333335479101
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,16,4,128,1,float16,fp8,15,0.010826667149861654
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,16,4,128,1,float16,fp8,127,0.010575999816258749
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,16,4,128,1,float16,float16,63,0.010575999816258749
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,16,4,128,1,float16,fp8,31,0.010586666564146677
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,16,4,128,1,float16,fp8,63,0.010581333190202713
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,16,4,128,1,float16,float16,127,0.010591999938090643
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,16,4,128,1,float16,float16,255,0.010474666953086853
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,16,4,128,1,float16,fp8,511,0.010682666053374609
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,16,4,128,1,float16,fp8,255,0.010581333190202713
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,16,4,128,1,float16,float16,4095,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,16,4,128,1,float16,float16,511,0.010821333775917688
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,16,4,128,1,float16,float16,2047,0.012624000509579977
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,16,4,128,1,float16,fp8,2047,0.012634667257467905
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,16,4,128,1,float16,float16,1023,0.010928000013033548
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,16,4,128,1,float16,fp8,1023,0.010586666564146677
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,16,4,128,1,float16,fp8,4095,0.0170666662355264
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,16,4,128,1,float16,float16,8191,0.018768000106016796
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,16,4,128,1,float16,fp8,8191,0.01877333347996076
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,16,4,128,1,float16,fp8,16383,0.020821332931518555
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,16,4,128,1,float16,float16,16383,0.021162666380405426
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,16,4,128,1,float16,fp8,65535,0.027647999425729115
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,16,4,128,1,float16,fp8,32767,0.022869333624839783
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,16,4,128,1,float16,float16,32767,0.02491733431816101
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,16,4,128,1,float16,float16,65535,0.04164266586303711
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,16,8,128,1,float16,float16,1,0.010586666564146677
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,16,8,128,1,float16,float16,3,0.010581333190202713
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,16,8,128,1,float16,fp8,1,0.010581333190202713
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,16,4,128,1,float16,fp8,131071,0.04334933559099833
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,16,4,128,1,float16,float16,131071,0.062463998794555664
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,16,8,128,1,float16,fp8,3,0.010826667149861654
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,16,8,128,1,float16,float16,7,0.010581333190202713
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,16,8,128,1,float16,fp8,7,0.010586666564146677
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,16,8,128,1,float16,fp8,15,0.010581333190202713
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,16,8,128,1,float16,float16,15,0.010645333677530289
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,16,8,128,1,float16,float16,31,0.010581333190202713
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,16,8,128,1,float16,fp8,31,0.010693332801262537
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,16,8,128,1,float16,float16,255,0.010650667051474253
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,16,8,128,1,float16,float16,63,0.010928000013033548
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,16,8,128,1,float16,fp8,127,0.010586666564146677
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,16,8,128,1,float16,fp8,255,0.010565333068370819
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,16,8,128,1,float16,fp8,63,0.010581333190202713
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,16,8,128,1,float16,float16,127,0.010586666564146677
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,16,8,128,1,float16,fp8,511,0.01121066634853681
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,16,8,128,1,float16,float16,1023,0.010559999694426855
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,16,8,128,1,float16,float16,511,0.010794666906197866
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,16,8,128,1,float16,fp8,2047,0.015018666783968607
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,16,8,128,1,float16,float16,2047,0.014677333335081736
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,16,8,128,1,float16,fp8,1023,0.010922666639089584
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,16,8,128,1,float16,float16,4095,0.01741333305835724
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,16,8,128,1,float16,fp8,8191,0.01877333347996076
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,16,8,128,1,float16,float16,8191,0.01911466692884763
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,16,8,128,1,float16,fp8,4095,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,16,8,128,1,float16,float16,16383,0.02491733431816101
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,16,8,128,1,float16,fp8,16383,0.02082666630546252
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,16,8,128,1,float16,float16,32767,0.039936001102129616
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,16,8,128,1,float16,fp8,32767,0.025263999899228413
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,16,1,128,1,float16,float16,1,0.011946666985750198
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,16,8,128,1,float16,fp8,65535,0.03859733293453852
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,16,8,128,1,float16,float16,65535,0.05870933334032694
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,16,1,128,1,float16,fp8,1,0.01091733326514562
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,16,8,128,1,float16,float16,131071,0.09966933727264404
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,16,8,128,1,float16,fp8,131071,0.06006399790445963
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,16,1,128,1,float16,float16,7,0.010928000013033548
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,16,1,128,1,float16,fp8,7,0.012629333883523941
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,16,1,128,1,float16,fp8,15,0.0107893335322539
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,16,1,128,1,float16,float16,31,0.010698666175206503
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,16,1,128,1,float16,float16,3,0.010928000013033548
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,16,1,128,1,float16,fp8,3,0.011264000087976456
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,16,1,128,1,float16,float16,15,0.012629333883523941
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,16,1,128,1,float16,fp8,63,0.012629333883523941
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,16,1,128,1,float16,float16,127,0.010928000013033548
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,16,1,128,1,float16,fp8,31,0.010928000013033548
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,16,1,128,1,float16,float16,63,0.010821333775917688
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,16,1,128,1,float16,fp8,127,0.010805333654085795
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,16,1,128,1,float16,float16,255,0.010928000013033548
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,16,1,128,1,float16,fp8,255,0.011264000087976456
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,16,1,128,1,float16,fp8,511,0.012730666746695837
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,16,1,128,1,float16,float16,511,0.012629333883523941
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,16,1,128,1,float16,float16,1023,0.012970666090647379
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,16,1,128,1,float16,fp8,2047,0.01877333347996076
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,16,1,128,1,float16,float16,2047,0.01877333347996076
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,16,1,128,1,float16,fp8,1023,0.012975999464591345
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,16,1,128,1,float16,float16,8191,0.04164266586303711
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,16,1,128,1,float16,float16,4095,0.022869333624839783
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,16,1,128,1,float16,fp8,4095,0.022869333624839783
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,16,2,128,1,float16,float16,1,0.010821333775917688
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,16,1,128,1,float16,fp8,8191,0.027306665976842243
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,16,2,128,1,float16,fp8,1,0.012634667257467905
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,16,1,128,1,float16,float16,16383,0.062122667829195656
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,16,2,128,1,float16,float16,3,0.012655999511480331
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,16,1,128,1,float16,fp8,16383,0.04368533194065094
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,16,2,128,1,float16,fp8,3,0.012293333808581034
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,16,2,128,1,float16,fp8,7,0.010698666175206503
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,16,2,128,1,float16,float16,7,0.011333333949247995
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,16,2,128,1,float16,fp8,15,0.012624000509579977
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,16,2,128,1,float16,fp8,31,0.012629333883523941
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,16,2,128,1,float16,float16,15,0.010853332777818045
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,16,2,128,1,float16,float16,63,0.010575999816258749
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,16,2,128,1,float16,float16,31,0.012639999389648438
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,16,2,128,1,float16,float16,127,0.010928000013033548
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,16,2,128,1,float16,fp8,63,0.011946666985750198
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,16,2,128,1,float16,fp8,255,0.012629333883523941
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,16,2,128,1,float16,float16,255,0.01228800043463707
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,16,2,128,1,float16,fp8,127,0.010634666929642359
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,16,2,128,1,float16,float16,511,0.012629333883523941
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,16,2,128,1,float16,fp8,511,0.012725333372751871
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,16,2,128,1,float16,fp8,1023,0.012863999853531519
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,16,2,128,1,float16,fp8,2047,0.020821332931518555
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,16,2,128,1,float16,float16,1023,0.014677333335081736
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,16,2,128,1,float16,float16,2047,0.022874665757020313
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,16,2,128,1,float16,float16,4095,0.039594667653242745
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,16,2,128,1,float16,fp8,4095,0.024911999702453613
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,16,2,128,1,float16,fp8,8191,0.041296000281969704
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,16,2,128,1,float16,float16,8191,0.061103999614715576
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,16,4,128,1,float16,float16,1,0.013994666437307993
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,16,4,128,1,float16,float16,3,0.012970666090647379
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,16,4,128,1,float16,fp8,1,0.012629333883523941
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,16,2,128,1,float16,fp8,16383,0.0631466656923294
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,16,4,128,1,float16,float16,7,0.012965332716703415
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,16,2,128,1,float16,float16,16383,0.10341866811116536
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,16,4,128,1,float16,fp8,3,0.012634667257467905
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,16,4,128,1,float16,fp8,15,0.012629333883523941
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,16,4,128,1,float16,fp8,31,0.012634667257467905
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,16,4,128,1,float16,float16,63,0.013647999614477158
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,16,4,128,1,float16,float16,31,0.012629333883523941
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,16,4,128,1,float16,float16,15,0.012970666090647379
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,16,4,128,1,float16,fp8,7,0.012629333883523941
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,16,4,128,1,float16,fp8,63,0.012869333227475485
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,16,4,128,1,float16,fp8,127,0.012629333883523941
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,16,4,128,1,float16,float16,255,0.012896000097195307
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,16,4,128,1,float16,fp8,255,0.012629333883523941
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,16,4,128,1,float16,float16,127,0.012618667135636011
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,16,4,128,1,float16,float16,511,0.015018666783968607
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,16,4,128,1,float16,fp8,511,0.014677333335081736
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,16,4,128,1,float16,float16,1023,0.01911466692884763
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,16,4,128,1,float16,float16,2047,0.039936001102129616
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,16,4,128,1,float16,fp8,2047,0.0262773334980011
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,16,4,128,1,float16,float16,4095,0.062128002444903054
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,16,4,128,1,float16,fp8,1023,0.017397332936525345
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,16,4,128,1,float16,fp8,4095,0.04130133241415024
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,16,8,128,1,float16,float16,1,0.016730666160583496
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,16,4,128,1,float16,float16,8191,0.10410666465759277
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,16,8,128,1,float16,fp8,1,0.015018666783968607
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,16,4,128,1,float16,fp8,8191,0.06417066852251689
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,16,8,128,1,float16,float16,3,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,16,4,128,1,float16,float16,16383,0.18961066007614136
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,16,8,128,1,float16,float16,7,0.016389333953460056
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,16,4,128,1,float16,fp8,16383,0.10922666390736897
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,16,8,128,1,float16,fp8,3,0.014752000570297241
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,16,8,128,1,float16,float16,15,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,16,8,128,1,float16,fp8,7,0.015018666783968607
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,16,8,128,1,float16,fp8,15,0.015018666783968607
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,16,8,128,1,float16,fp8,31,0.014677333335081736
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,16,8,128,1,float16,float16,31,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,16,8,128,1,float16,fp8,127,0.014677333335081736
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,16,8,128,1,float16,float16,255,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,16,8,128,1,float16,float16,127,0.016719999412695568
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,16,8,128,1,float16,fp8,63,0.015018666783968607
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,16,8,128,1,float16,float16,63,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,16,8,128,1,float16,float16,511,0.020821332931518555
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,16,8,128,1,float16,fp8,255,0.014682666709025701
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,16,8,128,1,float16,float16,1023,0.03583999971548716
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,16,8,128,1,float16,fp8,511,0.01877333347996076
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,16,8,128,1,float16,fp8,1023,0.024906667570273083
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,16,8,128,1,float16,float16,2047,0.06178133189678192
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,16,8,128,1,float16,fp8,4095,0.06621333460013072
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,16,8,128,1,float16,fp8,2047,0.04266666869322459
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,16,8,128,1,float16,float16,4095,0.105813334385554
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,16,8,128,1,float16,fp8,8191,0.10922666390736897
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,16,8,128,1,float16,float16,8191,0.1904639999071757
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,16,1,128,1,float16,fp8,1,0.010922666639089584
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,16,1,128,1,float16,float16,1,0.010586666564146677
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,16,1,128,1,float16,float16,7,0.010608000059922537
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,16,8,128,1,float16,fp8,16383,0.19746132691701254
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,16,1,128,1,float16,float16,3,0.010933333386977514
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,16,8,128,1,float16,float16,16383,0.3643733263015747
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,16,1,128,1,float16,fp8,3,0.010922666639089584
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,16,1,128,1,float16,float16,15,0.01091733326514562
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,16,1,128,1,float16,fp8,7,0.010922666639089584
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,16,1,128,1,float16,float16,31,0.010480000327030817
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,16,1,128,1,float16,fp8,15,0.010944000134865442
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,16,1,128,1,float16,float16,63,0.010890666395425797
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,16,1,128,1,float16,fp8,127,0.01126933346192042
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,16,1,128,1,float16,fp8,31,0.010933333386977514
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,16,1,128,1,float16,fp8,63,0.010650667051474253
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,16,1,128,1,float16,fp8,255,0.010586666564146677
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,16,1,128,1,float16,float16,127,0.010928000013033548
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,16,1,128,1,float16,float16,255,0.010581333190202713
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,16,1,128,1,float16,float16,511,0.012629333883523941
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,16,1,128,1,float16,float16,1023,0.012618667135636011
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,16,1,128,1,float16,fp8,2047,0.014671999961137772
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,16,1,128,1,float16,fp8,511,0.012629333883523941
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,16,1,128,1,float16,fp8,1023,0.01091733326514562
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,16,1,128,1,float16,float16,2047,0.014671999961137772
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,16,1,128,1,float16,float16,8191,0.022869333624839783
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,16,1,128,1,float16,float16,4095,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,16,1,128,1,float16,fp8,4095,0.015013333410024643
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,16,1,128,1,float16,fp8,8191,0.022842665513356526
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,16,1,128,1,float16,float16,16383,0.02491733431816101
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,16,1,128,1,float16,fp8,16383,0.02491733431816101
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,16,2,128,1,float16,float16,1,0.010464000205198923
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,16,1,128,1,float16,fp8,32767,0.026954665780067444
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,16,1,128,1,float16,float16,32767,0.026965332527955372
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,16,1,128,1,float16,float16,65535,0.03071466585000356
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,16,2,128,1,float16,float16,3,0.010698666175206503
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,16,2,128,1,float16,fp8,1,0.010922666639089584
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,16,1,128,1,float16,fp8,65535,0.029018667836983997
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,16,1,128,1,float16,float16,131071,0.04915200173854828
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,16,1,128,1,float16,fp8,131071,0.037205333511034645
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,16,2,128,1,float16,fp8,3,0.010570666442314783
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,16,2,128,1,float16,float16,15,0.010677333921194077
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,16,2,128,1,float16,float16,7,0.010586666564146677
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,16,2,128,1,float16,fp8,15,0.010816000401973724
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,16,2,128,1,float16,fp8,7,0.010901333143313726
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,16,2,128,1,float16,float16,63,0.010581333190202713
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,16,2,128,1,float16,float16,127,0.010832000523805618
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,16,2,128,1,float16,fp8,31,0.010581333190202713
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,16,2,128,1,float16,float16,31,0.010863999525705973
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,16,2,128,1,float16,fp8,63,0.010586666564146677
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,16,2,128,1,float16,fp8,127,0.010640000303586325
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,16,2,128,1,float16,float16,511,0.010650667051474253
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,16,2,128,1,float16,fp8,511,0.010928000013033548
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,16,2,128,1,float16,float16,255,0.010559999694426855
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,16,2,128,1,float16,fp8,255,0.010922666639089584
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,16,2,128,1,float16,float16,1023,0.010581333190202713
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,16,2,128,1,float16,fp8,1023,0.010832000523805618
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,16,2,128,1,float16,float16,4095,0.0170666662355264
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,16,2,128,1,float16,fp8,4095,0.018757333358128864
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,16,2,128,1,float16,float16,2047,0.014677333335081736
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,16,2,128,1,float16,fp8,2047,0.013647999614477158
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,16,2,128,1,float16,float16,8191,0.020821332931518555
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,16,2,128,1,float16,fp8,8191,0.020821332931518555
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,16,2,128,1,float16,float16,32767,0.026965332527955372
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,16,2,128,1,float16,float16,16383,0.022869333624839783
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,16,2,128,1,float16,fp8,32767,0.023215999205907185
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,16,2,128,1,float16,fp8,16383,0.021162666380405426
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,16,2,128,1,float16,float16,65535,0.04370133578777313
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,16,4,128,1,float16,float16,1,0.010581333190202713
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,16,4,128,1,float16,float16,3,0.010693332801262537
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,16,2,128,1,float16,fp8,65535,0.029029332101345062
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,16,2,128,1,float16,float16,131071,0.06413866579532623
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,16,4,128,1,float16,fp8,1,0.010581333190202713
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,16,2,128,1,float16,fp8,131071,0.04711466530958811
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,16,4,128,1,float16,fp8,7,0.010911999891201654
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,16,4,128,1,float16,float16,15,0.010608000059922537
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,16,4,128,1,float16,fp8,3,0.010634666929642359
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,16,4,128,1,float16,float16,7,0.010949333508809408
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,16,4,128,1,float16,fp8,31,0.010922666639089584
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,16,4,128,1,float16,fp8,63,0.010602666685978571
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,16,4,128,1,float16,float16,127,0.010816000401973724
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,16,4,128,1,float16,float16,31,0.01055466632048289
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,16,4,128,1,float16,fp8,15,0.010581333190202713
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,16,4,128,1,float16,float16,63,0.010575999816258749
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,16,4,128,1,float16,float16,511,0.011605333536863327
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,16,4,128,1,float16,fp8,511,0.012629333883523941
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,16,4,128,1,float16,fp8,127,0.010485333700974783
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,16,4,128,1,float16,float16,255,0.010581333190202713
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,16,4,128,1,float16,fp8,255,0.010602666685978571
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,16,4,128,1,float16,float16,1023,0.010634666929642359
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,16,4,128,1,float16,float16,2047,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,16,4,128,1,float16,fp8,1023,0.010901333143313726
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,16,4,128,1,float16,fp8,2047,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,16,4,128,1,float16,float16,4095,0.01877333347996076
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,16,4,128,1,float16,fp8,4095,0.018746666610240936
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,16,4,128,1,float16,fp8,8191,0.019109333554903667
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,16,4,128,1,float16,float16,8191,0.021162666380405426
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,16,4,128,1,float16,float16,16383,0.02491733431816101
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,16,4,128,1,float16,fp8,16383,0.022218666970729828
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,16,4,128,1,float16,fp8,32767,0.026965332527955372
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,16,4,128,1,float16,float16,65535,0.06144000093142191
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,16,4,128,1,float16,float16,32767,0.041984001795450844
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,16,8,128,1,float16,float16,3,0.010581333190202713
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,16,8,128,1,float16,fp8,1,0.010581333190202713
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,16,4,128,1,float16,fp8,65535,0.04162666698296865
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,16,8,128,1,float16,float16,1,0.010480000327030817
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,16,8,128,1,float16,fp8,3,0.010581333190202713
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,16,4,128,1,float16,float16,131071,0.10205866893132527
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,16,8,128,1,float16,fp8,7,0.010586666564146677
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,16,4,128,1,float16,fp8,131071,0.062463998794555664
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,16,8,128,1,float16,float16,7,0.010922666639089584
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,16,8,128,1,float16,fp8,31,0.010575999816258749
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,16,8,128,1,float16,float16,15,0.010575999816258749
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,16,8,128,1,float16,fp8,63,0.010586666564146677
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,16,8,128,1,float16,float16,31,0.010618666807810465
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,16,8,128,1,float16,fp8,15,0.010922666639089584
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,16,8,128,1,float16,float16,63,0.010586666564146677
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,16,8,128,1,float16,float16,127,0.010922666639089584
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,16,8,128,1,float16,fp8,127,0.010581333190202713
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,16,8,128,1,float16,float16,255,0.010773333410422007
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,16,8,128,1,float16,float16,511,0.01089599976936976
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,16,8,128,1,float16,fp8,255,0.010581333190202713
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,16,8,128,1,float16,fp8,511,0.012634667257467905
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,16,8,128,1,float16,float16,4095,0.017749333133300144
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,16,8,128,1,float16,fp8,4095,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,16,8,128,1,float16,float16,1023,0.012629333883523941
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,16,8,128,1,float16,fp8,2047,0.014677333335081736
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,16,8,128,1,float16,float16,2047,0.016063999384641647
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,16,8,128,1,float16,fp8,1023,0.012629333883523941
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,16,8,128,1,float16,float16,8191,0.022869333624839783
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,16,8,128,1,float16,float16,16383,0.03925866633653641
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,16,8,128,1,float16,fp8,16383,0.02492800106604894
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,16,8,128,1,float16,fp8,8191,0.01911466692884763
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,16,8,128,1,float16,fp8,32767,0.03959999978542328
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,16,1,128,1,float16,float16,1,0.012629333883523941
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,16,8,128,1,float16,float16,65535,0.10171199838320415
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,16,8,128,1,float16,float16,32767,0.060415998101234436
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,16,8,128,1,float16,fp8,65535,0.06007466713587443
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,16,1,128,1,float16,fp8,3,0.012629333883523941
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,16,8,128,1,float16,float16,131071,0.18926932414372763
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,16,1,128,1,float16,fp8,1,0.012629333883523941
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,16,8,128,1,float16,fp8,131071,0.10478933652242024
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,16,1,128,1,float16,float16,3,0.012629333883523941
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,16,1,128,1,float16,float16,7,0.011605333536863327
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,16,1,128,1,float16,float16,15,0.01268799975514412
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,16,1,128,1,float16,fp8,7,0.01268799975514412
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,16,1,128,1,float16,fp8,15,0.012629333883523941
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,16,1,128,1,float16,float16,63,0.011626667032639185
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,16,1,128,1,float16,fp8,31,0.012624000509579977
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,16,1,128,1,float16,float16,31,0.011605333536863327
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,16,1,128,1,float16,fp8,63,0.012629333883523941
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,16,1,128,1,float16,float16,127,0.012293333808581034
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,16,1,128,1,float16,fp8,255,0.012629333883523941
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,16,1,128,1,float16,fp8,127,0.012624000509579977
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,16,1,128,1,float16,float16,511,0.012624000509579977
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,16,1,128,1,float16,float16,255,0.01268799975514412
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,16,1,128,1,float16,fp8,511,0.012981332838535309
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,16,1,128,1,float16,fp8,2047,0.020821332931518555
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,16,1,128,1,float16,float16,2047,0.022869333624839783
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,16,1,128,1,float16,float16,1023,0.015024000157912573
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,16,1,128,1,float16,fp8,1023,0.014677333335081736
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,16,1,128,1,float16,fp8,4095,0.027306665976842243
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,16,1,128,1,float16,float16,4095,0.04130133241415024
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,16,2,128,1,float16,float16,1,0.012629333883523941
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,16,2,128,1,float16,fp8,1,0.012629333883523941
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,16,2,128,1,float16,float16,7,0.012975999464591345
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,16,2,128,1,float16,fp8,3,0.012634667257467905
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,16,1,128,1,float16,float16,8191,0.06211199859778086
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,16,2,128,1,float16,float16,3,0.012970666090647379
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,16,2,128,1,float16,float16,15,0.012896000097195307
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,16,2,128,1,float16,fp8,7,0.013013333082199097
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,16,1,128,1,float16,fp8,8191,0.04539733131726583
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,16,2,128,1,float16,float16,31,0.012629333883523941
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,16,2,128,1,float16,fp8,15,0.012629333883523941
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,16,2,128,1,float16,float16,127,0.012970666090647379
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,16,2,128,1,float16,fp8,31,0.012629333883523941
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,16,2,128,1,float16,fp8,127,0.012970666090647379
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,16,2,128,1,float16,fp8,255,0.01268799975514412
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,16,2,128,1,float16,float16,63,0.012970666090647379
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,16,2,128,1,float16,float16,511,0.015370666980743408
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,16,2,128,1,float16,fp8,63,0.012703999876976013
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,16,2,128,1,float16,float16,255,0.01331199953953425
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,16,2,128,1,float16,fp8,511,0.014698666830857595
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,16,2,128,1,float16,float16,2047,0.039936001102129616
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,16,2,128,1,float16,fp8,1023,0.018085333208243053
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,16,2,128,1,float16,float16,1023,0.019109333554903667
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,16,2,128,1,float16,float16,4095,0.062122667829195656
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,16,2,128,1,float16,fp8,2047,0.02696000039577484
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,16,4,128,1,float16,float16,1,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,16,2,128,1,float16,fp8,8191,0.06554133196671803
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,16,2,128,1,float16,fp8,4095,0.042319998145103455
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,16,2,128,1,float16,float16,8191,0.10512000322341919
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,16,4,128,1,float16,fp8,3,0.015018666783968607
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,16,4,128,1,float16,fp8,1,0.015018666783968607
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,16,4,128,1,float16,float16,3,0.016735999534527462
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,16,4,128,1,float16,fp8,7,0.015018666783968607
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,16,4,128,1,float16,float16,15,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,16,4,128,1,float16,float16,7,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,16,4,128,1,float16,fp8,31,0.015018666783968607
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,16,4,128,1,float16,fp8,15,0.015018666783968607
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,16,4,128,1,float16,float16,127,0.01670933390657107
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,16,4,128,1,float16,float16,31,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,16,4,128,1,float16,fp8,127,0.015018666783968607
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,16,4,128,1,float16,fp8,63,0.014698666830857595
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,16,4,128,1,float16,float16,63,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,16,4,128,1,float16,fp8,255,0.015029333531856537
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,16,4,128,1,float16,float16,255,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,16,4,128,1,float16,fp8,511,0.01877333347996076
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,16,4,128,1,float16,fp8,1023,0.02491733431816101
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,16,4,128,1,float16,float16,511,0.021162666380405426
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,16,4,128,1,float16,float16,2047,0.06246933341026306
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,16,4,128,1,float16,float16,1023,0.03549866626660029
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,16,4,128,1,float16,float16,4095,0.10615467031796773
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,16,4,128,1,float16,fp8,2047,0.043007999658584595
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,16,8,128,1,float16,float16,1,0.022869333624839783
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,16,4,128,1,float16,fp8,4095,0.06622399886449178
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,16,4,128,1,float16,float16,8191,0.19131733973821005
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,16,4,128,1,float16,fp8,8191,0.10957866907119751
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,16,8,128,1,float16,fp8,3,0.019797333826621372
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,16,8,128,1,float16,fp8,1,0.019109333554903667
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,16,8,128,1,float16,float16,3,0.022853332261244457
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,16,8,128,1,float16,float16,7,0.02252800017595291
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,16,8,128,1,float16,fp8,7,0.019797333826621372
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,16,8,128,1,float16,fp8,15,0.01911466692884763
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,16,8,128,1,float16,float16,31,0.022869333624839783
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,16,8,128,1,float16,fp8,31,0.019141333798567455
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,16,8,128,1,float16,float16,15,0.022869333624839783
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,16,8,128,1,float16,float16,63,0.022869333624839783
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,16,8,128,1,float16,fp8,255,0.0194560003777345
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,16,8,128,1,float16,float16,255,0.022863999009132385
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,16,8,128,1,float16,fp8,63,0.019797333826621372
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,16,8,128,1,float16,fp8,127,0.01911466692884763
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,16,8,128,1,float16,float16,511,0.03515733281771342
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,16,8,128,1,float16,float16,127,0.022181332111358643
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,16,8,128,1,float16,fp8,511,0.025258667767047882
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,16,8,128,1,float16,fp8,1023,0.037205333511034645
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,16,8,128,1,float16,float16,2047,0.09898666540781657
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,16,8,128,1,float16,float16,1023,0.05597866574923197
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,16,8,128,1,float16,fp8,4095,0.10102933645248413
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,16,8,128,1,float16,fp8,2047,0.062463998794555664
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,256,1,16,1,128,1,float16,float16,1,0.015013333410024643
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,256,1,16,1,128,1,float16,fp8,1,0.014677333335081736
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,16,8,128,1,float16,float16,8191,0.33604268232981366
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,16,8,128,1,float16,float16,4095,0.17868266503016153
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,256,1,16,1,128,1,float16,float16,3,0.015018666783968607
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,256,1,16,1,128,1,float16,fp8,3,0.014677333335081736
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,16,8,128,1,float16,fp8,8191,0.1786880095799764
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,256,1,16,1,128,1,float16,fp8,7,0.014778666198253632
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,256,1,16,1,128,1,float16,float16,15,0.01469333345691363
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,256,1,16,1,128,1,float16,float16,7,0.015018666783968607
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,256,1,16,1,128,1,float16,fp8,63,0.014677333335081736
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,256,1,16,1,128,1,float16,float16,63,0.014671999961137772
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,256,1,16,1,128,1,float16,fp8,127,0.014677333335081736
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,256,1,16,1,128,1,float16,fp8,15,0.014682666709025701
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,256,1,16,1,128,1,float16,fp8,31,0.014677333335081736
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,256,1,16,1,128,1,float16,float16,127,0.015024000157912573
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,256,1,16,1,128,1,float16,float16,255,0.015013333410024643
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,256,1,16,1,128,1,float16,fp8,255,0.014677333335081736
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,256,1,16,1,128,1,float16,float16,31,0.015018666783968607
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,256,1,16,1,128,1,float16,fp8,511,0.016735999534527462
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,256,1,16,1,128,1,float16,float16,511,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,256,1,16,2,128,1,float16,float16,1,0.0170666662355264
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,256,1,16,1,128,1,float16,float16,1023,0.020821332931518555
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,256,1,16,1,128,1,float16,fp8,1023,0.018768000106016796
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,256,1,16,2,128,1,float16,float16,3,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,256,1,16,2,128,1,float16,fp8,3,0.014677333335081736
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,256,1,16,1,128,1,float16,float16,2047,0.04232533276081085
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,256,1,16,1,128,1,float16,fp8,2047,0.0290133332212766
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,256,1,16,2,128,1,float16,float16,7,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,256,1,16,2,128,1,float16,fp8,15,0.016384000579516094
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,256,1,16,2,128,1,float16,fp8,1,0.01637866720557213
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,256,1,16,2,128,1,float16,float16,31,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,256,1,16,2,128,1,float16,fp8,7,0.014677333335081736
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,256,1,16,2,128,1,float16,float16,15,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,256,1,16,2,128,1,float16,fp8,63,0.01637866720557213
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,256,1,16,2,128,1,float16,float16,127,0.016704000532627106
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,256,1,16,2,128,1,float16,float16,63,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,256,1,16,2,128,1,float16,fp8,31,0.016042667130629223
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,256,1,16,2,128,1,float16,float16,255,0.016719999412695568
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,256,1,16,2,128,1,float16,fp8,511,0.01876266673207283
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,256,1,16,2,128,1,float16,float16,1023,0.03583999971548716
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,256,1,16,2,128,1,float16,float16,511,0.02081599955757459
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,256,1,16,2,128,1,float16,fp8,255,0.015018666783968607
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,256,1,16,2,128,1,float16,fp8,127,0.015018666783968607
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,256,1,16,4,128,1,float16,float16,1,0.022853332261244457
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,256,1,16,2,128,1,float16,fp8,1023,0.02491733431816101
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,256,1,16,4,128,1,float16,fp8,1,0.01911466692884763
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,256,1,16,2,128,1,float16,float16,2047,0.06382399797439575
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,256,1,16,2,128,1,float16,fp8,2047,0.04369066655635834
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,256,1,16,4,128,1,float16,float16,3,0.022869333624839783
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,256,1,16,4,128,1,float16,fp8,7,0.01911466692884763
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,256,1,16,4,128,1,float16,fp8,3,0.019461333751678467
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,256,1,16,4,128,1,float16,fp8,15,0.01911466692884763
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,256,1,16,4,128,1,float16,float16,15,0.022869333624839783
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,256,1,16,4,128,1,float16,fp8,31,0.020821332931518555
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,256,1,16,4,128,1,float16,fp8,63,0.019141333798567455
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,256,1,16,4,128,1,float16,float16,31,0.023210667073726654
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,256,1,16,4,128,1,float16,float16,7,0.022869333624839783
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,256,1,16,4,128,1,float16,fp8,127,0.018768000106016796
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,256,1,16,4,128,1,float16,float16,63,0.022869333624839783
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,256,1,16,4,128,1,float16,float16,127,0.022863999009132385
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,256,1,16,4,128,1,float16,fp8,255,0.01911466692884763
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,256,1,16,4,128,1,float16,fp8,1023,0.03719999889532725
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,256,1,16,4,128,1,float16,float16,255,0.022874665757020313
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,256,1,16,4,128,1,float16,float16,511,0.03481066723664602
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,256,1,16,4,128,1,float16,fp8,511,0.025253333151340485
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,256,1,16,8,128,1,float16,fp8,1,0.027637332677841187
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,256,1,16,4,128,1,float16,float16,2047,0.10103467106819153
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,256,1,16,8,128,1,float16,float16,1,0.03379199902216593
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,256,1,16,8,128,1,float16,fp8,3,0.028330666323502857
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,256,1,16,4,128,1,float16,fp8,2047,0.062122667829195656
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,256,1,16,4,128,1,float16,float16,1023,0.055311997731526695
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,256,1,16,8,128,1,float16,float16,3,0.0334346666932106
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,256,1,16,8,128,1,float16,fp8,7,0.028330666323502857
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,256,1,16,8,128,1,float16,float16,31,0.03345066557327906
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,256,1,16,8,128,1,float16,float16,7,0.03379199902216593
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,256,1,16,8,128,1,float16,fp8,15,0.027306665976842243
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,256,1,16,8,128,1,float16,float16,15,0.03515733281771342
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,256,1,16,8,128,1,float16,float16,127,0.03515733281771342
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,256,1,16,8,128,1,float16,fp8,31,0.0290133332212766
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,256,1,16,8,128,1,float16,fp8,127,0.02731200059254964
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,256,1,16,8,128,1,float16,fp8,63,0.027647999425729115
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,256,1,16,8,128,1,float16,fp8,255,0.027306665976842243
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,256,1,16,8,128,1,float16,float16,63,0.03515733281771342
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,256,1,16,8,128,1,float16,fp8,511,0.041984001795450844
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,256,1,16,8,128,1,float16,float16,255,0.037205333511034645
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,256,1,16,8,128,1,float16,fp8,1023,0.06007466713587443
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,256,1,16,8,128,1,float16,float16,511,0.05632533133029938
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,256,1,16,8,128,1,float16,float16,1023,0.09660800298055013
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,512,1,16,1,128,1,float16,fp8,1,0.01877333347996076
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,256,1,16,8,128,1,float16,fp8,2047,0.10478933652242024
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,256,1,16,8,128,1,float16,float16,2047,0.17885865767796835
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,512,1,16,1,128,1,float16,fp8,3,0.018768000106016796
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,512,1,16,1,128,1,float16,float16,3,0.01911466692884763
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,512,1,16,1,128,1,float16,float16,15,0.02080533280968666
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,512,1,16,1,128,1,float16,float16,1,0.01911466692884763
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,512,1,16,1,128,1,float16,float16,7,0.02082666630546252
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,512,1,16,1,128,1,float16,fp8,15,0.018432000031073887
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,512,1,16,1,128,1,float16,fp8,7,0.018789333601792652
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,512,1,16,1,128,1,float16,fp8,31,0.018768000106016796
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,512,1,16,1,128,1,float16,float16,31,0.0194560003777345
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,512,1,16,1,128,1,float16,fp8,63,0.018778666853904724
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,512,1,16,1,128,1,float16,float16,63,0.01911466692884763
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,512,1,16,1,128,1,float16,float16,511,0.02491733431816101
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,512,1,16,1,128,1,float16,float16,255,0.01911466692884763
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,512,1,16,1,128,1,float16,fp8,127,0.01877333347996076
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,512,1,16,1,128,1,float16,fp8,255,0.018768000106016796
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,512,1,16,1,128,1,float16,float16,1023,0.03925333420435587
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,512,1,16,1,128,1,float16,float16,127,0.01911466692884763
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,512,1,16,2,128,1,float16,float16,1,0.02457600086927414
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,512,1,16,1,128,1,float16,fp8,511,0.022874665757020313
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,512,1,16,2,128,1,float16,fp8,1,0.020821332931518555
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,512,1,16,1,128,1,float16,fp8,1023,0.02697066714366277
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,512,1,16,2,128,1,float16,float16,7,0.024570666253566742
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,512,1,16,2,128,1,float16,float16,3,0.023210667073726654
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,512,1,16,2,128,1,float16,float16,15,0.024234667420387268
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,512,1,16,2,128,1,float16,fp8,3,0.02081599955757459
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,512,1,16,2,128,1,float16,float16,31,0.023904000719388325
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,512,1,16,2,128,1,float16,float16,63,0.02457600086927414
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,512,1,16,2,128,1,float16,fp8,15,0.020821332931518555
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,512,1,16,2,128,1,float16,float16,127,0.023887999355793
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,512,1,16,2,128,1,float16,fp8,7,0.020821332931518555
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,512,1,16,2,128,1,float16,fp8,31,0.020821332931518555
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,512,1,16,2,128,1,float16,fp8,63,0.020821332931518555
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,512,1,16,2,128,1,float16,float16,255,0.02457600086927414
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,512,1,16,2,128,1,float16,fp8,255,0.020821332931518555
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,512,1,16,2,128,1,float16,fp8,127,0.020143999407688778
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,512,1,16,2,128,1,float16,fp8,511,0.026965332527955372
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,512,1,16,2,128,1,float16,float16,511,0.03618133316437403
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,512,1,16,4,128,1,float16,float16,3,0.03515733281771342
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,512,1,16,4,128,1,float16,fp8,1,0.0290133332212766
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,512,1,16,4,128,1,float16,float16,1,0.03515733281771342
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,512,1,16,2,128,1,float16,fp8,1023,0.03788800040880839
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,512,1,16,4,128,1,float16,float16,7,0.03549866626660029
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,512,1,16,4,128,1,float16,fp8,3,0.0290133332212766
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,512,1,16,2,128,1,float16,float16,1023,0.05597866574923197
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,512,1,16,4,128,1,float16,float16,15,0.03515200068553289
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,512,1,16,4,128,1,float16,fp8,15,0.02902399996916453
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,512,1,16,4,128,1,float16,fp8,7,0.0290133332212766
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,512,1,16,4,128,1,float16,float16,63,0.03515733281771342
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,512,1,16,4,128,1,float16,float16,31,0.03515733281771342
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,512,1,16,4,128,1,float16,fp8,127,0.0290133332212766
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,512,1,16,4,128,1,float16,fp8,31,0.0290133332212766
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,512,1,16,4,128,1,float16,fp8,63,0.028991999725500744
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,512,1,16,4,128,1,float16,float16,255,0.037205333511034645
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,512,1,16,4,128,1,float16,float16,511,0.05769599974155426
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,512,1,16,4,128,1,float16,fp8,255,0.029002666473388672
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,512,1,16,4,128,1,float16,float16,127,0.03549866626660029
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,512,1,16,4,128,1,float16,float16,1023,0.09693866968154907
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,512,1,16,8,128,1,float16,fp8,1,0.04539733131726583
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,512,1,16,4,128,1,float16,fp8,511,0.04164266586303711
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,512,1,16,8,128,1,float16,float16,3,0.0580266664425532
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,512,1,16,8,128,1,float16,float16,1,0.05836800237496694
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,512,1,16,4,128,1,float16,fp8,1023,0.062122667829195656
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,512,1,16,8,128,1,float16,fp8,7,0.04574400186538696
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,512,1,16,8,128,1,float16,fp8,3,0.04539733131726583
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,512,1,16,8,128,1,float16,float16,15,0.059392000238100685
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,512,1,16,8,128,1,float16,float16,31,0.0580320010582606
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,512,1,16,8,128,1,float16,fp8,15,0.045408000548680626
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,512,1,16,8,128,1,float16,fp8,63,0.045738667249679565
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,512,1,16,8,128,1,float16,fp8,31,0.04573333263397217
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,512,1,16,8,128,1,float16,float16,7,0.05973333120346069
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,512,1,16,8,128,1,float16,fp8,127,0.04539733131726583
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,512,1,16,8,128,1,float16,float16,63,0.06006399790445963
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,512,1,16,8,128,1,float16,float16,255,0.060080001751581825
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,512,1,16,8,128,1,float16,float16,127,0.05973333120346069
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,512,1,16,8,128,1,float16,float16,511,0.09864532947540283
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,16,1,128,1,float16,fp8,1,0.010928000013033548
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,512,1,16,8,128,1,float16,fp8,255,0.04744533201058706
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,512,1,16,8,128,1,float16,fp8,511,0.07031466563542683
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,16,1,128,1,float16,float16,3,0.010922666639089584
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,16,1,128,1,float16,float16,1,0.010581333190202713
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,16,1,128,1,float16,fp8,3,0.010821333775917688
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,512,1,16,8,128,1,float16,fp8,1023,0.1088853379090627
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,16,1,128,1,float16,float16,7,0.010581333190202713
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,512,1,16,8,128,1,float16,float16,1023,0.17851734161376953
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,16,1,128,1,float16,fp8,7,0.010581333190202713
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,16,1,128,1,float16,fp8,31,0.010591999938090643
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,16,1,128,1,float16,fp8,15,0.010928000013033548
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,16,1,128,1,float16,fp8,63,0.010922666639089584
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,16,1,128,1,float16,float16,31,0.010586666564146677
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,16,1,128,1,float16,float16,15,0.010928000013033548
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,16,1,128,1,float16,float16,63,0.01091733326514562
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,16,1,128,1,float16,float16,255,0.01081066702802976
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,16,1,128,1,float16,float16,511,0.012650666137536367
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,16,1,128,1,float16,fp8,255,0.010778666784365972
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,16,1,128,1,float16,fp8,127,0.010490667074918747
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,16,1,128,1,float16,float16,127,0.010922666639089584
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,16,1,128,1,float16,fp8,1023,0.012639999389648438
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,16,1,128,1,float16,fp8,511,0.012613333761692047
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,16,1,128,1,float16,float16,2047,0.014698666830857595
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,16,1,128,1,float16,float16,1023,0.012639999389648438
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,16,1,128,1,float16,fp8,2047,0.014677333335081736
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,16,1,128,1,float16,float16,4095,0.019797333826621372
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,16,1,128,1,float16,fp8,4095,0.01877333347996076
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,16,1,128,1,float16,fp8,16383,0.022869333624839783
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,16,1,128,1,float16,fp8,8191,0.02184533327817917
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,16,1,128,1,float16,fp8,32767,0.026965332527955372
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,16,1,128,1,float16,float16,8191,0.021503999829292297
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,16,1,128,1,float16,float16,16383,0.022863999009132385
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,16,1,128,1,float16,float16,32767,0.027301333844661713
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,16,2,128,1,float16,float16,1,0.010677333921194077
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,16,1,128,1,float16,float16,65535,0.045050665736198425
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,16,1,128,1,float16,fp8,65535,0.031061333914597828
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,16,2,128,1,float16,float16,3,0.010581333190202713
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,16,2,128,1,float16,fp8,1,0.011264000087976456
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,16,2,128,1,float16,float16,7,0.010581333190202713
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,16,1,128,1,float16,float16,131071,0.0679253339767456
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,16,2,128,1,float16,fp8,3,0.010581333190202713
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,16,1,128,1,float16,fp8,131071,0.04948266843954722
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,16,2,128,1,float16,fp8,7,0.010992000500361124
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,16,2,128,1,float16,fp8,31,0.010581333190202713
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,16,2,128,1,float16,float16,15,0.010581333190202713
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,16,2,128,1,float16,float16,63,0.010490667074918747
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,16,2,128,1,float16,fp8,15,0.010586666564146677
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,16,2,128,1,float16,float16,31,0.010581333190202713
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,16,2,128,1,float16,fp8,63,0.010591999938090643
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,16,2,128,1,float16,float16,127,0.010928000013033548
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,16,2,128,1,float16,float16,255,0.01080000028014183
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,16,2,128,1,float16,fp8,127,0.010586666564146677
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,16,2,128,1,float16,float16,511,0.01091733326514562
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,16,2,128,1,float16,fp8,255,0.010645333677530289
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,16,2,128,1,float16,fp8,2047,0.016735999534527462
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,16,2,128,1,float16,fp8,511,0.012634667257467905
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,16,2,128,1,float16,float16,1023,0.012293333808581034
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,16,2,128,1,float16,fp8,1023,0.010911999891201654
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,16,2,128,1,float16,float16,2047,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,16,2,128,1,float16,fp8,4095,0.018810667097568512
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,16,2,128,1,float16,float16,4095,0.018778666853904724
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,16,2,128,1,float16,fp8,8191,0.020821332931518555
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,16,2,128,1,float16,float16,8191,0.021162666380405426
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,16,2,128,1,float16,fp8,16383,0.022869333624839783
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,16,2,128,1,float16,float16,16383,0.025263999899228413
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,16,2,128,1,float16,float16,32767,0.041296000281969704
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,16,2,128,1,float16,fp8,32767,0.027280000348885853
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,16,2,128,1,float16,float16,65535,0.06211199859778086
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,16,2,128,1,float16,fp8,65535,0.04165866722663244
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,16,2,128,1,float16,float16,131071,0.10274133086204529
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,16,4,128,1,float16,float16,1,0.010501333822806677
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,16,4,128,1,float16,fp8,1,0.01081066702802976
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,16,4,128,1,float16,float16,3,0.010581333190202713
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,16,2,128,1,float16,fp8,131071,0.0628053347269694
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,16,4,128,1,float16,float16,7,0.010666667173306147
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,16,4,128,1,float16,fp8,3,0.010581333190202713
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,16,4,128,1,float16,float16,15,0.010922666639089584
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,16,4,128,1,float16,fp8,7,0.010816000401973724
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,16,4,128,1,float16,fp8,15,0.010805333654085795
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,16,4,128,1,float16,fp8,63,0.010661333799362183
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,16,4,128,1,float16,fp8,31,0.010581333190202713
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,16,4,128,1,float16,float16,31,0.010581333190202713
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,16,4,128,1,float16,float16,63,0.010591999938090643
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,16,4,128,1,float16,fp8,127,0.010922666639089584
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,16,4,128,1,float16,float16,127,0.010586666564146677
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,16,4,128,1,float16,float16,255,0.010586666564146677
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,16,4,128,1,float16,float16,511,0.010805333654085795
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,16,4,128,1,float16,fp8,255,0.010480000327030817
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,16,4,128,1,float16,float16,2047,0.016730666160583496
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,16,4,128,1,float16,fp8,511,0.012629333883523941
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,16,4,128,1,float16,fp8,1023,0.012629333883523941
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,16,4,128,1,float16,float16,1023,0.012645332763592402
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,16,4,128,1,float16,fp8,2047,0.014677333335081736
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,16,4,128,1,float16,fp8,4095,0.016719999412695568
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,16,4,128,1,float16,float16,4095,0.017055999487638474
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,16,4,128,1,float16,fp8,8191,0.019797333826621372
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,16,4,128,1,float16,float16,8191,0.022858666876951855
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,16,4,128,1,float16,float16,16383,0.038906666139761605
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,16,4,128,1,float16,fp8,16383,0.02491733431816101
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,16,4,128,1,float16,float16,32767,0.060415998101234436
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,16,4,128,1,float16,fp8,32767,0.03925333420435587
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,16,4,128,1,float16,float16,65535,0.10274133086204529
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,16,4,128,1,float16,fp8,65535,0.061093335350354515
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,16,8,128,1,float16,float16,1,0.01089599976936976
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,16,4,128,1,float16,float16,131071,0.1879040002822876
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,16,8,128,1,float16,fp8,1,0.010591999938090643
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,16,8,128,1,float16,float16,7,0.010677333921194077
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,16,8,128,1,float16,float16,3,0.010933333386977514
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,16,4,128,1,float16,fp8,131071,0.10513066252072652
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,16,8,128,1,float16,fp8,3,0.01098666712641716
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,16,8,128,1,float16,fp8,15,0.01091733326514562
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,16,8,128,1,float16,float16,15,0.01091733326514562
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,16,8,128,1,float16,fp8,7,0.010922666639089584
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,16,8,128,1,float16,float16,31,0.010581333190202713
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,16,8,128,1,float16,fp8,31,0.010805333654085795
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,16,8,128,1,float16,fp8,127,0.010591999938090643
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,16,8,128,1,float16,float16,511,0.01091733326514562
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,16,8,128,1,float16,float16,63,0.010575999816258749
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,16,8,128,1,float16,fp8,511,0.011690666278203329
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,16,8,128,1,float16,float16,255,0.01091733326514562
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,16,8,128,1,float16,fp8,255,0.010922666639089584
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,16,8,128,1,float16,float16,127,0.010922666639089584
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,16,8,128,1,float16,fp8,63,0.01091733326514562
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,16,8,128,1,float16,float16,1023,0.012618667135636011
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,16,8,128,1,float16,fp8,1023,0.012634667257467905
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,16,8,128,1,float16,fp8,2047,0.016735999534527462
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,16,8,128,1,float16,float16,4095,0.021162666380405426
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,16,8,128,1,float16,fp8,4095,0.01913600042462349
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,16,8,128,1,float16,float16,16383,0.05974400043487549
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,16,8,128,1,float16,float16,2047,0.016735999534527462
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,16,8,128,1,float16,float16,8191,0.039247999588648476
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,16,8,128,1,float16,fp8,16383,0.039594667653242745
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,16,8,128,1,float16,fp8,8191,0.02491733431816101
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,16,8,128,1,float16,float16,32767,0.1013759970664978
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,16,8,128,1,float16,fp8,65535,0.10205866893132527
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,16,8,128,1,float16,fp8,32767,0.06007466713587443
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,16,8,128,1,float16,float16,65535,0.18517333269119263
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1024,1,16,1,128,1,float16,float16,3,0.02935466667016347
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1024,1,16,1,128,1,float16,float16,1,0.029359998802344005
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,16,8,128,1,float16,float16,131071,0.35651731491088867
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,16,8,128,1,float16,fp8,131071,0.18927466869354248
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1024,1,16,1,128,1,float16,fp8,1,0.027295999228954315
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1024,1,16,1,128,1,float16,fp8,3,0.026965332527955372
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1024,1,16,1,128,1,float16,float16,7,0.02935466667016347
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1024,1,16,1,128,1,float16,float16,15,0.029359998802344005
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1024,1,16,1,128,1,float16,fp8,7,0.027306665976842243
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1024,1,16,1,128,1,float16,fp8,15,0.026965332527955372
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1024,1,16,1,128,1,float16,fp8,63,0.026965332527955372
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1024,1,16,1,128,1,float16,float16,31,0.029343999922275543
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1024,1,16,1,128,1,float16,fp8,31,0.027306665976842243
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1024,1,16,1,128,1,float16,float16,63,0.0290133332212766
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1024,1,16,1,128,1,float16,fp8,127,0.0269813338915507
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1024,1,16,1,128,1,float16,float16,127,0.029359998802344005
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1024,1,16,1,128,1,float16,float16,255,0.02903999884923299
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1024,1,16,2,128,1,float16,float16,3,0.03751999884843826
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1024,1,16,2,128,1,float16,float16,1,0.03718400001525879
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1024,1,16,1,128,1,float16,fp8,255,0.026965332527955372
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1024,1,16,1,128,1,float16,float16,511,0.04094400008519491
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1024,1,16,2,128,1,float16,fp8,1,0.031061333914597828
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1024,1,16,2,128,1,float16,fp8,3,0.031061333914597828
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1024,1,16,1,128,1,float16,fp8,511,0.0334346666932106
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1024,1,16,2,128,1,float16,fp8,7,0.031061333914597828
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1024,1,16,2,128,1,float16,float16,7,0.03754666695992152
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1024,1,16,2,128,1,float16,float16,31,0.03721066564321518
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1024,1,16,2,128,1,float16,float16,15,0.03719999889532725
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1024,1,16,2,128,1,float16,fp8,15,0.03107733279466629
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1024,1,16,2,128,1,float16,float16,63,0.037205333511034645
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1024,1,16,2,128,1,float16,fp8,31,0.031061333914597828
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1024,1,16,2,128,1,float16,fp8,63,0.031061333914597828
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1024,1,16,2,128,1,float16,float16,127,0.037205333511034645
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1024,1,16,2,128,1,float16,fp8,127,0.0310506671667099
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1024,1,16,2,128,1,float16,float16,255,0.03925333420435587
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1024,1,16,2,128,1,float16,fp8,255,0.031061333914597828
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1024,1,16,4,128,1,float16,fp8,1,0.04779199759165446
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1024,1,16,4,128,1,float16,float16,3,0.06178133189678192
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1024,1,16,4,128,1,float16,float16,1,0.06178666651248932
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1024,1,16,2,128,1,float16,float16,511,0.06006933252016703
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1024,1,16,2,128,1,float16,fp8,511,0.044026667873064675
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1024,1,16,4,128,1,float16,fp8,3,0.04744533201058706
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1024,1,16,4,128,1,float16,float16,7,0.061434666315714516
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1024,1,16,4,128,1,float16,fp8,15,0.04778666794300079
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1024,1,16,4,128,1,float16,fp8,7,0.04778666794300079
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1024,1,16,4,128,1,float16,float16,15,0.06178133189678192
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1024,1,16,4,128,1,float16,fp8,63,0.047450666626294456
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1024,1,16,4,128,1,float16,float16,31,0.06178133189678192
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1024,1,16,4,128,1,float16,fp8,31,0.04778666794300079
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1024,1,16,4,128,1,float16,float16,63,0.06007466713587443
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1024,1,16,4,128,1,float16,float16,127,0.062122667829195656
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1024,1,16,4,128,1,float16,fp8,127,0.04778666794300079
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1024,1,16,4,128,1,float16,float16,255,0.062122667829195656
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1024,1,16,8,128,1,float16,float16,3,0.10884799559911092
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1024,1,16,8,128,1,float16,fp8,1,0.08053866525491078
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1024,1,16,4,128,1,float16,fp8,255,0.05051200091838837
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1024,1,16,8,128,1,float16,float16,1,0.10922666390736897
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1024,1,16,4,128,1,float16,float16,511,0.10206933816274007
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1024,1,16,8,128,1,float16,fp8,3,0.08225599924723308
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1024,1,16,4,128,1,float16,fp8,511,0.07203733424345653
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1024,1,16,8,128,1,float16,fp8,7,0.08225599924723308
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1024,1,16,8,128,1,float16,float16,7,0.10922132929166158
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1024,1,16,8,128,1,float16,float16,15,0.1088853379090627
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1024,1,16,8,128,1,float16,float16,63,0.11059199770291646
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1024,1,16,8,128,1,float16,fp8,15,0.08225599924723308
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1024,1,16,8,128,1,float16,float16,31,0.10921600461006165
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1024,1,16,8,128,1,float16,fp8,31,0.08227733274300893
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1024,1,16,8,128,1,float16,float16,127,0.10889599720637004
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1024,1,16,8,128,1,float16,fp8,63,0.08226133386294048
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1024,1,16,8,128,1,float16,fp8,127,0.08226133386294048
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1024,1,16,8,128,1,float16,fp8,255,0.08260266482830048
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1024,1,16,8,128,1,float16,float16,255,0.10922666390736897
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1024,1,16,8,128,1,float16,float16,511,0.18312533696492514
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2048,1,16,1,128,1,float16,float16,1,0.048469334840774536
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1024,1,16,8,128,1,float16,fp8,511,0.1262933313846588
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2048,1,16,1,128,1,float16,fp8,1,0.045738667249679565
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2048,1,16,1,128,1,float16,float16,3,0.04779199759165446
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2048,1,16,1,128,1,float16,fp8,3,0.045738667249679565
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2048,1,16,1,128,1,float16,float16,7,0.04915200173854828
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2048,1,16,1,128,1,float16,fp8,7,0.04574400186538696
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2048,1,16,1,128,1,float16,fp8,31,0.04541333516438802
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2048,1,16,1,128,1,float16,fp8,15,0.04539733131726583
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2048,1,16,1,128,1,float16,float16,15,0.0484799991051356
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2048,1,16,1,128,1,float16,float16,31,0.04778666794300079
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2048,1,16,1,128,1,float16,fp8,63,0.04540266593297323
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2048,1,16,1,128,1,float16,float16,63,0.048810665806134544
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2048,1,16,1,128,1,float16,float16,127,0.05017599960168203
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2048,1,16,1,128,1,float16,fp8,127,0.04539733131726583
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2048,1,16,2,128,1,float16,float16,1,0.06588266789913177
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2048,1,16,2,128,1,float16,fp8,1,0.05153066913286845
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2048,1,16,1,128,1,float16,float16,255,0.05017599960168203
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2048,1,16,2,128,1,float16,float16,3,0.06587199866771698
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2048,1,16,2,128,1,float16,fp8,3,0.052239999175071716
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2048,1,16,1,128,1,float16,fp8,255,0.04334933559099833
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2048,1,16,2,128,1,float16,float16,7,0.06655466556549072
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2048,1,16,2,128,1,float16,fp8,7,0.05190399785836538
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2048,1,16,2,128,1,float16,float16,31,0.0641599992911021
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2048,1,16,2,128,1,float16,float16,15,0.06588266789913177
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2048,1,16,2,128,1,float16,fp8,15,0.051882664362589516
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2048,1,16,2,128,1,float16,fp8,31,0.051882664362589516
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2048,1,16,2,128,1,float16,float16,63,0.06621866424878438
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2048,1,16,2,128,1,float16,fp8,63,0.05190399785836538
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2048,1,16,2,128,1,float16,float16,127,0.06690133114655812
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2048,1,16,2,128,1,float16,fp8,127,0.051882664362589516
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2048,1,16,4,128,1,float16,float16,1,0.11365866661071777
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2048,1,16,4,128,1,float16,float16,3,0.11332266529401143
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2048,1,16,4,128,1,float16,fp8,1,0.08703999718030293
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2048,1,16,4,128,1,float16,float16,7,0.11502933502197266
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2048,1,16,2,128,1,float16,fp8,255,0.05494933327039083
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2048,1,16,4,128,1,float16,fp8,7,0.08635733524958293
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2048,1,16,2,128,1,float16,float16,255,0.06656000018119812
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2048,1,16,4,128,1,float16,fp8,3,0.08669867118199666
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2048,1,16,4,128,1,float16,float16,15,0.1133066713809967
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2048,1,16,4,128,1,float16,float16,31,0.1129813293615977
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2048,1,16,4,128,1,float16,fp8,15,0.08669333656628926
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2048,1,16,4,128,1,float16,fp8,31,0.08738133311271667
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2048,1,16,4,128,1,float16,float16,63,0.1129813293615977
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2048,1,16,4,128,1,float16,fp8,63,0.08633599678675334
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2048,1,16,4,128,1,float16,float16,127,0.11161067088445027
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2048,1,16,4,128,1,float16,fp8,127,0.08774399757385254
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2048,1,16,4,128,1,float16,float16,255,0.11502933502197266
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2048,1,16,4,128,1,float16,fp8,255,0.08874666690826416
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2048,1,16,8,128,1,float16,float16,7,0.2053119937578837
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2048,1,16,8,128,1,float16,fp8,1,0.1544533371925354
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2048,1,16,8,128,1,float16,float16,3,0.20753065745035806
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2048,1,16,8,128,1,float16,fp8,3,0.1525759994983673
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2048,1,16,8,128,1,float16,float16,1,0.20751466353734335
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2048,1,16,8,128,1,float16,fp8,7,0.15411200126012167
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2048,1,16,8,128,1,float16,float16,15,0.20753065745035806
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2048,1,16,8,128,1,float16,float16,31,0.20548800627390543
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2048,1,16,8,128,1,float16,fp8,31,0.154448002576828
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2048,1,16,8,128,1,float16,fp8,15,0.15377066532770792
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2048,1,16,8,128,1,float16,fp8,63,0.15291733543078104
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2048,1,16,8,128,1,float16,float16,63,0.20722132921218872
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,16,1,128,1,float16,float16,1,0.010666667173306147
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2048,1,16,8,128,1,float16,float16,127,0.20719999074935913
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2048,1,16,8,128,1,float16,fp8,127,0.15223466356595358
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,16,1,128,1,float16,fp8,1,0.01091733326514562
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,16,1,128,1,float16,float16,7,0.010821333775917688
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,16,1,128,1,float16,float16,3,0.010581333190202713
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,16,1,128,1,float16,fp8,3,0.010922666639089584
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,16,1,128,1,float16,fp8,7,0.010837333897749582
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,16,1,128,1,float16,float16,15,0.010586666564146677
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,16,1,128,1,float16,float16,31,0.010581333190202713
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,16,1,128,1,float16,fp8,15,0.010581333190202713
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2048,1,16,8,128,1,float16,float16,255,0.20736533403396606
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,16,1,128,1,float16,fp8,31,0.010629333555698395
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,16,1,128,1,float16,fp8,63,0.010666667173306147
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2048,1,16,8,128,1,float16,fp8,255,0.1539253294467926
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,16,1,128,1,float16,float16,127,0.010645333677530289
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,16,1,128,1,float16,float16,63,0.010591999938090643
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,16,1,128,1,float16,fp8,127,0.011264000087976456
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,16,1,128,1,float16,float16,255,0.010581333190202713
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,16,1,128,1,float16,fp8,511,0.012671999633312225
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,16,1,128,1,float16,float16,511,0.012624000509579977
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,16,1,128,1,float16,fp8,2047,0.01877333347996076
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,16,1,128,1,float16,fp8,255,0.010591999938090643
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,16,1,128,1,float16,fp8,1023,0.01228800043463707
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,16,1,128,1,float16,float16,1023,0.012624000509579977
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,16,1,128,1,float16,float16,2047,0.018778666853904724
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,16,1,128,1,float16,fp8,4095,0.020143999407688778
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,16,1,128,1,float16,float16,4095,0.020821332931518555
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,16,1,128,1,float16,fp8,8191,0.021509334444999695
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,16,1,128,1,float16,float16,8191,0.022869333624839783
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,16,1,128,1,float16,fp8,16383,0.024933333198229473
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,16,1,128,1,float16,float16,16383,0.025600001215934753
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,16,1,128,1,float16,float16,32767,0.04332800209522247
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,16,2,128,1,float16,float16,1,0.010826667149861654
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,16,1,128,1,float16,fp8,32767,0.03105599929889043
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,16,2,128,1,float16,fp8,1,0.010922666639089584
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,16,2,128,1,float16,float16,3,0.010650667051474253
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,16,2,128,1,float16,float16,7,0.010778666784365972
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,16,1,128,1,float16,fp8,65535,0.04505600035190582
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,16,2,128,1,float16,fp8,7,0.010581333190202713
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,16,2,128,1,float16,fp8,3,0.010586666564146677
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,16,1,128,1,float16,float16,65535,0.0628053347269694
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,16,2,128,1,float16,fp8,31,0.010677333921194077
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,16,2,128,1,float16,float16,15,0.010586666564146677
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,16,2,128,1,float16,float16,31,0.010586666564146677
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,16,2,128,1,float16,fp8,15,0.010634666929642359
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,16,2,128,1,float16,float16,63,0.010682666053374609
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,16,2,128,1,float16,float16,255,0.010586666564146677
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,16,2,128,1,float16,float16,127,0.010586666564146677
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,16,2,128,1,float16,fp8,63,0.010586666564146677
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,16,2,128,1,float16,float16,511,0.010928000013033548
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,16,2,128,1,float16,fp8,511,0.012618667135636011
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,16,2,128,1,float16,fp8,127,0.010821333775917688
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,16,2,128,1,float16,float16,1023,0.012629333883523941
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,16,2,128,1,float16,fp8,255,0.010640000303586325
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,16,2,128,1,float16,fp8,1023,0.012634667257467905
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,16,2,128,1,float16,float16,4095,0.01877333347996076
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,16,2,128,1,float16,fp8,8191,0.01911466692884763
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,16,2,128,1,float16,float16,2047,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,16,2,128,1,float16,fp8,2047,0.014997333288192749
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,16,2,128,1,float16,fp8,4095,0.017749333133300144
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,16,2,128,1,float16,fp8,16383,0.02491733431816101
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,16,2,128,1,float16,float16,8191,0.02252800017595291
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,16,2,128,1,float16,float16,16383,0.03959999978542328
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,16,2,128,1,float16,float16,32767,0.06075733403364817
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,16,4,128,1,float16,float16,1,0.010597333312034607
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,16,4,128,1,float16,fp8,1,0.010618666807810465
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,16,2,128,1,float16,fp8,32767,0.039594667653242745
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,16,4,128,1,float16,float16,3,0.010581333190202713
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,16,2,128,1,float16,fp8,65535,0.06075733403364817
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,16,4,128,1,float16,float16,15,0.0106133334338665
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,16,4,128,1,float16,fp8,3,0.010656000425418219
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,16,2,128,1,float16,float16,65535,0.10308266679445903
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,16,4,128,1,float16,fp8,15,0.01091733326514562
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,16,4,128,1,float16,fp8,7,0.010581333190202713
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,16,4,128,1,float16,float16,7,0.010928000013033548
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,16,4,128,1,float16,fp8,31,0.010645333677530289
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,16,4,128,1,float16,float16,127,0.010687999427318573
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,16,4,128,1,float16,fp8,255,0.01091733326514562
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,16,4,128,1,float16,float16,255,0.010586666564146677
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,16,4,128,1,float16,float16,31,0.010575999816258749
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,16,4,128,1,float16,float16,63,0.010933333386977514
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,16,4,128,1,float16,fp8,63,0.010640000303586325
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,16,4,128,1,float16,fp8,127,0.010922666639089584
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,16,4,128,1,float16,float16,511,0.01116266722480456
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,16,4,128,1,float16,fp8,511,0.012629333883523941
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,16,4,128,1,float16,float16,1023,0.012645332763592402
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,16,4,128,1,float16,fp8,1023,0.012634667257467905
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,16,4,128,1,float16,float16,8191,0.03925333420435587
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,16,4,128,1,float16,float16,2047,0.01877333347996076
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,16,4,128,1,float16,fp8,2047,0.0170666662355264
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,16,4,128,1,float16,fp8,8191,0.02492266645034154
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,16,4,128,1,float16,float16,4095,0.020821332931518555
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,16,4,128,1,float16,fp8,4095,0.020831999679406483
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,16,4,128,1,float16,float16,16383,0.06009600063165029
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,16,4,128,1,float16,fp8,16383,0.03925333420435587
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,16,4,128,1,float16,float16,32767,0.10171733299891154
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,16,8,128,1,float16,float16,3,0.010933333386977514
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,16,8,128,1,float16,float16,1,0.010581333190202713
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,16,8,128,1,float16,fp8,3,0.010911999891201654
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,16,8,128,1,float16,fp8,1,0.010928000013033548
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,16,8,128,1,float16,float16,7,0.011855999628702799
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,16,4,128,1,float16,fp8,32767,0.06076266864935557
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,16,4,128,1,float16,fp8,65535,0.10308266679445903
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,16,4,128,1,float16,float16,65535,0.18636800845464072
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,16,8,128,1,float16,fp8,7,0.010581333190202713
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,16,8,128,1,float16,fp8,31,0.011157333850860596
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,16,8,128,1,float16,fp8,15,0.010922666639089584
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,16,8,128,1,float16,fp8,63,0.012639999389648438
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,16,8,128,1,float16,float16,15,0.010581333190202713
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,16,8,128,1,float16,float16,31,0.011605333536863327
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,16,8,128,1,float16,fp8,127,0.010922666639089584
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,16,8,128,1,float16,float16,63,0.01184533288081487
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,16,8,128,1,float16,float16,255,0.011610666910807291
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,16,8,128,1,float16,float16,127,0.011605333536863327
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,16,8,128,1,float16,fp8,255,0.01091733326514562
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,16,8,128,1,float16,float16,511,0.012629333883523941
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,16,8,128,1,float16,fp8,1023,0.014671999961137772
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,16,8,128,1,float16,fp8,2047,0.019109333554903667
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,16,8,128,1,float16,fp8,511,0.01228800043463707
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,16,8,128,1,float16,fp8,4095,0.02492266645034154
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,16,8,128,1,float16,float16,1023,0.014677333335081736
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,16,8,128,1,float16,float16,8191,0.06109866499900818
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,16,8,128,1,float16,float16,2047,0.021162666380405426
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,16,8,128,1,float16,float16,4095,0.038912000755469
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,16,8,128,1,float16,fp8,8191,0.04095999896526337
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,16,8,128,1,float16,float16,16383,0.10376532872517903
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,16,8,128,1,float16,fp8,16383,0.062122667829195656
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,16,8,128,1,float16,float16,32767,0.18895467122395834
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,8,1,128,1,float16,fp8,1,0.010933333386977514
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,8,1,128,1,float16,float16,1,0.010645333677530289
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,8,1,128,1,float16,float16,3,0.010677333921194077
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,16,8,128,1,float16,fp8,32767,0.10683733224868774
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,16,8,128,1,float16,float16,65535,0.35891199111938477
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,16,8,128,1,float16,fp8,65535,0.19524266322453818
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,8,1,128,1,float16,fp8,7,0.01081066702802976
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,8,1,128,1,float16,fp8,15,0.010837333897749582
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,8,1,128,1,float16,fp8,3,0.010821333775917688
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,8,1,128,1,float16,float16,7,0.010938666760921478
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,8,1,128,1,float16,float16,15,0.010640000303586325
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,8,1,128,1,float16,fp8,31,0.010922666639089584
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,8,1,128,1,float16,float16,63,0.010826667149861654
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,8,1,128,1,float16,float16,31,0.010581333190202713
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,8,1,128,1,float16,float16,127,0.010629333555698395
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,8,1,128,1,float16,fp8,63,0.010591999938090643
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,8,1,128,1,float16,float16,255,0.010586666564146677
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,8,1,128,1,float16,float16,511,0.010911999891201654
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,8,1,128,1,float16,fp8,127,0.010581333190202713
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,8,1,128,1,float16,fp8,511,0.012629333883523941
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,8,1,128,1,float16,fp8,1023,0.012272000312805176
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,8,1,128,1,float16,fp8,255,0.010581333190202713
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,8,1,128,1,float16,float16,1023,0.011946666985750198
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,8,1,128,1,float16,float16,4095,0.01877333347996076
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,8,1,128,1,float16,float16,8191,0.022869333624839783
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,8,1,128,1,float16,float16,2047,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,8,1,128,1,float16,fp8,8191,0.020821332931518555
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,8,1,128,1,float16,fp8,2047,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,8,1,128,1,float16,fp8,4095,0.018432000031073887
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,8,1,128,1,float16,float16,16383,0.03994133323431015
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,8,1,128,1,float16,fp8,16383,0.025258667767047882
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,8,2,128,1,float16,float16,1,0.010928000013033548
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,8,2,128,1,float16,fp8,1,0.0106133334338665
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,8,1,128,1,float16,float16,32767,0.06041066845258077
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,8,1,128,1,float16,fp8,32767,0.039936001102129616
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,8,2,128,1,float16,float16,3,0.01091733326514562
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,8,1,128,1,float16,fp8,65535,0.061797335743904114
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,8,2,128,1,float16,fp8,3,0.010575999816258749
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,8,2,128,1,float16,float16,7,0.010922666639089584
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,8,1,128,1,float16,float16,65535,0.10342400272687276
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,8,2,128,1,float16,float16,15,0.010581333190202713
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,8,2,128,1,float16,fp8,7,0.010586666564146677
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,8,2,128,1,float16,fp8,15,0.010581333190202713
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,8,2,128,1,float16,float16,63,0.010901333143313726
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,8,2,128,1,float16,float16,31,0.01090666651725769
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,8,2,128,1,float16,float16,127,0.010586666564146677
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,8,2,128,1,float16,fp8,63,0.011247999966144562
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,8,2,128,1,float16,fp8,31,0.010581333190202713
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,8,2,128,1,float16,float16,255,0.010591999938090643
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,8,2,128,1,float16,float16,511,0.012522666404644648
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,8,2,128,1,float16,fp8,255,0.010618666807810465
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,8,2,128,1,float16,fp8,127,0.01081066702802976
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,8,2,128,1,float16,float16,1023,0.012629333883523941
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,8,2,128,1,float16,fp8,511,0.012634667257467905
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,8,2,128,1,float16,fp8,1023,0.012650666137536367
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,8,2,128,1,float16,float16,2047,0.01877333347996076
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,8,2,128,1,float16,float16,4095,0.02218666672706604
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,8,2,128,1,float16,fp8,2047,0.017071999609470367
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,8,2,128,1,float16,fp8,4095,0.019120000302791595
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,8,2,128,1,float16,fp8,8191,0.02492266645034154
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,8,2,128,1,float16,float16,8191,0.039936001102129616
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,8,2,128,1,float16,fp8,16383,0.03957333415746689
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,8,2,128,1,float16,float16,16383,0.061109334230422974
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,8,4,128,1,float16,float16,1,0.011264000087976456
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,8,2,128,1,float16,fp8,32767,0.06144000093142191
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,8,2,128,1,float16,float16,32767,0.10274133086204529
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,8,4,128,1,float16,fp8,1,0.011952000359694162
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,8,4,128,1,float16,fp8,3,0.011605333536863327
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,8,4,128,1,float16,float16,3,0.010922666639089584
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,8,4,128,1,float16,float16,7,0.012629333883523941
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,8,2,128,1,float16,fp8,65535,0.10308266679445903
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,8,2,128,1,float16,float16,65535,0.1868799924850464
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,8,4,128,1,float16,fp8,7,0.011946666985750198
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,8,4,128,1,float16,float16,15,0.010842667271693548
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,8,4,128,1,float16,float16,31,0.012629333883523941
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,8,4,128,1,float16,fp8,15,0.010922666639089584
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,8,4,128,1,float16,fp8,31,0.010581333190202713
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,8,4,128,1,float16,float16,63,0.012629333883523941
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,8,4,128,1,float16,float16,255,0.010597333312034607
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,8,4,128,1,float16,fp8,127,0.011861333002646765
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,8,4,128,1,float16,fp8,255,0.011264000087976456
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,8,4,128,1,float16,fp8,63,0.010944000134865442
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,8,4,128,1,float16,float16,127,0.010928000013033548
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,8,4,128,1,float16,fp8,511,0.012661332885424295
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,8,4,128,1,float16,float16,1023,0.015018666783968607
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,8,4,128,1,float16,fp8,1023,0.012842666357755661
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,8,4,128,1,float16,float16,511,0.012629333883523941
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,8,4,128,1,float16,fp8,2047,0.01911466692884763
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,8,4,128,1,float16,float16,2047,0.022848000129063923
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,8,4,128,1,float16,float16,4095,0.03925333420435587
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,8,4,128,1,float16,fp8,4095,0.02491733431816101
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,8,4,128,1,float16,float16,8191,0.06109866499900818
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,8,4,128,1,float16,fp8,8191,0.039936001102129616
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,8,4,128,1,float16,float16,16383,0.10274133086204529
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,8,4,128,1,float16,fp8,16383,0.062463998794555664
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,8,1,128,1,float16,float16,3,0.010581333190202713
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,8,1,128,1,float16,float16,1,0.010928000013033548
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,8,4,128,1,float16,float16,32767,0.18722132841746011
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,8,1,128,1,float16,fp8,1,0.010922666639089584
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,8,4,128,1,float16,fp8,32767,0.10786133011182149
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,8,1,128,1,float16,float16,7,0.010239999741315842
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,8,4,128,1,float16,float16,65535,0.3582293192545573
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,8,1,128,1,float16,fp8,7,0.010928000013033548
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,8,1,128,1,float16,fp8,15,0.010586666564146677
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,8,1,128,1,float16,float16,15,0.010581333190202713
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,8,1,128,1,float16,fp8,3,0.010581333190202713
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,8,4,128,1,float16,fp8,65535,0.1960960030555725
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,8,1,128,1,float16,fp8,31,0.010474666953086853
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,8,1,128,1,float16,float16,31,0.010821333775917688
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,8,1,128,1,float16,fp8,63,0.010581333190202713
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,8,1,128,1,float16,float16,63,0.010575999816258749
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,8,1,128,1,float16,fp8,127,0.010581333190202713
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,8,1,128,1,float16,float16,511,0.010837333897749582
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,8,1,128,1,float16,float16,255,0.010581333190202713
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,8,1,128,1,float16,float16,127,0.010586666564146677
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,8,1,128,1,float16,float16,1023,0.010581333190202713
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,8,1,128,1,float16,fp8,255,0.010586666564146677
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,8,1,128,1,float16,fp8,511,0.010832000523805618
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,8,1,128,1,float16,fp8,1023,0.010928000013033548
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,8,1,128,1,float16,float16,2047,0.010928000013033548
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,8,1,128,1,float16,fp8,2047,0.011957333733638128
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,8,1,128,1,float16,float16,8191,0.0170666662355264
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,8,1,128,1,float16,float16,4095,0.01228800043463707
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,8,1,128,1,float16,fp8,4095,0.012602667013804117
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,8,1,128,1,float16,fp8,16383,0.022895999252796173
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,8,1,128,1,float16,float16,32767,0.03719999889532725
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,8,1,128,1,float16,fp8,32767,0.037205333511034645
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,8,1,128,1,float16,float16,16383,0.022874665757020313
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,8,1,128,1,float16,fp8,8191,0.01911466692884763
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,8,2,128,1,float16,float16,1,0.0106133334338665
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,8,2,128,1,float16,fp8,1,0.010559999694426855
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,8,2,128,1,float16,float16,3,0.010698666175206503
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,8,1,128,1,float16,float16,65535,0.041306667029857635
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,8,1,128,1,float16,fp8,65535,0.0430026650428772
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,8,1,128,1,float16,fp8,131071,0.04471466441949209
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,8,1,128,1,float16,float16,131071,0.045738667249679565
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,8,2,128,1,float16,float16,7,0.010581333190202713
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,8,2,128,1,float16,fp8,3,0.010586666564146677
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,8,2,128,1,float16,fp8,7,0.010565333068370819
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,8,2,128,1,float16,float16,15,0.010474666953086853
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,8,2,128,1,float16,fp8,63,0.010938666760921478
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,8,2,128,1,float16,fp8,15,0.010693332801262537
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,8,2,128,1,float16,float16,31,0.010581333190202713
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,8,2,128,1,float16,fp8,31,0.010581333190202713
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,8,2,128,1,float16,float16,63,0.010581333190202713
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,8,2,128,1,float16,fp8,127,0.010837333897749582
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,8,2,128,1,float16,float16,127,0.010586666564146677
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,8,2,128,1,float16,float16,255,0.010602666685978571
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,8,2,128,1,float16,fp8,255,0.010581333190202713
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,8,2,128,1,float16,float16,1023,0.010586666564146677
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,8,2,128,1,float16,float16,2047,0.010922666639089584
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,8,2,128,1,float16,fp8,1023,0.010928000013033548
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,8,2,128,1,float16,float16,511,0.011333333949247995
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,8,2,128,1,float16,fp8,511,0.010581333190202713
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,8,2,128,1,float16,fp8,2047,0.012629333883523941
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,8,2,128,1,float16,fp8,4095,0.012634667257467905
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,8,2,128,1,float16,float16,4095,0.012730666746695837
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,8,2,128,1,float16,fp8,8191,0.01877333347996076
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,8,2,128,1,float16,float16,8191,0.01877333347996076
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,8,2,128,1,float16,float16,16383,0.02491733431816101
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,8,2,128,1,float16,fp8,16383,0.02491733431816101
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,8,2,128,1,float16,float16,32767,0.0290133332212766
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,8,2,128,1,float16,fp8,32767,0.029018667836983997
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,8,2,128,1,float16,float16,65535,0.030720000465710957
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,8,4,128,1,float16,float16,1,0.010597333312034607
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,8,2,128,1,float16,float16,131071,0.035504000882307686
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,8,2,128,1,float16,fp8,65535,0.029018667836983997
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,8,4,128,1,float16,float16,7,0.010586666564146677
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,8,2,128,1,float16,fp8,131071,0.03310933212439219
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,8,4,128,1,float16,fp8,1,0.010922666639089584
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,8,4,128,1,float16,float16,3,0.010586666564146677
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,8,4,128,1,float16,fp8,3,0.010586666564146677
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,8,4,128,1,float16,float16,15,0.010591999938090643
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,8,4,128,1,float16,fp8,7,0.010581333190202713
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,8,4,128,1,float16,float16,31,0.010586666564146677
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,8,4,128,1,float16,fp8,127,0.010458666831254959
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,8,4,128,1,float16,float16,63,0.010821333775917688
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,8,4,128,1,float16,fp8,15,0.010650667051474253
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,8,4,128,1,float16,fp8,63,0.010581333190202713
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,8,4,128,1,float16,float16,127,0.010581333190202713
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,8,4,128,1,float16,fp8,31,0.010586666564146677
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,8,4,128,1,float16,fp8,255,0.010591999938090643
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,8,4,128,1,float16,float16,255,0.010581333190202713
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,8,4,128,1,float16,float16,511,0.010586666564146677
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,8,4,128,1,float16,float16,4095,0.012629333883523941
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,8,4,128,1,float16,float16,2047,0.010581333190202713
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,8,4,128,1,float16,fp8,511,0.012629333883523941
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,8,4,128,1,float16,fp8,4095,0.012602667013804117
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,8,4,128,1,float16,fp8,2047,0.012624000509579977
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,8,4,128,1,float16,fp8,1023,0.012613333761692047
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,8,4,128,1,float16,float16,1023,0.011274666835864386
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,8,4,128,1,float16,float16,8191,0.01877333347996076
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,8,4,128,1,float16,fp8,8191,0.01877333347996076
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,8,4,128,1,float16,float16,16383,0.020810666183630627
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,8,4,128,1,float16,fp8,16383,0.02184533327817917
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,8,4,128,1,float16,float16,32767,0.022869333624839783
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,8,4,128,1,float16,fp8,32767,0.022869333624839783
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,8,4,128,1,float16,float16,65535,0.026965332527955372
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,8,4,128,1,float16,fp8,65535,0.024911999702453613
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,8,1,128,1,float16,float16,1,0.010570666442314783
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,8,4,128,1,float16,fp8,131071,0.031061333914597828
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,8,4,128,1,float16,float16,131071,0.04573333263397217
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,8,1,128,1,float16,float16,3,0.010586666564146677
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,8,1,128,1,float16,fp8,1,0.010842667271693548
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,8,1,128,1,float16,fp8,3,0.010575999816258749
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,8,1,128,1,float16,fp8,15,0.010922666639089584
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,8,1,128,1,float16,float16,31,0.010469333579142889
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,8,1,128,1,float16,float16,15,0.010581333190202713
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,8,1,128,1,float16,fp8,7,0.010832000523805618
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,8,1,128,1,float16,float16,7,0.010581333190202713
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,8,1,128,1,float16,fp8,31,0.010586666564146677
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,8,1,128,1,float16,float16,63,0.010591999938090643
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,8,1,128,1,float16,float16,127,0.010581333190202713
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,8,1,128,1,float16,fp8,127,0.010591999938090643
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,8,1,128,1,float16,float16,255,0.010565333068370819
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,8,1,128,1,float16,fp8,63,0.010581333190202713
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,8,1,128,1,float16,fp8,511,0.011616000284751257
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,8,1,128,1,float16,fp8,255,0.010928000013033548
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,8,1,128,1,float16,float16,511,0.010581333190202713
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,8,1,128,1,float16,fp8,1023,0.012629333883523941
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,8,1,128,1,float16,float16,2047,0.012629333883523941
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,8,1,128,1,float16,float16,1023,0.010581333190202713
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,8,1,128,1,float16,fp8,2047,0.012602667013804117
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,8,1,128,1,float16,fp8,4095,0.012991999586423239
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,8,1,128,1,float16,float16,4095,0.012618667135636011
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,8,1,128,1,float16,float16,16383,0.02491733431816101
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,8,1,128,1,float16,float16,8191,0.019120000302791595
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,8,1,128,1,float16,fp8,16383,0.025600001215934753
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,8,1,128,1,float16,fp8,8191,0.019109333554903667
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,8,1,128,1,float16,float16,32767,0.030037333567937214
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,8,1,128,1,float16,fp8,32767,0.028981332977612812
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,8,2,128,1,float16,float16,1,0.010805333654085795
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,8,1,128,1,float16,float16,65535,0.03276800115903219
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,8,1,128,1,float16,fp8,65535,0.030720000465710957
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,8,2,128,1,float16,fp8,1,0.010575999816258749
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,8,1,128,1,float16,float16,131071,0.03583999971548716
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,8,2,128,1,float16,float16,7,0.01080000028014183
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,8,2,128,1,float16,float16,3,0.010581333190202713
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,8,2,128,1,float16,fp8,3,0.010650667051474253
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,8,1,128,1,float16,fp8,131071,0.03515733281771342
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,8,2,128,1,float16,fp8,7,0.010581333190202713
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,8,2,128,1,float16,fp8,31,0.010469333579142889
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,8,2,128,1,float16,float16,15,0.010586666564146677
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,8,2,128,1,float16,float16,31,0.010581333190202713
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,8,2,128,1,float16,fp8,15,0.010581333190202713
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,8,2,128,1,float16,fp8,63,0.010586666564146677
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,8,2,128,1,float16,float16,63,0.010565333068370819
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,8,2,128,1,float16,float16,127,0.01080000028014183
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,8,2,128,1,float16,float16,255,0.010581333190202713
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,8,2,128,1,float16,float16,511,0.010581333190202713
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,8,2,128,1,float16,fp8,127,0.010591999938090643
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,8,2,128,1,float16,fp8,255,0.010640000303586325
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,8,2,128,1,float16,float16,1023,0.010591999938090643
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,8,2,128,1,float16,fp8,511,0.010586666564146677
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,8,2,128,1,float16,fp8,2047,0.012709333250919977
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,8,2,128,1,float16,fp8,1023,0.012613333761692047
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,8,2,128,1,float16,float16,4095,0.012618667135636011
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,8,2,128,1,float16,float16,2047,0.012527999778588613
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,8,2,128,1,float16,fp8,16383,0.022869333624839783
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,8,2,128,1,float16,fp8,8191,0.01877333347996076
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,8,2,128,1,float16,float16,8191,0.01945066700379054
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,8,2,128,1,float16,fp8,4095,0.014682666709025701
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,8,2,128,1,float16,float16,16383,0.021503999829292297
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,8,2,128,1,float16,fp8,32767,0.022890667120615642
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,8,2,128,1,float16,float16,32767,0.025263999899228413
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,8,2,128,1,float16,fp8,65535,0.02589866767326991
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,8,4,128,1,float16,float16,1,0.010650667051474253
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,8,4,128,1,float16,float16,3,0.010581333190202713
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,8,2,128,1,float16,float16,65535,0.02731200059254964
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,8,4,128,1,float16,fp8,1,0.010586666564146677
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,8,4,128,1,float16,float16,7,0.010581333190202713
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,8,2,128,1,float16,float16,131071,0.046426668763160706
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,8,2,128,1,float16,fp8,131071,0.03310399999221166
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,8,4,128,1,float16,fp8,3,0.01091733326514562
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,8,4,128,1,float16,float16,15,0.010591999938090643
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,8,4,128,1,float16,fp8,31,0.010842667271693548
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,8,4,128,1,float16,fp8,7,0.010586666564146677
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,8,4,128,1,float16,float16,31,0.010581333190202713
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,8,4,128,1,float16,float16,63,0.010597333312034607
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,8,4,128,1,float16,fp8,15,0.010698666175206503
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,8,4,128,1,float16,fp8,127,0.010922666639089584
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,8,4,128,1,float16,fp8,63,0.010922666639089584
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,8,4,128,1,float16,float16,255,0.01080000028014183
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,8,4,128,1,float16,float16,127,0.010570666442314783
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,8,4,128,1,float16,fp8,255,0.010586666564146677
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,8,4,128,1,float16,fp8,2047,0.012624000509579977
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,8,4,128,1,float16,fp8,1023,0.012624000509579977
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,8,4,128,1,float16,float16,511,0.010581333190202713
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,8,4,128,1,float16,fp8,511,0.010928000013033548
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,8,4,128,1,float16,float16,4095,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,8,4,128,1,float16,float16,2047,0.012624000509579977
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,8,4,128,1,float16,float16,1023,0.01081066702802976
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,8,4,128,1,float16,float16,8191,0.01877333347996076
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,8,4,128,1,float16,fp8,4095,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,8,4,128,1,float16,fp8,8191,0.019109333554903667
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,8,4,128,1,float16,float16,16383,0.020821332931518555
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,8,4,128,1,float16,fp8,16383,0.019802667200565338
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,8,4,128,1,float16,float16,32767,0.025258667767047882
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,8,4,128,1,float16,fp8,32767,0.023183998962243397
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,8,4,128,1,float16,fp8,65535,0.0290133332212766
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,8,4,128,1,float16,float16,65535,0.04162666698296865
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,8,1,128,1,float16,float16,1,0.010581333190202713
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,8,1,128,1,float16,fp8,1,0.010575999816258749
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,8,1,128,1,float16,float16,7,0.010687999427318573
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,8,4,128,1,float16,fp8,131071,0.044031997521718345
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,8,4,128,1,float16,float16,131071,0.06382933259010315
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,8,1,128,1,float16,float16,3,0.010575999816258749
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,8,1,128,1,float16,fp8,3,0.011264000087976456
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,8,1,128,1,float16,float16,31,0.010832000523805618
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,8,1,128,1,float16,float16,15,0.010581333190202713
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,8,1,128,1,float16,fp8,31,0.010911999891201654
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,8,1,128,1,float16,fp8,15,0.010922666639089584
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,8,1,128,1,float16,fp8,7,0.011157333850860596
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,8,1,128,1,float16,fp8,63,0.010581333190202713
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,8,1,128,1,float16,float16,63,0.01081066702802976
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,8,1,128,1,float16,fp8,127,0.010709332923094431
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,8,1,128,1,float16,float16,127,0.01091733326514562
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,8,1,128,1,float16,fp8,255,0.010922666639089584
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,8,1,128,1,float16,float16,255,0.010581333190202713
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,8,1,128,1,float16,float16,511,0.012629333883523941
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,8,1,128,1,float16,float16,2047,0.01809599995613098
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,8,1,128,1,float16,fp8,511,0.012634667257467905
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,8,1,128,1,float16,float16,1023,0.012624000509579977
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,8,1,128,1,float16,float16,4095,0.02117866774400075
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,8,1,128,1,float16,fp8,1023,0.012629333883523941
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,8,1,128,1,float16,fp8,2047,0.0170666662355264
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,8,1,128,1,float16,fp8,4095,0.01911466692884763
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,8,1,128,1,float16,fp8,8191,0.02492800106604894
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,8,1,128,1,float16,float16,8191,0.04027733455101649
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,8,2,128,1,float16,float16,3,0.010581333190202713
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,8,2,128,1,float16,float16,1,0.010928000013033548
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,8,2,128,1,float16,fp8,1,0.010581333190202713
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,8,1,128,1,float16,fp8,16383,0.03994133323431015
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,8,1,128,1,float16,float16,16383,0.06144000093142191
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,8,2,128,1,float16,fp8,3,0.011637333780527115
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,8,1,128,1,float16,fp8,32767,0.06109866499900818
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,8,1,128,1,float16,float16,32767,0.10273599624633789
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,8,2,128,1,float16,float16,7,0.011242666592200598
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,8,2,128,1,float16,float16,15,0.010581333190202713
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,8,2,128,1,float16,fp8,7,0.010933333386977514
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,8,2,128,1,float16,float16,63,0.010682666053374609
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,8,2,128,1,float16,fp8,15,0.010928000013033548
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,8,2,128,1,float16,fp8,31,0.01091733326514562
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,8,2,128,1,float16,float16,31,0.010928000013033548
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,8,2,128,1,float16,float16,127,0.012629333883523941
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,8,2,128,1,float16,fp8,127,0.010933333386977514
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,8,2,128,1,float16,fp8,63,0.010938666760921478
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,8,2,128,1,float16,fp8,255,0.010837333897749582
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,8,2,128,1,float16,float16,255,0.010645333677530289
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,8,2,128,1,float16,fp8,511,0.012629333883523941
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,8,2,128,1,float16,fp8,1023,0.014671999961137772
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,8,2,128,1,float16,float16,511,0.012629333883523941
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,8,2,128,1,float16,float16,2047,0.022874665757020313
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,8,2,128,1,float16,float16,1023,0.014677333335081736
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,8,2,128,1,float16,float16,4095,0.039594667653242745
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,8,2,128,1,float16,fp8,2047,0.01911466692884763
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,8,2,128,1,float16,float16,8191,0.062128002444903054
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,8,2,128,1,float16,fp8,4095,0.025258667767047882
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,8,2,128,1,float16,fp8,8191,0.041637333730856575
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,8,2,128,1,float16,float16,16383,0.10376532872517903
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,8,2,128,1,float16,fp8,16383,0.06417066852251689
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,8,4,128,1,float16,fp8,3,0.012869333227475485
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,8,4,128,1,float16,fp8,1,0.013007999708255133
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,8,4,128,1,float16,float16,1,0.012970666090647379
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,8,4,128,1,float16,float16,3,0.012975999464591345
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,8,2,128,1,float16,fp8,32767,0.10854400197664897
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,8,4,128,1,float16,float16,15,0.01332266628742218
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,8,2,128,1,float16,float16,32767,0.18913066387176514
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,8,4,128,1,float16,fp8,15,0.012517333030700684
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,8,4,128,1,float16,fp8,7,0.012970666090647379
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,8,4,128,1,float16,float16,7,0.012970666090647379
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,8,4,128,1,float16,fp8,31,0.012629333883523941
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,8,4,128,1,float16,float16,31,0.012954667210578918
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,8,4,128,1,float16,fp8,63,0.012970666090647379
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,8,4,128,1,float16,float16,63,0.012944000462690989
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,8,4,128,1,float16,fp8,127,0.012613333761692047
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,8,4,128,1,float16,float16,1023,0.020821332931518555
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,8,4,128,1,float16,float16,511,0.01637866720557213
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,8,4,128,1,float16,fp8,1023,0.0170666662355264
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,8,4,128,1,float16,float16,127,0.012970666090647379
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,8,4,128,1,float16,float16,255,0.01331199953953425
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,8,4,128,1,float16,fp8,255,0.012629333883523941
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,8,4,128,1,float16,fp8,511,0.014682666709025701
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,8,4,128,1,float16,float16,2047,0.039936001102129616
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,8,4,128,1,float16,fp8,2047,0.02696000039577484
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,8,4,128,1,float16,fp8,4095,0.04301333427429199
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,8,4,128,1,float16,float16,4095,0.062122667829195656
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,8,4,128,1,float16,fp8,8191,0.0641653339068095
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,8,1,128,1,float16,float16,1,0.010618666807810465
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,8,4,128,1,float16,float16,8191,0.10377599795659383
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,8,4,128,1,float16,float16,16383,0.18995199600855509
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,8,4,128,1,float16,fp8,16383,0.10923199852307637
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,8,1,128,1,float16,fp8,1,0.010586666564146677
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,8,1,128,1,float16,fp8,3,0.010816000401973724
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,8,4,128,1,float16,fp8,32767,0.1974666714668274
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,8,4,128,1,float16,float16,32767,0.36061867078145343
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,8,1,128,1,float16,fp8,15,0.010608000059922537
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,8,1,128,1,float16,float16,3,0.010581333190202713
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,8,1,128,1,float16,float16,7,0.010581333190202713
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,8,1,128,1,float16,float16,31,0.010565333068370819
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,8,1,128,1,float16,fp8,7,0.01081066702802976
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,8,1,128,1,float16,float16,15,0.010591999938090643
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,8,1,128,1,float16,float16,63,0.010645333677530289
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,8,1,128,1,float16,float16,255,0.010581333190202713
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,8,1,128,1,float16,fp8,31,0.010586666564146677
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,8,1,128,1,float16,fp8,127,0.010586666564146677
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,8,1,128,1,float16,fp8,63,0.010581333190202713
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,8,1,128,1,float16,fp8,255,0.01091733326514562
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,8,1,128,1,float16,float16,511,0.010586666564146677
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,8,1,128,1,float16,float16,127,0.010581333190202713
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,8,1,128,1,float16,fp8,511,0.010634666929642359
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,8,1,128,1,float16,float16,1023,0.010805333654085795
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,8,1,128,1,float16,fp8,1023,0.01091733326514562
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,8,1,128,1,float16,fp8,8191,0.020821332931518555
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,8,1,128,1,float16,float16,16383,0.024570666253566742
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,8,1,128,1,float16,fp8,4095,0.015018666783968607
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,8,1,128,1,float16,float16,4095,0.014677333335081736
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,8,1,128,1,float16,float16,2047,0.013653332988421122
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,8,1,128,1,float16,fp8,16383,0.023232000569502514
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,8,1,128,1,float16,fp8,2047,0.014655999839305878
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,8,1,128,1,float16,float16,8191,0.02081599955757459
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,8,1,128,1,float16,fp8,32767,0.02491733431816101
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,8,1,128,1,float16,float16,32767,0.02696000039577484
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,8,1,128,1,float16,float16,65535,0.029018667836983997
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,8,1,128,1,float16,fp8,65535,0.028688001135985058
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,8,2,128,1,float16,float16,1,0.010581333190202713
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,8,2,128,1,float16,fp8,1,0.010597333312034607
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,8,2,128,1,float16,float16,3,0.010480000327030817
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,8,1,128,1,float16,fp8,131071,0.03345066557327906
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,8,2,128,1,float16,fp8,3,0.010591999938090643
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,8,2,128,1,float16,float16,31,0.010490667074918747
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,8,1,128,1,float16,float16,131071,0.0481279989083608
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,8,2,128,1,float16,fp8,31,0.010586666564146677
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,8,2,128,1,float16,float16,7,0.010586666564146677
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,8,2,128,1,float16,fp8,7,0.010581333190202713
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,8,2,128,1,float16,fp8,15,0.010591999938090643
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,8,2,128,1,float16,float16,15,0.010581333190202713
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,8,2,128,1,float16,float16,63,0.010586666564146677
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,8,2,128,1,float16,fp8,63,0.010586666564146677
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,8,2,128,1,float16,fp8,127,0.010586666564146677
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,8,2,128,1,float16,float16,1023,0.010928000013033548
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,8,2,128,1,float16,float16,511,0.010928000013033548
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,8,2,128,1,float16,fp8,1023,0.010922666639089584
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,8,2,128,1,float16,float16,255,0.010650667051474253
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,8,2,128,1,float16,fp8,255,0.010586666564146677
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,8,2,128,1,float16,float16,2047,0.013653332988421122
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,8,2,128,1,float16,fp8,511,0.010581333190202713
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,8,2,128,1,float16,float16,127,0.010591999938090643
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,8,2,128,1,float16,fp8,2047,0.014682666709025701
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,8,2,128,1,float16,fp8,4095,0.01809599995613098
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,8,2,128,1,float16,float16,4095,0.01877333347996076
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,8,2,128,1,float16,float16,16383,0.022863999009132385
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,8,2,128,1,float16,float16,8191,0.019797333826621372
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,8,2,128,1,float16,fp8,8191,0.02081599955757459
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,8,2,128,1,float16,fp8,16383,0.02082666630546252
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,8,2,128,1,float16,fp8,32767,0.024901332954565685
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,8,2,128,1,float16,float16,65535,0.04334933559099833
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,8,2,128,1,float16,float16,32767,0.026965332527955372
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,8,4,128,1,float16,float16,1,0.010581333190202713
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,8,2,128,1,float16,float16,131071,0.06690133114655812
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,8,2,128,1,float16,fp8,65535,0.02934933453798294
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,8,4,128,1,float16,float16,3,0.010586666564146677
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,8,4,128,1,float16,fp8,3,0.010581333190202713
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,8,4,128,1,float16,fp8,1,0.010581333190202713
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,8,2,128,1,float16,fp8,131071,0.047466665506362915
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,8,4,128,1,float16,float16,15,0.010922666639089584
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,8,4,128,1,float16,fp8,31,0.010543999572594961
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,8,4,128,1,float16,float16,31,0.010586666564146677
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,8,4,128,1,float16,fp8,7,0.010581333190202713
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,8,4,128,1,float16,float16,7,0.010581333190202713
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,8,4,128,1,float16,fp8,15,0.010591999938090643
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,8,4,128,1,float16,float16,127,0.010565333068370819
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,8,4,128,1,float16,float16,63,0.010501333822806677
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,8,4,128,1,float16,fp8,127,0.010559999694426855
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,8,4,128,1,float16,float16,255,0.010581333190202713
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,8,4,128,1,float16,fp8,63,0.010586666564146677
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,8,4,128,1,float16,float16,1023,0.011194666226704916
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,8,4,128,1,float16,fp8,255,0.010586666564146677
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,8,4,128,1,float16,float16,2047,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,8,4,128,1,float16,fp8,511,0.010586666564146677
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,8,4,128,1,float16,fp8,2047,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,8,4,128,1,float16,float16,511,0.011839999506870905
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,8,4,128,1,float16,fp8,1023,0.010581333190202713
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,8,4,128,1,float16,fp8,4095,0.018778666853904724
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,8,4,128,1,float16,float16,4095,0.01877333347996076
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,8,4,128,1,float16,fp8,8191,0.018778666853904724
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,8,4,128,1,float16,float16,8191,0.02080533280968666
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,8,4,128,1,float16,fp8,16383,0.022874665757020313
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,8,4,128,1,float16,float16,16383,0.02491733431816101
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,8,4,128,1,float16,fp8,32767,0.02697066714366277
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,8,4,128,1,float16,float16,65535,0.06178133189678192
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,8,4,128,1,float16,float16,32767,0.041637333730856575
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,8,1,128,1,float16,float16,1,0.010816000401973724
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,8,4,128,1,float16,fp8,65535,0.043007999658584595
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,8,4,128,1,float16,float16,131071,0.10240532954533894
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,8,1,128,1,float16,fp8,1,0.010922666639089584
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,8,1,128,1,float16,fp8,3,0.010922666639089584
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,8,4,128,1,float16,fp8,131071,0.06211733321348826
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,8,1,128,1,float16,fp8,7,0.010922666639089584
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,8,1,128,1,float16,float16,3,0.012186666329701742
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,8,1,128,1,float16,fp8,15,0.010837333897749582
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,8,1,128,1,float16,float16,7,0.012298667182525
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,8,1,128,1,float16,float16,15,0.010666667173306147
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,8,1,128,1,float16,fp8,31,0.010922666639089584
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,8,1,128,1,float16,float16,31,0.010581333190202713
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,8,1,128,1,float16,fp8,63,0.010922666639089584
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,8,1,128,1,float16,fp8,127,0.011674666156371435
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,8,1,128,1,float16,float16,127,0.011605333536863327
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,8,1,128,1,float16,float16,63,0.012629333883523941
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,8,1,128,1,float16,float16,511,0.012629333883523941
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,8,1,128,1,float16,fp8,255,0.010816000401973724
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,8,1,128,1,float16,fp8,1023,0.014677333335081736
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,8,1,128,1,float16,float16,1023,0.014677333335081736
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,8,1,128,1,float16,float16,255,0.012400000045696894
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,8,1,128,1,float16,fp8,511,0.012624000509579977
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,8,1,128,1,float16,float16,4095,0.039936001102129616
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,8,1,128,1,float16,float16,2047,0.022869333624839783
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,8,1,128,1,float16,fp8,2047,0.020821332931518555
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,8,1,128,1,float16,float16,8191,0.06180266539255778
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,8,2,128,1,float16,float16,1,0.01331199953953425
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,8,1,128,1,float16,fp8,4095,0.024911999702453613
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,8,2,128,1,float16,fp8,1,0.012629333883523941
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,8,1,128,1,float16,fp8,8191,0.04095999896526337
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,8,2,128,1,float16,fp8,3,0.012634667257467905
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,8,2,128,1,float16,float16,3,0.013637332866589228
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,8,2,128,1,float16,float16,7,0.013658666362365087
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,8,2,128,1,float16,float16,15,0.01331199953953425
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,8,1,128,1,float16,fp8,16383,0.0641599992911021
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,8,2,128,1,float16,fp8,7,0.012624000509579977
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,8,1,128,1,float16,float16,16383,0.10410666465759277
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,8,2,128,1,float16,float16,31,0.012666666259368261
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,8,2,128,1,float16,fp8,15,0.012634667257467905
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,8,2,128,1,float16,float16,63,0.013728000223636627
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,8,2,128,1,float16,fp8,31,0.012970666090647379
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,8,2,128,1,float16,fp8,63,0.012629333883523941
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,8,2,128,1,float16,fp8,255,0.012634667257467905
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,8,2,128,1,float16,fp8,511,0.01470400020480156
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,8,2,128,1,float16,float16,127,0.012879999975363413
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,8,2,128,1,float16,fp8,127,0.012975999464591345
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,8,2,128,1,float16,float16,511,0.015018666783968607
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,8,2,128,1,float16,fp8,1023,0.017407999684413273
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,8,2,128,1,float16,float16,255,0.012970666090647379
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,8,2,128,1,float16,float16,2047,0.040618665516376495
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,8,2,128,1,float16,float16,1023,0.01911466692884763
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,8,2,128,1,float16,fp8,2047,0.02696000039577484
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,8,2,128,1,float16,float16,4095,0.06211733321348826
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,8,4,128,1,float16,fp8,1,0.014655999839305878
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,8,2,128,1,float16,fp8,4095,0.043007999658584595
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,8,4,128,1,float16,float16,1,0.016719999412695568
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,8,2,128,1,float16,float16,8191,0.1050986647605896
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,8,2,128,1,float16,fp8,8191,0.06417066852251689
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,8,4,128,1,float16,fp8,3,0.014677333335081736
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,8,2,128,1,float16,fp8,16383,0.10922666390736897
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,8,4,128,1,float16,fp8,7,0.014677333335081736
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,8,4,128,1,float16,float16,3,0.016719999412695568
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,8,2,128,1,float16,float16,16383,0.19081066052118936
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,8,4,128,1,float16,float16,7,0.0170666662355264
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,8,4,128,1,float16,float16,31,0.016719999412695568
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,8,4,128,1,float16,float16,15,0.01708799973130226
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,8,4,128,1,float16,fp8,15,0.014682666709025701
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,8,4,128,1,float16,fp8,31,0.014677333335081736
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,8,4,128,1,float16,fp8,63,0.015018666783968607
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,8,4,128,1,float16,float16,255,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,8,4,128,1,float16,float16,63,0.016384000579516094
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,8,4,128,1,float16,float16,511,0.020821332931518555
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,8,4,128,1,float16,fp8,127,0.015024000157912573
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,8,4,128,1,float16,float16,127,0.01674666628241539
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,8,4,128,1,float16,fp8,255,0.015013333410024643
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,8,4,128,1,float16,fp8,511,0.01877333347996076
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,8,4,128,1,float16,float16,1023,0.0365226666132609
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,8,4,128,1,float16,fp8,1023,0.023557332654794056
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,8,4,128,1,float16,fp8,2047,0.041984001795450844
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,8,4,128,1,float16,float16,2047,0.062128002444903054
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,8,4,128,1,float16,fp8,4095,0.06519466638565063
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,8,4,128,1,float16,float16,4095,0.10511466860771179
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,8,4,128,1,float16,float16,8191,0.19080533583958945
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,8,4,128,1,float16,fp8,8191,0.10956799983978271
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,256,1,8,1,128,1,float16,float16,1,0.01331199953953425
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,8,4,128,1,float16,float16,16383,0.36402666568756104
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,8,4,128,1,float16,fp8,16383,0.19849065939585367
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,256,1,8,1,128,1,float16,fp8,1,0.012997332960367203
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,256,1,8,1,128,1,float16,fp8,3,0.012970666090647379
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,256,1,8,1,128,1,float16,fp8,15,0.012970666090647379
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,256,1,8,1,128,1,float16,float16,15,0.012949333836634954
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,256,1,8,1,128,1,float16,float16,7,0.012970666090647379
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,256,1,8,1,128,1,float16,fp8,7,0.012970666090647379
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,256,1,8,1,128,1,float16,float16,31,0.012970666090647379
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,256,1,8,1,128,1,float16,float16,3,0.012805332740147909
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,256,1,8,1,128,1,float16,fp8,31,0.012970666090647379
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,256,1,8,1,128,1,float16,fp8,63,0.012975999464591345
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,256,1,8,1,128,1,float16,float16,127,0.01331199953953425
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,256,1,8,1,128,1,float16,fp8,255,0.012975999464591345
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,256,1,8,1,128,1,float16,float16,63,0.012634667257467905
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,256,1,8,1,128,1,float16,float16,255,0.014661333213249842
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,256,1,8,1,128,1,float16,fp8,127,0.012624000509579977
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,256,1,8,1,128,1,float16,float16,1023,0.020821332931518555
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,256,1,8,1,128,1,float16,fp8,511,0.014671999961137772
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,256,1,8,1,128,1,float16,fp8,1023,0.01775466650724411
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,256,1,8,1,128,1,float16,float16,511,0.015024000157912573
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,256,1,8,1,128,1,float16,float16,2047,0.04130133241415024
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,256,1,8,2,128,1,float16,float16,1,0.016794666647911072
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,256,1,8,1,128,1,float16,fp8,2047,0.02696000039577484
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,256,1,8,2,128,1,float16,float16,3,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,256,1,8,2,128,1,float16,float16,7,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,256,1,8,1,128,1,float16,float16,4095,0.0641653339068095
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,256,1,8,2,128,1,float16,fp8,1,0.015018666783968607
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,256,1,8,1,128,1,float16,fp8,4095,0.041984001795450844
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,256,1,8,2,128,1,float16,fp8,7,0.014677333335081736
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,256,1,8,2,128,1,float16,fp8,15,0.014677333335081736
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,256,1,8,2,128,1,float16,float16,31,0.016730666160583496
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,256,1,8,2,128,1,float16,fp8,3,0.014671999961137772
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,256,1,8,2,128,1,float16,float16,15,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,256,1,8,2,128,1,float16,float16,127,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,256,1,8,2,128,1,float16,float16,63,0.016389333953460056
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,256,1,8,2,128,1,float16,fp8,31,0.015013333410024643
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,256,1,8,2,128,1,float16,fp8,63,0.014677333335081736
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,256,1,8,2,128,1,float16,fp8,127,0.014677333335081736
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,256,1,8,2,128,1,float16,float16,255,0.016719999412695568
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,256,1,8,2,128,1,float16,fp8,511,0.018768000106016796
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,256,1,8,2,128,1,float16,float16,511,0.021162666380405426
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,256,1,8,2,128,1,float16,fp8,255,0.014896000425020853
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,256,1,8,2,128,1,float16,float16,1023,0.03515200068553289
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,256,1,8,2,128,1,float16,fp8,1023,0.02491733431816101
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,256,1,8,2,128,1,float16,fp8,2047,0.04301333427429199
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,256,1,8,2,128,1,float16,float16,2047,0.06348800162474315
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,256,1,8,2,128,1,float16,float16,4095,0.10683733224868774
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,256,1,8,2,128,1,float16,fp8,4095,0.06587199866771698
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,256,1,8,4,128,1,float16,float16,1,0.023210667073726654
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,256,1,8,4,128,1,float16,fp8,1,0.01945066700379054
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,256,1,8,4,128,1,float16,float16,7,0.02218666672706604
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,256,1,8,4,128,1,float16,float16,3,0.02183466653029124
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,256,1,8,4,128,1,float16,fp8,3,0.01945066700379054
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,256,1,8,4,128,1,float16,float16,15,0.023205332458019257
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,256,1,8,4,128,1,float16,fp8,15,0.019120000302791595
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,256,1,8,4,128,1,float16,fp8,7,0.01911466692884763
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,256,1,8,4,128,1,float16,float16,63,0.02184533327817917
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,256,1,8,4,128,1,float16,float16,31,0.022863999009132385
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,256,1,8,4,128,1,float16,fp8,127,0.019461333751678467
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,256,1,8,4,128,1,float16,fp8,63,0.019141333798567455
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,256,1,8,4,128,1,float16,float16,255,0.022869333624839783
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,256,1,8,4,128,1,float16,fp8,31,0.01911466692884763
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,256,1,8,4,128,1,float16,float16,127,0.02252800017595291
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,256,1,8,4,128,1,float16,float16,511,0.03482666611671448
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,256,1,8,4,128,1,float16,fp8,255,0.019120000302791595
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,256,1,8,4,128,1,float16,float16,1023,0.05428266525268555
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,256,1,8,4,128,1,float16,fp8,511,0.025258667767047882
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,256,1,8,4,128,1,float16,float16,2047,0.10001599788665771
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,256,1,8,4,128,1,float16,fp8,1023,0.037205333511034645
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,512,1,8,1,128,1,float16,fp8,1,0.015018666783968607
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,256,1,8,4,128,1,float16,float16,4095,0.17765865723292032
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,256,1,8,4,128,1,float16,fp8,2047,0.062122667829195656
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,512,1,8,1,128,1,float16,fp8,3,0.016384000579516094
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,512,1,8,1,128,1,float16,float16,1,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,256,1,8,4,128,1,float16,fp8,4095,0.10104533036549886
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,512,1,8,1,128,1,float16,float16,3,0.0170666662355264
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,512,1,8,1,128,1,float16,float16,7,0.016741332908471424
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,512,1,8,1,128,1,float16,fp8,7,0.015013333410024643
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,512,1,8,1,128,1,float16,float16,31,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,512,1,8,1,128,1,float16,float16,15,0.01704000060757001
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,512,1,8,1,128,1,float16,fp8,31,0.015696000307798386
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,512,1,8,1,128,1,float16,float16,63,0.017082666357358296
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,512,1,8,1,128,1,float16,fp8,63,0.014677333335081736
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,512,1,8,1,128,1,float16,fp8,127,0.015029333531856537
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,512,1,8,1,128,1,float16,fp8,15,0.015018666783968607
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,512,1,8,1,128,1,float16,float16,127,0.016730666160583496
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,512,1,8,1,128,1,float16,float16,255,0.016704000532627106
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,512,1,8,1,128,1,float16,fp8,255,0.015018666783968607
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,512,1,8,2,128,1,float16,float16,1,0.02288000037272771
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,512,1,8,1,128,1,float16,fp8,1023,0.024885334074497223
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,512,1,8,1,128,1,float16,float16,511,0.021503999829292297
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,512,1,8,1,128,1,float16,fp8,511,0.01877333347996076
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,512,1,8,1,128,1,float16,float16,1023,0.03549866626660029
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,512,1,8,2,128,1,float16,float16,3,0.022869333624839783
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,512,1,8,1,128,1,float16,fp8,2047,0.04334933559099833
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,512,1,8,2,128,1,float16,fp8,3,0.01911466692884763
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,512,1,8,2,128,1,float16,fp8,1,0.020821332931518555
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,512,1,8,1,128,1,float16,float16,2047,0.06382933259010315
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,512,1,8,2,128,1,float16,float16,7,0.02288000037272771
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,512,1,8,2,128,1,float16,fp8,7,0.020479999482631683
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,512,1,8,2,128,1,float16,float16,15,0.022869333624839783
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,512,1,8,2,128,1,float16,fp8,15,0.019109333554903667
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,512,1,8,2,128,1,float16,fp8,31,0.01911466692884763
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,512,1,8,2,128,1,float16,float16,63,0.023210667073726654
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,512,1,8,2,128,1,float16,float16,31,0.022869333624839783
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,512,1,8,2,128,1,float16,fp8,255,0.01877333347996076
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,512,1,8,2,128,1,float16,float16,127,0.022869333624839783
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,512,1,8,2,128,1,float16,fp8,127,0.019797333826621372
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,512,1,8,2,128,1,float16,float16,511,0.035487999518712364
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,512,1,8,2,128,1,float16,fp8,63,0.01911466692884763
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,512,1,8,2,128,1,float16,float16,255,0.022869333624839783
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,512,1,8,2,128,1,float16,fp8,511,0.025941332181294758
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,512,1,8,4,128,1,float16,float16,1,0.03513599932193756
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,512,1,8,2,128,1,float16,fp8,1023,0.037205333511034645
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,512,1,8,2,128,1,float16,float16,1023,0.05598400036493937
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,512,1,8,4,128,1,float16,float16,3,0.03379199902216593
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,512,1,8,4,128,1,float16,fp8,1,0.027994667490323383
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,512,1,8,4,128,1,float16,fp8,3,0.0290133332212766
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,512,1,8,2,128,1,float16,float16,2047,0.10103467106819153
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,512,1,8,2,128,1,float16,fp8,2047,0.0631466656923294
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,512,1,8,4,128,1,float16,float16,7,0.035162667433420815
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,512,1,8,4,128,1,float16,fp8,7,0.027301333844661713
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,512,1,8,4,128,1,float16,fp8,15,0.02834133307139079
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,512,1,8,4,128,1,float16,float16,15,0.03515733281771342
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,512,1,8,4,128,1,float16,fp8,31,0.027653334041436512
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,512,1,8,4,128,1,float16,fp8,63,0.0290133332212766
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,512,1,8,4,128,1,float16,fp8,127,0.02900800108909607
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,512,1,8,4,128,1,float16,float16,31,0.035162667433420815
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,512,1,8,4,128,1,float16,float16,63,0.03345066557327906
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,512,1,8,4,128,1,float16,float16,127,0.03549866626660029
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,512,1,8,4,128,1,float16,float16,255,0.03686933219432831
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,512,1,8,4,128,1,float16,fp8,511,0.04233066737651825
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,512,1,8,4,128,1,float16,float16,511,0.05598400036493937
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,512,1,8,4,128,1,float16,fp8,255,0.027306665976842243
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,512,1,8,4,128,1,float16,float16,1023,0.09624000390370686
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,8,1,128,1,float16,fp8,1,0.01091733326514562
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,8,1,128,1,float16,float16,1,0.010591999938090643
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,8,1,128,1,float16,float16,3,0.010586666564146677
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,8,1,128,1,float16,float16,7,0.010832000523805618
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,8,1,128,1,float16,fp8,3,0.010581333190202713
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,512,1,8,4,128,1,float16,float16,2047,0.179365336894989
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,512,1,8,4,128,1,float16,fp8,1023,0.06075733403364817
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,512,1,8,4,128,1,float16,fp8,2047,0.10479467113812764
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,8,1,128,1,float16,fp8,7,0.010575999816258749
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,8,1,128,1,float16,fp8,15,0.010581333190202713
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,8,1,128,1,float16,float16,31,0.010586666564146677
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,8,1,128,1,float16,float16,15,0.010581333190202713
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,8,1,128,1,float16,fp8,31,0.010634666929642359
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,8,1,128,1,float16,fp8,127,0.010586666564146677
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,8,1,128,1,float16,fp8,255,0.01062400018175443
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,8,1,128,1,float16,float16,255,0.010581333190202713
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,8,1,128,1,float16,float16,63,0.010586666564146677
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,8,1,128,1,float16,float16,127,0.010581333190202713
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,8,1,128,1,float16,float16,511,0.010922666639089584
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,8,1,128,1,float16,fp8,63,0.010922666639089584
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,8,1,128,1,float16,float16,1023,0.010965333630641302
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,8,1,128,1,float16,fp8,511,0.010922666639089584
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,8,1,128,1,float16,fp8,1023,0.011946666985750198
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,8,1,128,1,float16,float16,2047,0.013647999614477158
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,8,1,128,1,float16,fp8,4095,0.01877333347996076
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,8,1,128,1,float16,fp8,8191,0.020821332931518555
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,8,1,128,1,float16,fp8,16383,0.021503999829292297
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,8,1,128,1,float16,fp8,2047,0.014682666709025701
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,8,1,128,1,float16,float16,4095,0.0170666662355264
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,8,1,128,1,float16,float16,32767,0.02696000039577484
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,8,1,128,1,float16,float16,16383,0.022895999252796173
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,8,1,128,1,float16,float16,8191,0.020821332931518555
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,8,1,128,1,float16,float16,65535,0.04369066655635834
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,8,1,128,1,float16,fp8,32767,0.024911999702453613
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,8,1,128,1,float16,fp8,65535,0.0290133332212766
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,8,2,128,1,float16,float16,1,0.010581333190202713
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,8,2,128,1,float16,fp8,1,0.010586666564146677
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,8,2,128,1,float16,float16,3,0.010602666685978571
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,8,1,128,1,float16,fp8,131071,0.045738667249679565
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,8,1,128,1,float16,float16,131071,0.06656000018119812
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,8,2,128,1,float16,fp8,3,0.01089599976936976
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,8,2,128,1,float16,fp8,7,0.010490667074918747
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,8,2,128,1,float16,float16,7,0.010474666953086853
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,8,2,128,1,float16,float16,15,0.010581333190202713
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,8,2,128,1,float16,fp8,63,0.010922666639089584
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,8,2,128,1,float16,float16,127,0.010453333457310995
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,8,2,128,1,float16,fp8,15,0.010591999938090643
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,8,2,128,1,float16,fp8,31,0.010559999694426855
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,8,2,128,1,float16,float16,63,0.010543999572594961
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,8,2,128,1,float16,float16,31,0.010575999816258749
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,8,2,128,1,float16,fp8,255,0.010922666639089584
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,8,2,128,1,float16,float16,511,0.010581333190202713
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,8,2,128,1,float16,fp8,127,0.010581333190202713
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,8,2,128,1,float16,float16,1023,0.010581333190202713
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,8,2,128,1,float16,float16,255,0.010581333190202713
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,8,2,128,1,float16,fp8,511,0.012256000190973282
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,8,2,128,1,float16,fp8,1023,0.010922666639089584
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,8,2,128,1,float16,fp8,2047,0.0170666662355264
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,8,2,128,1,float16,float16,4095,0.01877333347996076
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,8,2,128,1,float16,float16,2047,0.016730666160583496
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,8,2,128,1,float16,fp8,4095,0.018751999984184902
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,8,2,128,1,float16,float16,8191,0.020821332931518555
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,8,2,128,1,float16,fp8,8191,0.020821332931518555
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,8,2,128,1,float16,float16,16383,0.02492266645034154
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,8,2,128,1,float16,fp8,32767,0.026965332527955372
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,8,2,128,1,float16,float16,32767,0.043354665239652
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,8,2,128,1,float16,fp8,16383,0.022874665757020313
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,8,4,128,1,float16,fp8,1,0.010469333579142889
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,8,4,128,1,float16,float16,3,0.010922666639089584
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,8,2,128,1,float16,fp8,65535,0.041984001795450844
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,8,4,128,1,float16,float16,1,0.010575999816258749
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,8,4,128,1,float16,fp8,3,0.010693332801262537
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,8,2,128,1,float16,float16,65535,0.06211733321348826
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,8,2,128,1,float16,float16,131071,0.10308266679445903
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,8,4,128,1,float16,float16,7,0.010591999938090643
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,8,4,128,1,float16,fp8,7,0.010581333190202713
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,8,2,128,1,float16,fp8,131071,0.0628053347269694
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,8,4,128,1,float16,float16,63,0.010586666564146677
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,8,4,128,1,float16,float16,127,0.010581333190202713
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,8,4,128,1,float16,float16,15,0.010922666639089584
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,8,4,128,1,float16,fp8,15,0.01081066702802976
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,8,4,128,1,float16,float16,31,0.010586666564146677
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,8,4,128,1,float16,fp8,63,0.010581333190202713
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,8,4,128,1,float16,fp8,127,0.010581333190202713
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,8,4,128,1,float16,fp8,31,0.010591999938090643
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,8,4,128,1,float16,float16,255,0.010602666685978571
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,8,4,128,1,float16,fp8,255,0.010581333190202713
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,8,4,128,1,float16,float16,2047,0.015018666783968607
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,8,4,128,1,float16,fp8,511,0.010938666760921478
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,8,4,128,1,float16,float16,1023,0.012629333883523941
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,8,4,128,1,float16,float16,511,0.010922666639089584
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,8,4,128,1,float16,fp8,1023,0.011605333536863327
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,8,4,128,1,float16,float16,4095,0.01877333347996076
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,8,4,128,1,float16,fp8,4095,0.016719999412695568
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,8,4,128,1,float16,fp8,2047,0.014677333335081736
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,8,4,128,1,float16,float16,8191,0.021162666380405426
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,8,4,128,1,float16,fp8,8191,0.01877333347996076
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,8,4,128,1,float16,fp8,32767,0.03958400090535482
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,8,4,128,1,float16,fp8,16383,0.02489600082238515
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,8,4,128,1,float16,float16,16383,0.038912000755469
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,8,4,128,1,float16,float16,32767,0.06007466713587443
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,8,4,128,1,float16,float16,65535,0.10205866893132527
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1024,1,8,1,128,1,float16,float16,1,0.023546665906906128
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,8,4,128,1,float16,fp8,65535,0.06007466713587443
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,8,4,128,1,float16,float16,131071,0.18756266434987387
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1024,1,8,1,128,1,float16,fp8,1,0.020821332931518555
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1024,1,8,1,128,1,float16,float16,3,0.023552000522613525
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,8,4,128,1,float16,fp8,131071,0.10376532872517903
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1024,1,8,1,128,1,float16,fp8,3,0.020821332931518555
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1024,1,8,1,128,1,float16,float16,7,0.023200000325838726
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1024,1,8,1,128,1,float16,fp8,7,0.02081599955757459
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1024,1,8,1,128,1,float16,float16,15,0.023210667073726654
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1024,1,8,1,128,1,float16,fp8,15,0.020842666427294414
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1024,1,8,1,128,1,float16,float16,63,0.023872000475724537
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1024,1,8,1,128,1,float16,float16,127,0.022874665757020313
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1024,1,8,1,128,1,float16,fp8,127,0.020821332931518555
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1024,1,8,1,128,1,float16,float16,31,0.023552000522613525
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1024,1,8,1,128,1,float16,fp8,31,0.02082666630546252
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1024,1,8,1,128,1,float16,fp8,63,0.020821332931518555
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1024,1,8,1,128,1,float16,float16,255,0.023226665953795116
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1024,1,8,2,128,1,float16,fp8,1,0.0290133332212766
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1024,1,8,1,128,1,float16,fp8,511,0.02629333237806956
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1024,1,8,1,128,1,float16,fp8,255,0.020821332931518555
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1024,1,8,2,128,1,float16,float16,1,0.03515733281771342
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1024,1,8,1,128,1,float16,float16,511,0.036501333117485046
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1024,1,8,1,128,1,float16,float16,1023,0.05597866574923197
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1024,1,8,1,128,1,float16,fp8,1023,0.038917332887649536
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1024,1,8,2,128,1,float16,float16,7,0.03515200068553289
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1024,1,8,2,128,1,float16,fp8,3,0.02902399996916453
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1024,1,8,2,128,1,float16,float16,3,0.03515733281771342
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1024,1,8,2,128,1,float16,fp8,7,0.0290133332212766
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1024,1,8,2,128,1,float16,float16,15,0.035162667433420815
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1024,1,8,2,128,1,float16,fp8,15,0.0290133332212766
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1024,1,8,2,128,1,float16,float16,31,0.03549866626660029
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1024,1,8,2,128,1,float16,float16,63,0.035461333890755974
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1024,1,8,2,128,1,float16,fp8,31,0.029018667836983997
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1024,1,8,2,128,1,float16,fp8,63,0.02900800108909607
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1024,1,8,2,128,1,float16,float16,127,0.03516799956560135
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1024,1,8,2,128,1,float16,fp8,127,0.0290133332212766
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1024,1,8,2,128,1,float16,fp8,255,0.0290133332212766
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1024,1,8,2,128,1,float16,float16,255,0.037205333511034645
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1024,1,8,2,128,1,float16,float16,511,0.0576853354771932
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1024,1,8,2,128,1,float16,fp8,511,0.043007999658584595
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1024,1,8,4,128,1,float16,float16,3,0.05836800237496694
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1024,1,8,4,128,1,float16,fp8,1,0.04574933151404063
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1024,1,8,4,128,1,float16,float16,1,0.0580266664425532
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1024,1,8,4,128,1,float16,fp8,3,0.04539733131726583
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1024,1,8,4,128,1,float16,float16,7,0.06007466713587443
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1024,1,8,4,128,1,float16,fp8,7,0.04573333263397217
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1024,1,8,2,128,1,float16,float16,1023,0.09796266754468282
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1024,1,8,2,128,1,float16,fp8,1023,0.06178133189678192
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1024,1,8,4,128,1,float16,float16,15,0.05801066756248474
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1024,1,8,4,128,1,float16,fp8,15,0.045738667249679565
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1024,1,8,4,128,1,float16,float16,63,0.05972800155480703
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1024,1,8,4,128,1,float16,fp8,63,0.045738667249679565
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1024,1,8,4,128,1,float16,fp8,31,0.04539200166861216
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1024,1,8,4,128,1,float16,float16,31,0.05905066430568695
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1024,1,8,4,128,1,float16,float16,127,0.06007466713587443
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1024,1,8,4,128,1,float16,fp8,127,0.045738667249679565
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1024,1,8,4,128,1,float16,float16,255,0.061093335350354515
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1024,1,8,4,128,1,float16,fp8,255,0.04744533201058706
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1024,1,8,4,128,1,float16,float16,511,0.0993280013402303
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1024,1,8,4,128,1,float16,fp8,511,0.07032533486684163
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1024,1,8,4,128,1,float16,float16,1023,0.1764693260192871
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2048,1,8,1,128,1,float16,float16,1,0.037205333511034645
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2048,1,8,1,128,1,float16,fp8,1,0.031061333914597828
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1024,1,8,4,128,1,float16,fp8,1023,0.10820266604423523
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2048,1,8,1,128,1,float16,float16,3,0.03754666695992152
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2048,1,8,1,128,1,float16,fp8,3,0.03139200061559677
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2048,1,8,1,128,1,float16,fp8,7,0.0314026673634847
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2048,1,8,1,128,1,float16,float16,7,0.03753600021203359
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2048,1,8,1,128,1,float16,float16,15,0.03754133234421412
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2048,1,8,1,128,1,float16,fp8,15,0.031061333914597828
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2048,1,8,1,128,1,float16,float16,31,0.037205333511034645
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2048,1,8,1,128,1,float16,float16,127,0.037205333511034645
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2048,1,8,1,128,1,float16,fp8,31,0.031397332747777305
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2048,1,8,1,128,1,float16,float16,63,0.03719999889532725
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2048,1,8,1,128,1,float16,fp8,63,0.031040000418821972
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2048,1,8,1,128,1,float16,fp8,127,0.03105599929889043
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2048,1,8,2,128,1,float16,fp8,1,0.04778666794300079
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2048,1,8,2,128,1,float16,fp8,3,0.048469334840774536
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2048,1,8,1,128,1,float16,float16,255,0.039247999588648476
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2048,1,8,2,128,1,float16,float16,1,0.06178133189678192
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2048,1,8,1,128,1,float16,fp8,255,0.031061333914597828
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2048,1,8,2,128,1,float16,float16,3,0.06007466713587443
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2048,1,8,1,128,1,float16,fp8,511,0.044362664222717285
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2048,1,8,2,128,1,float16,float16,7,0.06178133189678192
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2048,1,8,1,128,1,float16,float16,511,0.06007466713587443
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2048,1,8,2,128,1,float16,fp8,7,0.047770669062932335
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2048,1,8,2,128,1,float16,float16,15,0.061103999614715576
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2048,1,8,2,128,1,float16,float16,31,0.06007466713587443
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2048,1,8,2,128,1,float16,fp8,15,0.04779199759165446
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2048,1,8,2,128,1,float16,fp8,31,0.04744533201058706
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2048,1,8,2,128,1,float16,float16,63,0.06005866825580597
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2048,1,8,2,128,1,float16,fp8,63,0.04744533201058706
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2048,1,8,2,128,1,float16,float16,127,0.062463998794555664
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2048,1,8,2,128,1,float16,fp8,127,0.04744533201058706
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2048,1,8,2,128,1,float16,float16,255,0.062133332093556724
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2048,1,8,4,128,1,float16,float16,1,0.10821866989135742
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2048,1,8,2,128,1,float16,fp8,255,0.04949333270390829
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2048,1,8,4,128,1,float16,fp8,1,0.08260266482830048
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2048,1,8,4,128,1,float16,fp8,3,0.08226133386294048
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2048,1,8,4,128,1,float16,float16,3,0.1088853379090627
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2048,1,8,4,128,1,float16,fp8,7,0.08055466910203297
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2048,1,8,2,128,1,float16,fp8,511,0.07236266632874806
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2048,1,8,2,128,1,float16,float16,511,0.10102933645248413
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2048,1,8,4,128,1,float16,float16,7,0.10923199852307637
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2048,1,8,4,128,1,float16,float16,15,0.1088853379090627
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2048,1,8,4,128,1,float16,fp8,15,0.08223466575145721
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2048,1,8,4,128,1,float16,float16,31,0.10990933577219646
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2048,1,8,4,128,1,float16,fp8,31,0.08089600006739299
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2048,1,8,4,128,1,float16,float16,63,0.1088853379090627
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2048,1,8,4,128,1,float16,fp8,63,0.08089066545168559
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2048,1,8,4,128,1,float16,fp8,127,0.08225599924723308
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,8,1,128,1,float16,float16,1,0.010922666639089584
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2048,1,8,4,128,1,float16,float16,127,0.10922666390736897
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,8,1,128,1,float16,fp8,1,0.010581333190202713
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,8,1,128,1,float16,fp8,3,0.010922666639089584
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,8,1,128,1,float16,float16,3,0.010581333190202713
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2048,1,8,4,128,1,float16,float16,255,0.10922666390736897
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,8,1,128,1,float16,float16,7,0.010821333775917688
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,8,1,128,1,float16,fp8,7,0.010581333190202713
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2048,1,8,4,128,1,float16,fp8,255,0.08396800359090169
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,8,1,128,1,float16,float16,15,0.010581333190202713
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,8,1,128,1,float16,fp8,15,0.010591999938090643
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,8,1,128,1,float16,fp8,31,0.010826667149861654
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,8,1,128,1,float16,float16,63,0.010645333677530289
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,8,1,128,1,float16,float16,127,0.010821333775917688
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2048,1,8,4,128,1,float16,float16,511,0.18466132879257202
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,8,1,128,1,float16,float16,31,0.01091733326514562
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,8,1,128,1,float16,fp8,63,0.010586666564146677
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,8,1,128,1,float16,fp8,127,0.010575999816258749
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2048,1,8,4,128,1,float16,fp8,511,0.1269760032494863
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,8,1,128,1,float16,float16,511,0.010581333190202713
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,8,1,128,1,float16,float16,255,0.010591999938090643
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,8,1,128,1,float16,fp8,255,0.010645333677530289
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,8,1,128,1,float16,fp8,511,0.010922666639089584
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,8,1,128,1,float16,float16,2047,0.016730666160583496
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,8,1,128,1,float16,float16,1023,0.010928000013033548
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,8,1,128,1,float16,fp8,1023,0.010581333190202713
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,8,1,128,1,float16,float16,4095,0.01878400022784869
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,8,1,128,1,float16,fp8,2047,0.016719999412695568
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,8,1,128,1,float16,fp8,4095,0.01877333347996076
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,8,1,128,1,float16,float16,8191,0.021162666380405426
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,8,1,128,1,float16,fp8,8191,0.019461333751678467
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,8,1,128,1,float16,float16,16383,0.02491733431816101
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,8,1,128,1,float16,fp8,32767,0.026965332527955372
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,8,1,128,1,float16,fp8,16383,0.02288000037272771
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,8,1,128,1,float16,float16,32767,0.04161600023508072
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,8,2,128,1,float16,float16,1,0.010608000059922537
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,8,2,128,1,float16,fp8,1,0.010581333190202713
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,8,1,128,1,float16,float16,65535,0.062458669145902
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,8,1,128,1,float16,fp8,65535,0.04164266586303711
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,8,2,128,1,float16,float16,7,0.010581333190202713
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,8,1,128,1,float16,fp8,131071,0.06314133107662201
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,8,2,128,1,float16,float16,3,0.010608000059922537
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,8,2,128,1,float16,fp8,7,0.010581333190202713
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,8,2,128,1,float16,fp8,3,0.010581333190202713
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,8,1,128,1,float16,float16,131071,0.10342400272687276
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,8,2,128,1,float16,float16,15,0.010591999938090643
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,8,2,128,1,float16,float16,31,0.010586666564146677
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,8,2,128,1,float16,fp8,63,0.010634666929642359
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,8,2,128,1,float16,float16,63,0.010474666953086853
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,8,2,128,1,float16,fp8,15,0.010816000401973724
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,8,2,128,1,float16,fp8,31,0.010928000013033548
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,8,2,128,1,float16,float16,127,0.010586666564146677
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,8,2,128,1,float16,fp8,255,0.01091733326514562
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,8,2,128,1,float16,float16,511,0.010911999891201654
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,8,2,128,1,float16,fp8,127,0.010922666639089584
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,8,2,128,1,float16,fp8,511,0.010805333654085795
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,8,2,128,1,float16,float16,255,0.010586666564146677
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,8,2,128,1,float16,fp8,2047,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,8,2,128,1,float16,float16,4095,0.018768000106016796
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,8,2,128,1,float16,float16,2047,0.016730666160583496
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,8,2,128,1,float16,float16,8191,0.022874665757020313
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,8,2,128,1,float16,fp8,1023,0.012634667257467905
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,8,2,128,1,float16,float16,1023,0.012629333883523941
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,8,2,128,1,float16,fp8,4095,0.0170666662355264
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,8,2,128,1,float16,fp8,8191,0.019141333798567455
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,8,2,128,1,float16,fp8,16383,0.02491733431816101
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,8,2,128,1,float16,float16,16383,0.03925333420435587
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,8,2,128,1,float16,fp8,32767,0.039594667653242745
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,8,4,128,1,float16,float16,1,0.01081066702802976
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,8,2,128,1,float16,float16,32767,0.06144000093142191
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,8,4,128,1,float16,fp8,1,0.010933333386977514
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,8,2,128,1,float16,float16,65535,0.10410666465759277
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,8,4,128,1,float16,float16,3,0.010821333775917688
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,8,4,128,1,float16,fp8,3,0.010938666760921478
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,8,2,128,1,float16,fp8,65535,0.06075733403364817
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,8,2,128,1,float16,fp8,131071,0.10513599713643391
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,8,4,128,1,float16,fp8,7,0.010960000256697336
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,8,4,128,1,float16,float16,7,0.010911999891201654
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,8,4,128,1,float16,fp8,15,0.010778666784365972
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,8,4,128,1,float16,float16,31,0.010570666442314783
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,8,2,128,1,float16,float16,131071,0.1890986760457357
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,8,4,128,1,float16,float16,15,0.01089599976936976
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,8,4,128,1,float16,float16,63,0.010586666564146677
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,8,4,128,1,float16,float16,255,0.010581333190202713
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,8,4,128,1,float16,fp8,127,0.010821333775917688
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,8,4,128,1,float16,fp8,31,0.010805333654085795
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,8,4,128,1,float16,float16,127,0.010586666564146677
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,8,4,128,1,float16,fp8,63,0.010933333386977514
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,8,4,128,1,float16,fp8,255,0.010922666639089584
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,8,4,128,1,float16,float16,1023,0.012624000509579977
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,8,4,128,1,float16,fp8,511,0.012639999389648438
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,8,4,128,1,float16,fp8,1023,0.012634667257467905
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,8,4,128,1,float16,float16,511,0.010922666639089584
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,8,4,128,1,float16,fp8,2047,0.016719999412695568
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,8,4,128,1,float16,float16,8191,0.038922667503356934
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,8,4,128,1,float16,float16,4095,0.021503999829292297
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,8,4,128,1,float16,fp8,8191,0.024911999702453613
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,8,4,128,1,float16,float16,2047,0.01809599995613098
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,8,4,128,1,float16,float16,16383,0.060415998101234436
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,8,4,128,1,float16,fp8,4095,0.02082666630546252
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,8,4,128,1,float16,fp8,32767,0.06076266864935557
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,8,4,128,1,float16,fp8,16383,0.03925333420435587
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,8,4,128,1,float16,float16,32767,0.10203199585278828
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,4,1,128,1,float16,float16,1,0.010575999816258749
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,8,4,128,1,float16,float16,65535,0.18500800927480063
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,8,4,128,1,float16,fp8,65535,0.10274133086204529
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,4,1,128,1,float16,fp8,1,0.010922666639089584
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,4,1,128,1,float16,fp8,7,0.010890666395425797
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,4,1,128,1,float16,float16,3,0.010485333700974783
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,4,1,128,1,float16,float16,7,0.010586666564146677
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,8,4,128,1,float16,fp8,131071,0.18927466869354248
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,4,1,128,1,float16,fp8,3,0.010581333190202713
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,8,4,128,1,float16,float16,131071,0.3537919918696086
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,4,1,128,1,float16,float16,15,0.010581333190202713
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,4,1,128,1,float16,fp8,15,0.010591999938090643
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,4,1,128,1,float16,float16,31,0.010581333190202713
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,4,1,128,1,float16,float16,63,0.010581333190202713
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,4,1,128,1,float16,float16,127,0.010586666564146677
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,4,1,128,1,float16,fp8,31,0.010650667051474253
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,4,1,128,1,float16,fp8,63,0.010586666564146677
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,4,1,128,1,float16,float16,511,0.012629333883523941
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,4,1,128,1,float16,fp8,127,0.010826667149861654
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,4,1,128,1,float16,fp8,255,0.010581333190202713
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,4,1,128,1,float16,float16,255,0.010586666564146677
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,4,1,128,1,float16,fp8,511,0.010922666639089584
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,4,1,128,1,float16,fp8,2047,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,4,1,128,1,float16,float16,1023,0.012527999778588613
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,4,1,128,1,float16,fp8,1023,0.012629333883523941
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,4,1,128,1,float16,float16,2047,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,4,1,128,1,float16,fp8,4095,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,4,1,128,1,float16,float16,4095,0.01877333347996076
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,4,1,128,1,float16,fp8,8191,0.02082666630546252
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,4,1,128,1,float16,float16,8191,0.022869333624839783
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,4,1,128,1,float16,float16,16383,0.039594667653242745
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,4,1,128,1,float16,fp8,16383,0.02491733431816101
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,4,2,128,1,float16,float16,1,0.01091733326514562
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,4,1,128,1,float16,float16,32767,0.06178133189678192
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,4,1,128,1,float16,fp8,32767,0.04061333338419596
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,4,2,128,1,float16,fp8,1,0.010581333190202713
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,4,2,128,1,float16,float16,3,0.010575999816258749
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,4,1,128,1,float16,float16,65535,0.10274133086204529
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,4,1,128,1,float16,fp8,65535,0.060415998101234436
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,4,2,128,1,float16,fp8,7,0.010586666564146677
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,4,2,128,1,float16,float16,15,0.010586666564146677
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,4,2,128,1,float16,fp8,3,0.010821333775917688
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,4,2,128,1,float16,float16,7,0.010581333190202713
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,4,1,128,1,float16,fp8,131071,0.10512533783912659
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,4,2,128,1,float16,float16,31,0.010608000059922537
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,4,2,128,1,float16,fp8,15,0.010805333654085795
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,4,2,128,1,float16,float16,63,0.010581333190202713
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,4,1,128,1,float16,float16,131071,0.19012266397476196
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,4,2,128,1,float16,fp8,63,0.010640000303586325
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,4,2,128,1,float16,float16,127,0.010581333190202713
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,4,2,128,1,float16,fp8,127,0.010597333312034607
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,4,2,128,1,float16,float16,511,0.010922666639089584
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,4,2,128,1,float16,fp8,31,0.010805333654085795
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,4,2,128,1,float16,fp8,255,0.010922666639089584
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,4,2,128,1,float16,float16,255,0.010645333677530289
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,4,2,128,1,float16,fp8,2047,0.0170666662355264
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,4,2,128,1,float16,float16,2047,0.01876266673207283
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,4,2,128,1,float16,fp8,511,0.012634667257467905
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,4,2,128,1,float16,float16,1023,0.012629333883523941
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,4,2,128,1,float16,fp8,1023,0.012629333883523941
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,4,2,128,1,float16,fp8,4095,0.02081599955757459
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,4,2,128,1,float16,fp8,8191,0.025263999899228413
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,4,2,128,1,float16,float16,4095,0.02184533327817917
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,4,2,128,1,float16,float16,8191,0.039247999588648476
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,4,2,128,1,float16,float16,16383,0.06007466713587443
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,4,2,128,1,float16,fp8,16383,0.04027733455101649
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,4,2,128,1,float16,float16,32767,0.10240000486373901
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,4,1,128,1,float16,float16,1,0.10240000486373901
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,4,2,128,1,float16,fp8,32767,0.062133332093556724
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,4,2,128,1,float16,fp8,65535,0.10308266679445903
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,4,2,128,1,float16,float16,65535,0.18652266263961792
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,4,1,128,1,float16,fp8,3,0.010591999938090643
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,4,1,128,1,float16,fp8,1,0.010922666639089584
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,4,1,128,1,float16,float16,3,0.010586666564146677
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,4,1,128,1,float16,float16,7,0.010581333190202713
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,4,2,128,1,float16,fp8,131071,0.19028266270955405
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,4,1,128,1,float16,fp8,15,0.010597333312034607
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,4,1,128,1,float16,fp8,7,0.010901333143313726
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,4,1,128,1,float16,float16,15,0.010586666564146677
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,32,1,4,2,128,1,float16,float16,131071,0.35549867153167725
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,4,1,128,1,float16,float16,31,0.010581333190202713
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,4,1,128,1,float16,fp8,31,0.010634666929642359
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,4,1,128,1,float16,float16,63,0.010234666367371878
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,4,1,128,1,float16,float16,127,0.010581333190202713
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,4,1,128,1,float16,fp8,63,0.010480000327030817
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,4,1,128,1,float16,fp8,127,0.010981333752473196
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,4,1,128,1,float16,fp8,1023,0.010581333190202713
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,4,1,128,1,float16,float16,255,0.010591999938090643
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,4,1,128,1,float16,float16,511,0.010581333190202713
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,4,1,128,1,float16,float16,1023,0.010586666564146677
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,4,1,128,1,float16,float16,2047,0.010821333775917688
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,4,1,128,1,float16,fp8,255,0.010581333190202713
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,4,1,128,1,float16,fp8,511,0.010922666639089584
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,4,1,128,1,float16,float16,4095,0.01126933346192042
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,4,1,128,1,float16,fp8,8191,0.018778666853904724
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,4,1,128,1,float16,float16,16383,0.023205332458019257
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,4,1,128,1,float16,float16,8191,0.018090666582187016
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,4,1,128,1,float16,fp8,4095,0.012629333883523941
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,4,1,128,1,float16,fp8,2047,0.012624000509579977
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,4,1,128,1,float16,fp8,32767,0.035162667433420815
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,4,1,128,1,float16,fp8,16383,0.022869333624839783
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,4,1,128,1,float16,float16,32767,0.03515200068553289
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,4,1,128,1,float16,fp8,65535,0.03925333420435587
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,4,2,128,1,float16,fp8,1,0.010581333190202713
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,4,1,128,1,float16,float16,65535,0.039594667653242745
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,4,2,128,1,float16,float16,1,0.010634666929642359
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,4,1,128,1,float16,float16,131071,0.04130133241415024
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,4,2,128,1,float16,float16,3,0.010581333190202713
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,4,1,128,1,float16,fp8,131071,0.04369066655635834
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,4,2,128,1,float16,float16,7,0.010805333654085795
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,4,2,128,1,float16,fp8,7,0.010581333190202713
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,4,2,128,1,float16,fp8,63,0.010773333410422007
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,4,2,128,1,float16,fp8,31,0.010842667271693548
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,4,2,128,1,float16,float16,15,0.010581333190202713
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,4,2,128,1,float16,fp8,3,0.010922666639089584
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,4,2,128,1,float16,float16,127,0.010581333190202713
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,4,2,128,1,float16,float16,63,0.010597333312034607
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,4,2,128,1,float16,float16,31,0.010581333190202713
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,4,2,128,1,float16,fp8,15,0.010581333190202713
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,4,2,128,1,float16,fp8,127,0.010581333190202713
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,4,2,128,1,float16,fp8,511,0.010821333775917688
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,4,2,128,1,float16,float16,255,0.010581333190202713
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,4,2,128,1,float16,float16,1023,0.010928000013033548
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,4,2,128,1,float16,fp8,255,0.010586666564146677
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,4,2,128,1,float16,fp8,1023,0.010933333386977514
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,4,2,128,1,float16,fp8,2047,0.012533333152532578
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,4,2,128,1,float16,float16,511,0.01116266722480456
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,4,2,128,1,float16,float16,4095,0.012634667257467905
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,4,2,128,1,float16,float16,2047,0.011605333536863327
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,4,2,128,1,float16,float16,8191,0.01878400022784869
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,4,2,128,1,float16,fp8,4095,0.012629333883523941
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,4,2,128,1,float16,fp8,16383,0.024890666206677754
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,4,2,128,1,float16,float16,16383,0.023210667073726654
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,4,2,128,1,float16,fp8,8191,0.01877333347996076
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,4,2,128,1,float16,fp8,32767,0.026965332527955372
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,4,2,128,1,float16,float16,32767,0.026965332527955372
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,4,2,128,1,float16,float16,65535,0.030368000268936157
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,4,1,128,1,float16,float16,3,0.010581333190202713
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,4,2,128,1,float16,fp8,65535,0.030037333567937214
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,4,1,128,1,float16,float16,1,0.010570666442314783
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,4,1,128,1,float16,fp8,1,0.010591999938090643
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,4,2,128,1,float16,fp8,131071,0.03310933212439219
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1,1,4,2,128,1,float16,float16,131071,0.035487999518712364
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,4,1,128,1,float16,fp8,7,0.010581333190202713
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,4,1,128,1,float16,fp8,3,0.010672000547250112
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,4,1,128,1,float16,fp8,15,0.010586666564146677
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,4,1,128,1,float16,float16,7,0.010591999938090643
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,4,1,128,1,float16,float16,15,0.010586666564146677
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,4,1,128,1,float16,float16,31,0.010586666564146677
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,4,1,128,1,float16,fp8,63,0.010703999549150467
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,4,1,128,1,float16,fp8,31,0.010826667149861654
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,4,1,128,1,float16,float16,63,0.010922666639089584
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,4,1,128,1,float16,fp8,511,0.010832000523805618
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,4,1,128,1,float16,fp8,127,0.010656000425418219
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,4,1,128,1,float16,float16,511,0.010581333190202713
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,4,1,128,1,float16,float16,1023,0.010693332801262537
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,4,1,128,1,float16,fp8,255,0.010586666564146677
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,4,1,128,1,float16,float16,255,0.010586666564146677
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,4,1,128,1,float16,float16,127,0.010581333190202713
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,4,1,128,1,float16,float16,2047,0.012613333761692047
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,4,1,128,1,float16,float16,4095,0.012629333883523941
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,4,1,128,1,float16,fp8,1023,0.012629333883523941
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,4,1,128,1,float16,fp8,2047,0.012629333883523941
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,4,1,128,1,float16,fp8,4095,0.012624000509579977
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,4,1,128,1,float16,fp8,16383,0.025253333151340485
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,4,1,128,1,float16,float16,8191,0.01877333347996076
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,4,1,128,1,float16,fp8,8191,0.018768000106016796
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,4,1,128,1,float16,float16,16383,0.02491733431816101
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,4,1,128,1,float16,float16,32767,0.02935466667016347
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,4,1,128,1,float16,fp8,32767,0.029018667836983997
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,4,1,128,1,float16,fp8,65535,0.02935466667016347
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,4,1,128,1,float16,float16,65535,0.032431999842325844
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,4,2,128,1,float16,float16,1,0.010597333312034607
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,4,1,128,1,float16,float16,131071,0.035504000882307686
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,4,2,128,1,float16,float16,3,0.010581333190202713
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,4,1,128,1,float16,fp8,131071,0.034490667283535004
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,4,2,128,1,float16,fp8,1,0.010581333190202713
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,4,2,128,1,float16,float16,7,0.010586666564146677
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,4,2,128,1,float16,fp8,3,0.010645333677530289
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,4,2,128,1,float16,fp8,15,0.010650667051474253
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,4,2,128,1,float16,float16,15,0.010581333190202713
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,4,2,128,1,float16,fp8,7,0.010586666564146677
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,4,2,128,1,float16,float16,31,0.010581333190202713
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,4,2,128,1,float16,fp8,63,0.010586666564146677
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,4,2,128,1,float16,fp8,31,0.010570666442314783
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,4,2,128,1,float16,float16,63,0.010575999816258749
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,4,2,128,1,float16,fp8,127,0.010581333190202713
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,4,2,128,1,float16,float16,127,0.010821333775917688
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,4,2,128,1,float16,float16,255,0.010581333190202713
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,4,2,128,1,float16,float16,511,0.01062400018175443
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,4,2,128,1,float16,fp8,511,0.010586666564146677
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,4,2,128,1,float16,fp8,255,0.010581333190202713
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,4,2,128,1,float16,float16,2047,0.012629333883523941
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,4,2,128,1,float16,float16,1023,0.010581333190202713
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,4,2,128,1,float16,fp8,1023,0.012661332885424295
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,4,2,128,1,float16,fp8,4095,0.012634667257467905
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,4,2,128,1,float16,fp8,2047,0.012629333883523941
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,4,2,128,1,float16,float16,8191,0.018768000106016796
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,4,2,128,1,float16,float16,4095,0.012634667257467905
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,4,2,128,1,float16,float16,16383,0.022869333624839783
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,4,2,128,1,float16,fp8,8191,0.019120000302791595
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,4,2,128,1,float16,fp8,16383,0.022890667120615642
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,4,2,128,1,float16,fp8,32767,0.022869333624839783
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,4,2,128,1,float16,float16,32767,0.02491733431816101
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,4,2,128,1,float16,fp8,65535,0.02491733431816101
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,4,2,128,1,float16,float16,65535,0.027647999425729115
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,4,2,128,1,float16,fp8,131071,0.031061333914597828
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,4,1,128,1,float16,float16,1,0.010586666564146677
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2,1,4,2,128,1,float16,float16,131071,0.04642133414745331
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,4,1,128,1,float16,float16,3,0.010581333190202713
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,4,1,128,1,float16,float16,7,0.010837333897749582
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,4,1,128,1,float16,fp8,1,0.010586666564146677
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,4,1,128,1,float16,fp8,3,0.010922666639089584
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,4,1,128,1,float16,float16,15,0.010591999938090643
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,4,1,128,1,float16,fp8,7,0.010922666639089584
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,4,1,128,1,float16,fp8,31,0.010581333190202713
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,4,1,128,1,float16,float16,63,0.010581333190202713
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,4,1,128,1,float16,fp8,15,0.010586666564146677
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,4,1,128,1,float16,float16,31,0.010586666564146677
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,4,1,128,1,float16,fp8,127,0.010581333190202713
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,4,1,128,1,float16,fp8,63,0.010954666882753372
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,4,1,128,1,float16,fp8,255,0.010581333190202713
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,4,1,128,1,float16,float16,511,0.012543999900420507
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,4,1,128,1,float16,float16,127,0.010922666639089584
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,4,1,128,1,float16,fp8,511,0.012629333883523941
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,4,1,128,1,float16,float16,255,0.010570666442314783
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,4,1,128,1,float16,fp8,1023,0.012634667257467905
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,4,1,128,1,float16,float16,1023,0.012970666090647379
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,4,1,128,1,float16,fp8,2047,0.0170666662355264
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,4,1,128,1,float16,float16,2047,0.01877333347996076
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,4,1,128,1,float16,float16,4095,0.0229066660006841
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,4,1,128,1,float16,fp8,4095,0.020821332931518555
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,4,1,128,1,float16,float16,8191,0.039594667653242745
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,4,1,128,1,float16,float16,16383,0.06075733403364817
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,4,1,128,1,float16,fp8,8191,0.024911999702453613
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,4,1,128,1,float16,fp8,16383,0.04127466678619385
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,4,2,128,1,float16,fp8,1,0.01228800043463707
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,4,2,128,1,float16,float16,1,0.010928000013033548
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,4,1,128,1,float16,float16,32767,0.10310399532318115
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,4,1,128,1,float16,fp8,32767,0.062122667829195656
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,4,2,128,1,float16,float16,3,0.010863999525705973
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,4,2,128,1,float16,float16,7,0.011605333536863327
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,4,2,128,1,float16,fp8,3,0.01228800043463707
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,4,2,128,1,float16,fp8,7,0.010575999816258749
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,4,2,128,1,float16,fp8,31,0.010650667051474253
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,4,1,128,1,float16,float16,65535,0.18756266434987387
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,4,2,128,1,float16,float16,15,0.011258666714032492
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,4,1,128,1,float16,fp8,65535,0.10308800141016643
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,4,2,128,1,float16,fp8,15,0.011946666985750198
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,4,2,128,1,float16,float16,63,0.01126933346192042
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,4,2,128,1,float16,fp8,63,0.011605333536863327
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,4,2,128,1,float16,float16,31,0.010922666639089584
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,4,2,128,1,float16,float16,127,0.010645333677530289
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,4,2,128,1,float16,fp8,511,0.012639999389648438
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,4,2,128,1,float16,float16,2047,0.021162666380405426
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,4,2,128,1,float16,float16,511,0.012645332763592402
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,4,2,128,1,float16,fp8,127,0.01091733326514562
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,4,2,128,1,float16,float16,255,0.01091733326514562
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,4,2,128,1,float16,fp8,255,0.010928000013033548
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,4,2,128,1,float16,fp8,1023,0.013642666240533194
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,4,2,128,1,float16,float16,1023,0.014677333335081736
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,4,2,128,1,float16,fp8,2047,0.020821332931518555
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,4,2,128,1,float16,float16,4095,0.039594667653242745
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,4,2,128,1,float16,fp8,4095,0.025941332181294758
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,4,2,128,1,float16,fp8,8191,0.041306667029857635
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,4,2,128,1,float16,float16,8191,0.06144000093142191
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,4,2,128,1,float16,float16,16383,0.10274133086204529
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,4,1,128,1,float16,float16,1,0.010485333700974783
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,4,2,128,1,float16,fp8,16383,0.062122667829195656
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,4,2,128,1,float16,float16,32767,0.18824533621470133
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,4,1,128,1,float16,fp8,1,0.010575999816258749
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,4,1,128,1,float16,fp8,3,0.010485333700974783
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,4,2,128,1,float16,fp8,32767,0.1088853379090627
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,4,1,128,1,float16,float16,3,0.010581333190202713
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,4,1,128,1,float16,float16,7,0.010581333190202713
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,4,1,128,1,float16,fp8,15,0.010805333654085795
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,4,2,128,1,float16,float16,65535,0.358570655186971
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,4,1,128,1,float16,float16,15,0.010586666564146677
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,4,1,128,1,float16,fp8,7,0.010559999694426855
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,64,1,4,2,128,1,float16,fp8,65535,0.1971199909845988
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,4,1,128,1,float16,float16,31,0.010645333677530289
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,4,1,128,1,float16,float16,63,0.010581333190202713
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,4,1,128,1,float16,fp8,63,0.010581333190202713
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,4,1,128,1,float16,fp8,31,0.010575999816258749
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,4,1,128,1,float16,float16,127,0.010581333190202713
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,4,1,128,1,float16,fp8,127,0.010645333677530289
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,4,1,128,1,float16,float16,255,0.010581333190202713
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,4,1,128,1,float16,float16,1023,0.010698666175206503
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,4,1,128,1,float16,fp8,511,0.010826667149861654
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,4,1,128,1,float16,float16,511,0.010581333190202713
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,4,1,128,1,float16,fp8,255,0.010575999816258749
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,4,1,128,1,float16,float16,2047,0.012634667257467905
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,4,1,128,1,float16,fp8,2047,0.014666666587193808
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,4,1,128,1,float16,fp8,1023,0.010581333190202713
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,4,1,128,1,float16,fp8,4095,0.014677333335081736
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,4,1,128,1,float16,float16,4095,0.014682666709025701
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,4,1,128,1,float16,float16,8191,0.020821332931518555
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,4,1,128,1,float16,fp8,16383,0.022869333624839783
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,4,1,128,1,float16,fp8,8191,0.02081599955757459
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,4,1,128,1,float16,float16,16383,0.022885332504908245
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,4,1,128,1,float16,float16,32767,0.0266239990790685
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,4,1,128,1,float16,fp8,32767,0.02491733431816101
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,4,2,128,1,float16,float16,3,0.010586666564146677
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,4,1,128,1,float16,float16,65535,0.02900800108909607
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,4,1,128,1,float16,fp8,65535,0.027306665976842243
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,4,2,128,1,float16,float16,1,0.010559999694426855
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,4,2,128,1,float16,fp8,1,0.010581333190202713
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,4,2,128,1,float16,float16,7,0.010645333677530289
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,4,1,128,1,float16,fp8,131071,0.03310933212439219
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,4,1,128,1,float16,float16,131071,0.049498667319615684
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,4,2,128,1,float16,fp8,7,0.010575999816258749
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,4,2,128,1,float16,fp8,15,0.010581333190202713
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,4,2,128,1,float16,fp8,3,0.010581333190202713
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,4,2,128,1,float16,float16,15,0.010928000013033548
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,4,2,128,1,float16,fp8,31,0.010650667051474253
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,4,2,128,1,float16,float16,31,0.010591999938090643
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,4,2,128,1,float16,float16,63,0.010581333190202713
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,4,2,128,1,float16,float16,127,0.01071999967098236
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,4,2,128,1,float16,fp8,127,0.010586666564146677
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,4,2,128,1,float16,fp8,63,0.010602666685978571
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,4,2,128,1,float16,float16,511,0.011152000476916632
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,4,2,128,1,float16,fp8,511,0.011306667079528173
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,4,2,128,1,float16,fp8,255,0.010650667051474253
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,4,2,128,1,float16,float16,255,0.010565333068370819
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,4,2,128,1,float16,fp8,1023,0.010666667173306147
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,4,2,128,1,float16,float16,4095,0.01877333347996076
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,4,2,128,1,float16,float16,1023,0.01091733326514562
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,4,2,128,1,float16,float16,2047,0.01331199953953425
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,4,2,128,1,float16,fp8,4095,0.01877333347996076
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,4,2,128,1,float16,fp8,2047,0.014677333335081736
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,4,2,128,1,float16,float16,16383,0.022869333624839783
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,4,2,128,1,float16,fp8,16383,0.020842666427294414
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,4,2,128,1,float16,float16,8191,0.020821332931518555
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,4,2,128,1,float16,fp8,8191,0.02082666630546252
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,4,2,128,1,float16,float16,32767,0.026943999032179516
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,4,2,128,1,float16,fp8,32767,0.02493866781393687
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,4,2,128,1,float16,float16,65535,0.04539733131726583
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,4,1,128,1,float16,float16,1,0.010922666639089584
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,4,1,128,1,float16,float16,3,0.010922666639089584
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,4,2,128,1,float16,fp8,65535,0.0290133332212766
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,4,1,128,1,float16,fp8,3,0.012298667182525
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,4,2,128,1,float16,fp8,131071,0.04744533201058706
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,4,1,4,2,128,1,float16,float16,131071,0.06622399886449178
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,4,1,128,1,float16,float16,7,0.010938666760921478
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,4,1,128,1,float16,fp8,1,0.011098666737476984
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,4,1,128,1,float16,float16,15,0.01091733326514562
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,4,1,128,1,float16,fp8,7,0.010672000547250112
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,4,1,128,1,float16,fp8,31,0.010944000134865442
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,4,1,128,1,float16,float16,31,0.01091733326514562
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,4,1,128,1,float16,float16,63,0.01091733326514562
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,4,1,128,1,float16,fp8,15,0.011264000087976456
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,4,1,128,1,float16,fp8,127,0.010645333677530289
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,4,1,128,1,float16,fp8,255,0.010949333508809408
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,4,1,128,1,float16,fp8,63,0.010816000401973724
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,4,1,128,1,float16,fp8,511,0.012639999389648438
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,4,1,128,1,float16,float16,511,0.012629333883523941
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,4,1,128,1,float16,float16,255,0.011813333878914515
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,4,1,128,1,float16,fp8,1023,0.012629333883523941
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,4,1,128,1,float16,float16,2047,0.021162666380405426
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,4,1,128,1,float16,float16,127,0.01126933346192042
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,4,1,128,1,float16,float16,4095,0.039936001102129616
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,4,1,128,1,float16,fp8,4095,0.02492266645034154
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,4,1,128,1,float16,fp8,2047,0.019130667050679524
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,4,1,128,1,float16,float16,1023,0.014671999961137772
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,4,1,128,1,float16,float16,8191,0.06177600224812826
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,4,1,128,1,float16,fp8,8191,0.041306667029857635
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,4,2,128,1,float16,float16,1,0.012629333883523941
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,4,2,128,1,float16,float16,3,0.012970666090647379
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,4,2,128,1,float16,fp8,1,0.012682666381200155
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,4,1,128,1,float16,float16,16383,0.10443199674288432
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,4,2,128,1,float16,fp8,3,0.012629333883523941
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,4,2,128,1,float16,float16,7,0.012629333883523941
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,4,2,128,1,float16,fp8,7,0.012629333883523941
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,4,2,128,1,float16,float16,15,0.012863999853531519
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,4,2,128,1,float16,fp8,15,0.012629333883523941
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,4,1,128,1,float16,fp8,16383,0.06453333298365276
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,4,2,128,1,float16,float16,31,0.012655999511480331
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,4,2,128,1,float16,fp8,31,0.012629333883523941
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,4,2,128,1,float16,float16,63,0.013653332988421122
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,4,1,128,1,float16,float16,32767,0.18926932414372763
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,4,2,128,1,float16,fp8,63,0.01268799975514412
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,4,1,128,1,float16,fp8,32767,0.10820266604423523
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,4,2,128,1,float16,float16,127,0.012965332716703415
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,4,2,128,1,float16,float16,255,0.012629333883523941
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,4,2,128,1,float16,fp8,255,0.012629333883523941
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,4,2,128,1,float16,float16,511,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,4,2,128,1,float16,fp8,1023,0.0174346665541331
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,4,2,128,1,float16,float16,1023,0.01878400022784869
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,4,2,128,1,float16,fp8,127,0.012970666090647379
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,4,2,128,1,float16,fp8,511,0.014677333335081736
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,4,2,128,1,float16,float16,2047,0.039594667653242745
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,4,2,128,1,float16,fp8,2047,0.026949333647886913
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,4,2,128,1,float16,fp8,4095,0.043023998538653054
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,4,2,128,1,float16,float16,4095,0.06211733321348826
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,4,2,128,1,float16,float16,8191,0.10513066252072652
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,4,2,128,1,float16,fp8,8191,0.06451199948787689
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,256,1,4,1,128,1,float16,float16,1,0.013317332913478216
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,256,1,4,1,128,1,float16,fp8,1,0.012890666723251343
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,256,1,4,1,128,1,float16,float16,3,0.013653332988421122
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,4,2,128,1,float16,fp8,16383,0.1088853379090627
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,256,1,4,1,128,1,float16,float16,7,0.012639999389648438
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,256,1,4,1,128,1,float16,fp8,7,0.01267733300725619
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,256,1,4,1,128,1,float16,float16,15,0.013653332988421122
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,256,1,4,1,128,1,float16,fp8,3,0.012970666090647379
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,4,2,128,1,float16,float16,16383,0.19182932376861572
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,256,1,4,1,128,1,float16,fp8,31,0.012624000509579977
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,256,1,4,1,128,1,float16,float16,31,0.012639999389648438
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,256,1,4,1,128,1,float16,float16,63,0.01331199953953425
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,256,1,4,1,128,1,float16,fp8,15,0.012975999464591345
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,4,2,128,1,float16,float16,32767,0.36231998602549237
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,256,1,4,1,128,1,float16,fp8,63,0.012874666601419449
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,128,1,4,2,128,1,float16,fp8,32767,0.1976319948832194
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,256,1,4,1,128,1,float16,fp8,127,0.012629333883523941
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,256,1,4,1,128,1,float16,fp8,255,0.012975999464591345
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,256,1,4,1,128,1,float16,float16,255,0.012970666090647379
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,256,1,4,1,128,1,float16,float16,511,0.016741332908471424
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,256,1,4,1,128,1,float16,float16,127,0.012970666090647379
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,256,1,4,1,128,1,float16,fp8,511,0.014677333335081736
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,256,1,4,1,128,1,float16,float16,2047,0.041322665909926094
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,256,1,4,1,128,1,float16,fp8,2047,0.026965332527955372
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,256,1,4,1,128,1,float16,float16,1023,0.020821332931518555
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,256,1,4,1,128,1,float16,fp8,1023,0.01775466650724411
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,256,1,4,2,128,1,float16,float16,1,0.016730666160583496
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,256,1,4,1,128,1,float16,fp8,4095,0.043007999658584595
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,256,1,4,2,128,1,float16,fp8,1,0.014671999961137772
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,256,1,4,1,128,1,float16,float16,4095,0.06277866661548615
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,256,1,4,2,128,1,float16,float16,3,0.016719999412695568
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,256,1,4,2,128,1,float16,fp8,3,0.015013333410024643
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,256,1,4,2,128,1,float16,float16,7,0.016751999656359356
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,256,1,4,2,128,1,float16,fp8,7,0.014677333335081736
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,256,1,4,2,128,1,float16,float16,15,0.016735999534527462
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,256,1,4,1,128,1,float16,float16,8191,0.10578667124112447
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,256,1,4,2,128,1,float16,fp8,15,0.014677333335081736
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,256,1,4,2,128,1,float16,float16,63,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,256,1,4,2,128,1,float16,fp8,63,0.014677333335081736
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,256,1,4,1,128,1,float16,fp8,8191,0.06587733328342438
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,256,1,4,2,128,1,float16,float16,127,0.09045333663622539
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,256,1,4,2,128,1,float16,float16,31,0.016719999412695568
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,256,1,4,2,128,1,float16,fp8,31,0.015018666783968607
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,256,1,4,2,128,1,float16,fp8,255,0.014677333335081736
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,256,1,4,2,128,1,float16,float16,511,0.020821332931518555
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,256,1,4,2,128,1,float16,fp8,127,0.014677333335081736
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,256,1,4,2,128,1,float16,float16,255,0.016735999534527462
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,256,1,4,2,128,1,float16,float16,1023,0.03583466758330663
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,256,1,4,2,128,1,float16,fp8,511,0.01877333347996076
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,256,1,4,2,128,1,float16,fp8,1023,0.02489600082238515
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,256,1,4,2,128,1,float16,float16,2047,0.06348800162474315
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,256,1,4,2,128,1,float16,fp8,2047,0.04266666869322459
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,256,1,4,2,128,1,float16,float16,4095,0.10615467031796773
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,512,1,4,1,128,1,float16,float16,1,0.016714667280515034
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,256,1,4,2,128,1,float16,fp8,4095,0.06588799754778545
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,512,1,4,1,128,1,float16,fp8,3,0.014677333335081736
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,512,1,4,1,128,1,float16,fp8,1,0.015018666783968607
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,512,1,4,1,128,1,float16,float16,3,0.0170666662355264
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,512,1,4,1,128,1,float16,float16,7,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,256,1,4,2,128,1,float16,float16,8191,0.1914880077044169
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,512,1,4,1,128,1,float16,float16,15,0.016704000532627106
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,512,1,4,1,128,1,float16,fp8,7,0.015018666783968607
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,256,1,4,2,128,1,float16,fp8,8191,0.11127466956774394
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,512,1,4,1,128,1,float16,float16,63,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,512,1,4,1,128,1,float16,fp8,15,0.015018666783968607
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,512,1,4,1,128,1,float16,float16,31,0.016730666160583496
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,512,1,4,1,128,1,float16,fp8,31,0.015013333410024643
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,512,1,4,1,128,1,float16,fp8,63,0.014677333335081736
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,512,1,4,1,128,1,float16,fp8,127,0.014677333335081736
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,512,1,4,1,128,1,float16,fp8,255,0.015018666783968607
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,512,1,4,1,128,1,float16,float16,127,0.016719999412695568
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,512,1,4,1,128,1,float16,float16,255,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,512,1,4,1,128,1,float16,float16,511,0.021157334248224895
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,512,1,4,1,128,1,float16,fp8,511,0.01876266673207283
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,512,1,4,1,128,1,float16,fp8,1023,0.023210667073726654
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,512,1,4,1,128,1,float16,float16,1023,0.03618133316437403
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,512,1,4,1,128,1,float16,fp8,2047,0.04301333427429199
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,512,1,4,1,128,1,float16,float16,2047,0.0628053347269694
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,512,1,4,2,128,1,float16,float16,3,0.021856000026067097
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,512,1,4,2,128,1,float16,fp8,3,0.01911466692884763
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,512,1,4,2,128,1,float16,fp8,1,0.01877333347996076
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,512,1,4,2,128,1,float16,float16,1,0.02218666672706604
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,512,1,4,2,128,1,float16,float16,7,0.022869333624839783
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,512,1,4,2,128,1,float16,fp8,7,0.01877333347996076
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,512,1,4,2,128,1,float16,float16,15,0.021482666333516438
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,512,1,4,1,128,1,float16,float16,4095,0.10749333103497823
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,512,1,4,2,128,1,float16,fp8,15,0.018768000106016796
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,512,1,4,2,128,1,float16,float16,63,0.022863999009132385
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,512,1,4,1,128,1,float16,fp8,4095,0.06586666901906331
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,512,1,4,2,128,1,float16,float16,31,0.02252800017595291
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,512,1,4,2,128,1,float16,fp8,31,0.01877333347996076
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,512,1,4,2,128,1,float16,fp8,63,0.01877333347996076
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,512,1,4,2,128,1,float16,float16,127,0.022869333624839783
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,512,1,4,2,128,1,float16,fp8,255,0.01877333347996076
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,512,1,4,2,128,1,float16,float16,255,0.02218666672706604
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,512,1,4,2,128,1,float16,fp8,127,0.01912533367673556
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,512,1,4,2,128,1,float16,float16,511,0.034474665919939675
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,512,1,4,2,128,1,float16,fp8,511,0.02491733431816101
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,512,1,4,2,128,1,float16,float16,1023,0.05494933327039083
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,512,1,4,2,128,1,float16,fp8,1023,0.03721066564321518
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,4,1,128,1,float16,float16,1,0.010480000327030817
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,512,1,4,2,128,1,float16,float16,2047,0.10103467106819153
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,4,1,128,1,float16,fp8,1,0.010591999938090643
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,512,1,4,2,128,1,float16,fp8,2047,0.06178133189678192
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,4,1,128,1,float16,float16,3,0.010608000059922537
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,4,1,128,1,float16,fp8,3,0.010458666831254959
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,4,1,128,1,float16,float16,7,0.010586666564146677
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,4,1,128,1,float16,fp8,7,0.01091733326514562
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,4,1,128,1,float16,float16,15,0.010591999938090643
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,4,1,128,1,float16,float16,31,0.010581333190202713
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,4,1,128,1,float16,fp8,15,0.010570666442314783
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,4,1,128,1,float16,float16,63,0.010821333775917688
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,4,1,128,1,float16,fp8,31,0.010597333312034607
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,512,1,4,2,128,1,float16,float16,4095,0.17987199624379477
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,512,1,4,2,128,1,float16,fp8,4095,0.10171199838320415
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,4,1,128,1,float16,fp8,63,0.010586666564146677
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,4,1,128,1,float16,fp8,127,0.01091733326514562
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,4,1,128,1,float16,float16,127,0.010586666564146677
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,4,1,128,1,float16,float16,255,0.010581333190202713
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,4,1,128,1,float16,fp8,255,0.010832000523805618
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,4,1,128,1,float16,float16,511,0.010597333312034607
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,4,1,128,1,float16,float16,2047,0.014671999961137772
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,4,1,128,1,float16,fp8,511,0.010928000013033548
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,4,1,128,1,float16,float16,4095,0.0170666662355264
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,4,1,128,1,float16,fp8,1023,0.010922666639089584
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,4,1,128,1,float16,fp8,2047,0.012975999464591345
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,4,1,128,1,float16,float16,1023,0.011605333536863327
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,4,1,128,1,float16,fp8,4095,0.01877333347996076
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,4,1,128,1,float16,float16,8191,0.02081599955757459
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,4,1,128,1,float16,fp8,16383,0.021173333128293354
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,4,1,128,1,float16,fp8,8191,0.0207893339296182
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,4,1,128,1,float16,float16,16383,0.02218666672706604
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,4,1,128,1,float16,float16,32767,0.026965332527955372
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,4,1,128,1,float16,fp8,32767,0.02491733431816101
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,4,1,128,1,float16,float16,65535,0.044031997521718345
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,4,2,128,1,float16,float16,1,0.010586666564146677
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,4,1,128,1,float16,fp8,65535,0.029018667836983997
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,4,1,128,1,float16,fp8,131071,0.04744533201058706
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,4,1,128,1,float16,float16,131071,0.0679253339767456
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,4,2,128,1,float16,float16,3,0.010581333190202713
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,4,2,128,1,float16,fp8,1,0.010581333190202713
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,4,2,128,1,float16,fp8,7,0.010640000303586325
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,4,2,128,1,float16,fp8,3,0.010901333143313726
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,4,2,128,1,float16,float16,7,0.010922666639089584
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,4,2,128,1,float16,float16,15,0.010480000327030817
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,4,2,128,1,float16,fp8,15,0.010922666639089584
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,4,2,128,1,float16,fp8,63,0.010645333677530289
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,4,2,128,1,float16,float16,31,0.010581333190202713
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,4,2,128,1,float16,fp8,127,0.010581333190202713
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,4,2,128,1,float16,float16,63,0.010666667173306147
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,4,2,128,1,float16,float16,127,0.010586666564146677
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,4,2,128,1,float16,fp8,31,0.010826667149861654
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,4,2,128,1,float16,fp8,255,0.010581333190202713
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,4,2,128,1,float16,float16,255,0.010426666587591171
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,4,2,128,1,float16,float16,511,0.010581333190202713
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,4,2,128,1,float16,float16,4095,0.01876266673207283
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,4,2,128,1,float16,fp8,4095,0.017749333133300144
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,4,2,128,1,float16,float16,1023,0.010928000013033548
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,4,2,128,1,float16,fp8,511,0.010650667051474253
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,4,2,128,1,float16,float16,2047,0.01706133286158244
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,4,2,128,1,float16,fp8,1023,0.01089599976936976
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,4,2,128,1,float16,fp8,2047,0.016719999412695568
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,4,2,128,1,float16,float16,8191,0.020821332931518555
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,4,2,128,1,float16,fp8,8191,0.020831999679406483
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,4,2,128,1,float16,float16,16383,0.02491733431816101
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,4,2,128,1,float16,fp8,16383,0.022869333624839783
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,4,2,128,1,float16,fp8,32767,0.027306665976842243
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,4,2,128,1,float16,float16,32767,0.04334933559099833
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,4,2,128,1,float16,float16,65535,0.06347199777762096
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1024,1,4,1,128,1,float16,float16,1,0.022863999009132385
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,4,2,128,1,float16,fp8,65535,0.04130133241415024
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,4,2,128,1,float16,fp8,131071,0.06385066608587901
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1024,1,4,1,128,1,float16,fp8,1,0.01911466692884763
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,8,1,4,2,128,1,float16,float16,131071,0.1030560036500295
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1024,1,4,1,128,1,float16,fp8,7,0.018778666853904724
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1024,1,4,1,128,1,float16,float16,3,0.022869333624839783
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1024,1,4,1,128,1,float16,fp8,3,0.0194560003777345
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1024,1,4,1,128,1,float16,float16,7,0.023210667073726654
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1024,1,4,1,128,1,float16,fp8,15,0.01911466692884763
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1024,1,4,1,128,1,float16,float16,31,0.023210667073726654
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1024,1,4,1,128,1,float16,fp8,31,0.01911466692884763
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1024,1,4,1,128,1,float16,float16,15,0.022863999009132385
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1024,1,4,1,128,1,float16,float16,63,0.022869333624839783
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1024,1,4,1,128,1,float16,fp8,63,0.020138667275508244
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1024,1,4,1,128,1,float16,float16,127,0.022895999252796173
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1024,1,4,1,128,1,float16,fp8,255,0.01911466692884763
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1024,1,4,1,128,1,float16,fp8,127,0.020479999482631683
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1024,1,4,1,128,1,float16,float16,255,0.022869333624839783
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1024,1,4,1,128,1,float16,float16,511,0.03515733281771342
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1024,1,4,1,128,1,float16,fp8,511,0.02491733431816101
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1024,1,4,1,128,1,float16,float16,1023,0.05563733478387197
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1024,1,4,2,128,1,float16,fp8,1,0.02903999884923299
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1024,1,4,2,128,1,float16,float16,1,0.03515733281771342
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1024,1,4,2,128,1,float16,float16,3,0.03515733281771342
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1024,1,4,2,128,1,float16,fp8,3,0.027632000545660656
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1024,1,4,1,128,1,float16,fp8,1023,0.037205333511034645
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1024,1,4,2,128,1,float16,float16,7,0.03515733281771342
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1024,1,4,2,128,1,float16,fp8,7,0.0290133332212766
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1024,1,4,2,128,1,float16,float16,15,0.034474665919939675
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1024,1,4,2,128,1,float16,fp8,15,0.029018667836983997
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1024,1,4,2,128,1,float16,float16,31,0.03515733281771342
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1024,1,4,1,128,1,float16,fp8,2047,0.06211733321348826
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1024,1,4,2,128,1,float16,float16,63,0.03515733281771342
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1024,1,4,2,128,1,float16,fp8,31,0.027973333994547527
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1024,1,4,1,128,1,float16,float16,2047,0.10205333431561787
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1024,1,4,2,128,1,float16,fp8,63,0.0290133332212766
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1024,1,4,2,128,1,float16,float16,255,0.03721066564321518
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1024,1,4,2,128,1,float16,float16,127,0.03379199902216593
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1024,1,4,2,128,1,float16,fp8,127,0.0290133332212766
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1024,1,4,2,128,1,float16,fp8,255,0.027306665976842243
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1024,1,4,2,128,1,float16,fp8,511,0.041296000281969704
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1024,1,4,2,128,1,float16,float16,511,0.056661332647005715
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1024,1,4,2,128,1,float16,float16,1023,0.09659199913342793
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2048,1,4,1,128,1,float16,fp8,1,0.0290133332212766
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1024,1,4,2,128,1,float16,fp8,1023,0.061434666315714516
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2048,1,4,1,128,1,float16,fp8,3,0.02935466667016347
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2048,1,4,1,128,1,float16,float16,1,0.03549866626660029
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2048,1,4,1,128,1,float16,float16,3,0.03517866631348928
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2048,1,4,1,128,1,float16,fp8,7,0.029018667836983997
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2048,1,4,1,128,1,float16,float16,7,0.03549866626660029
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2048,1,4,1,128,1,float16,fp8,15,0.02900800108909607
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1024,1,4,2,128,1,float16,fp8,2047,0.10478933652242024
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2048,1,4,1,128,1,float16,float16,15,0.035162667433420815
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,1024,1,4,2,128,1,float16,float16,2047,0.18244266510009766
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2048,1,4,1,128,1,float16,float16,31,0.03551999976237615
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2048,1,4,1,128,1,float16,fp8,31,0.0290133332212766
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2048,1,4,1,128,1,float16,float16,63,0.03515733281771342
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2048,1,4,1,128,1,float16,fp8,63,0.0290133332212766
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2048,1,4,1,128,1,float16,float16,127,0.03549866626660029
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2048,1,4,1,128,1,float16,fp8,127,0.0290133332212766
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2048,1,4,1,128,1,float16,float16,255,0.03755199909210205
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2048,1,4,1,128,1,float16,fp8,255,0.0290133332212766
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2048,1,4,2,128,1,float16,float16,1,0.05973333120346069
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2048,1,4,1,128,1,float16,fp8,511,0.041989331444104515
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2048,1,4,2,128,1,float16,fp8,1,0.04537599782148997
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2048,1,4,1,128,1,float16,float16,511,0.0580266664425532
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2048,1,4,2,128,1,float16,float16,3,0.05973333120346069
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2048,1,4,2,128,1,float16,float16,7,0.06006933252016703
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2048,1,4,2,128,1,float16,fp8,3,0.04539733131726583
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2048,1,4,2,128,1,float16,fp8,7,0.045738667249679565
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2048,1,4,2,128,1,float16,fp8,15,0.045738667249679565
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2048,1,4,2,128,1,float16,float16,15,0.058037335673967995
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2048,1,4,2,128,1,float16,fp8,31,0.04540266593297323
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2048,1,4,1,128,1,float16,float16,1023,0.09693333506584167
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2048,1,4,2,128,1,float16,float16,31,0.05973333120346069
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2048,1,4,2,128,1,float16,float16,63,0.0580320010582606
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2048,1,4,2,128,1,float16,fp8,63,0.04539733131726583
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2048,1,4,2,128,1,float16,float16,127,0.06075199941794077
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2048,1,4,2,128,1,float16,fp8,127,0.04539733131726583
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2048,1,4,1,128,1,float16,fp8,1023,0.06178133189678192
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,4,1,128,1,float16,float16,1,0.010581333190202713
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,4,1,128,1,float16,fp8,1,0.010586666564146677
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2048,1,4,2,128,1,float16,float16,255,0.06144000093142191
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2048,1,4,2,128,1,float16,fp8,255,0.04744533201058706
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,4,1,128,1,float16,float16,3,0.010581333190202713
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,4,1,128,1,float16,float16,7,0.010485333700974783
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,4,1,128,1,float16,fp8,3,0.010581333190202713
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2048,1,4,2,128,1,float16,float16,511,0.09796266754468282
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,4,1,128,1,float16,fp8,15,0.010666667173306147
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,4,1,128,1,float16,float16,15,0.010581333190202713
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,4,1,128,1,float16,float16,31,0.010575999816258749
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2048,1,4,2,128,1,float16,fp8,511,0.06963199873765309
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,4,1,128,1,float16,fp8,7,0.010640000303586325
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,4,1,128,1,float16,fp8,31,0.010922666639089584
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,4,1,128,1,float16,float16,63,0.010656000425418219
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,4,1,128,1,float16,float16,127,0.010634666929642359
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,4,1,128,1,float16,fp8,63,0.010922666639089584
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,4,1,128,1,float16,fp8,255,0.010581333190202713
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,4,1,128,1,float16,float16,511,0.010832000523805618
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,4,1,128,1,float16,fp8,511,0.010602666685978571
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,4,1,128,1,float16,fp8,127,0.010586666564146677
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,4,1,128,1,float16,float16,255,0.010586666564146677
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,4,1,128,1,float16,float16,1023,0.011610666910807291
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,4,1,128,1,float16,fp8,1023,0.012624000509579977
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,4,1,128,1,float16,float16,2047,0.016714667280515034
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,4,1,128,1,float16,fp8,2047,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,4,1,128,1,float16,float16,8191,0.021162666380405426
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,4,1,128,1,float16,fp8,8191,0.02082666630546252
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,4,1,128,1,float16,fp8,4095,0.01877333347996076
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,4,1,128,1,float16,float16,4095,0.018768000106016796
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,4,1,128,1,float16,float16,16383,0.02492266645034154
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2048,1,4,2,128,1,float16,float16,1023,0.17749333381652832
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,4,1,128,1,float16,fp8,16383,0.02252800017595291
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,2048,1,4,2,128,1,float16,fp8,1023,0.10752000411351521
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,4,1,128,1,float16,fp8,32767,0.02696000039577484
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,4,1,128,1,float16,float16,32767,0.04334933559099833
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,4,2,128,1,float16,float16,1,0.010581333190202713
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,4,1,128,1,float16,float16,65535,0.062463998794555664
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,4,2,128,1,float16,fp8,1,0.010581333190202713
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,4,1,128,1,float16,fp8,65535,0.04298133154710134
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,4,1,128,1,float16,float16,131071,0.10444800059000652
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,4,2,128,1,float16,float16,7,0.010581333190202713
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,4,2,128,1,float16,fp8,7,0.010847999403874079
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,4,2,128,1,float16,float16,3,0.010757333288590113
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,4,1,128,1,float16,fp8,131071,0.0634933312733968
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,4,2,128,1,float16,fp8,3,0.010922666639089584
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,4,2,128,1,float16,float16,15,0.010640000303586325
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,4,2,128,1,float16,fp8,31,0.010575999816258749
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,4,2,128,1,float16,fp8,15,0.010911999891201654
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,4,2,128,1,float16,float16,31,0.01062400018175443
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,4,2,128,1,float16,float16,63,0.010597333312034607
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,4,2,128,1,float16,fp8,63,0.010575999816258749
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,4,2,128,1,float16,fp8,255,0.010640000303586325
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,4,2,128,1,float16,float16,1023,0.012629333883523941
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,4,2,128,1,float16,fp8,511,0.012351999680201212
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,4,2,128,1,float16,float16,127,0.010591999938090643
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,4,2,128,1,float16,fp8,127,0.010581333190202713
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,4,2,128,1,float16,float16,255,0.010474666953086853
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,4,2,128,1,float16,float16,511,0.011610666910807291
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,4,2,128,1,float16,float16,2047,0.01672533278663953
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,4,2,128,1,float16,fp8,2047,0.015696000307798386
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,4,2,128,1,float16,float16,4095,0.01878400022784869
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,4,2,128,1,float16,fp8,1023,0.012608000387748083
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,4,2,128,1,float16,fp8,4095,0.016719999412695568
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,4,2,128,1,float16,float16,8191,0.021157334248224895
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,4,2,128,1,float16,fp8,8191,0.01911466692884763
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,4,2,128,1,float16,fp8,16383,0.02491733431816101
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,4,2,128,1,float16,float16,16383,0.03925333420435587
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,4,2,128,1,float16,float16,32767,0.06041066845258077
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,4,2,128,1,float16,fp8,32767,0.03925333420435587
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,4,2,128,1,float16,float16,65535,0.10171733299891154
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,4,2,128,1,float16,fp8,65535,0.060415998101234436
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,4,2,128,1,float16,fp8,131071,0.10513066252072652
TRTLLM,1.0.0rc6,NVIDIA B200,generation_attention,torch_flow,16,1,4,2,128,1,float16,float16,131071,0.18807466824849448
