framework,version,device,op_name,kernel_source,batch_size,isl,num_heads,num_key_value_heads,head_dim,window_size,beam_width,attn_dtype,kv_cache_dtype,step,latency
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,64,64,64,0,1,float16,float16,1,0.02382933348417282
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,64,64,64,0,1,float16,fp8,1,0.02422933280467987
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,64,64,64,0,1,float16,float16,3,0.023733332753181458
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,64,64,64,0,1,float16,fp8,3,0.024271999796231587
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,64,64,64,0,1,float16,float16,7,0.02478400121132533
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,64,64,64,0,1,float16,fp8,7,0.025221332907676697
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,64,64,64,0,1,float16,float16,15,0.025477332373460133
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,64,64,64,0,1,float16,fp8,15,0.025962665677070618
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,64,64,64,0,1,float16,float16,31,0.03182933231194814
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,64,64,64,0,1,float16,fp8,31,0.03224000086386999
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,64,64,64,0,1,float16,float16,63,0.032085334261258446
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,64,64,64,0,1,float16,fp8,63,0.032618666688601174
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,64,64,64,0,1,float16,float16,127,0.033088001112143196
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,64,64,64,0,1,float16,fp8,127,0.032816000282764435
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,64,64,64,0,1,float16,float16,255,0.04472533365090688
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,64,64,64,0,1,float16,fp8,255,0.0397119993964831
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,64,64,64,0,1,float16,float16,511,0.06781333188215892
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,64,64,64,0,1,float16,fp8,511,0.06477866570154826
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,64,64,64,0,1,float16,float16,1023,0.13623467087745667
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,64,64,64,0,1,float16,fp8,1023,0.10700266559918721
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,64,64,64,0,1,float16,float16,2047,0.2588160037994385
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,64,64,64,0,1,float16,fp8,2047,0.2063466707865397
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,64,64,64,0,1,float16,float16,4095,0.4979519844055176
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,64,64,64,0,1,float16,fp8,4095,0.3920053243637085
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,64,64,64,0,1,float16,float16,1,0.007807999849319458
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,64,64,64,0,1,float16,fp8,1,0.009029333169261614
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,64,64,64,0,1,float16,float16,3,0.007823999971151352
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,64,64,64,0,1,float16,fp8,3,0.009114666531483332
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,64,64,64,0,1,float16,float16,7,0.007813333223263422
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,64,64,64,0,1,float16,fp8,7,0.009008000294367472
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,64,64,64,0,1,float16,float16,15,0.007797333101431529
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,64,64,64,0,1,float16,float16,31,0.008016000191370646
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,64,64,64,0,1,float16,fp8,31,0.009328000247478485
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,64,64,64,0,1,float16,float16,63,0.008693333094318708
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,64,64,64,0,1,float16,fp8,63,0.00983466642598311
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,64,64,64,0,1,float16,float16,127,0.008767999708652496
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,64,64,64,0,1,float16,fp8,127,0.011391999820868174
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,64,64,64,0,1,float16,float16,255,0.009045333291093508
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,64,64,64,0,1,float16,fp8,15,0.009066666786869368
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,64,64,64,0,1,float16,fp8,255,0.011546666423479715
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,64,64,64,0,1,float16,float16,511,0.016458666572968166
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,64,64,64,0,1,float16,fp8,511,0.016496000190575916
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,64,64,64,0,1,float16,float16,1023,0.016447999825080235
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,64,64,64,0,1,float16,fp8,1023,0.017269333203633625
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,64,64,64,0,1,float16,float16,2047,0.029882666965325672
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,64,64,64,0,1,float16,fp8,2047,0.027589333554108936
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,64,64,64,0,1,float16,float16,4095,0.035418666899204254
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,64,64,64,0,1,float16,fp8,4095,0.03481066723664602
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,64,64,64,0,1,float16,fp8,8191,0.04797866443792979
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,64,64,64,0,1,float16,float16,8191,0.05348266661167145
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,64,64,64,0,1,float16,fp8,16383,0.0795413355032603
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,64,64,64,0,1,float16,float16,16383,0.09690133730570476
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,64,64,64,0,1,float16,float16,1,0.008010666817426682
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,64,64,64,0,1,float16,fp8,1,0.009375999992092451
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,64,64,64,0,1,float16,float16,3,0.0080960001796484
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,64,64,64,0,1,float16,fp8,3,0.009365333244204521
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,64,64,64,0,1,float16,float16,7,0.008133333176374435
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,64,64,64,0,1,float16,fp8,7,0.009381333366036415
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,64,64,64,0,1,float16,float16,15,0.008047999814152718
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,64,64,64,0,1,float16,fp8,15,0.009402666861812273
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,64,64,64,0,1,float16,fp8,31,0.016143999993801117
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,64,64,64,0,1,float16,float16,31,0.016762666404247284
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,64,64,64,0,1,float16,float16,63,0.009141333401203156
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,64,64,64,0,1,float16,fp8,63,0.014560000350077948
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,64,64,64,0,1,float16,float16,127,0.00921066664159298
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,64,64,64,0,1,float16,fp8,127,0.017312000195185345
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,64,64,64,0,1,float16,float16,255,0.01421333352724711
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,64,64,64,0,1,float16,fp8,255,0.012266666938861212
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,64,64,64,0,1,float16,fp8,511,0.011887999872366587
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,64,64,64,0,1,float16,float16,511,0.01534933348496755
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,64,64,64,0,1,float16,float16,1023,0.028927999238173168
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,64,64,64,0,1,float16,fp8,1023,0.02622399975856145
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,64,64,64,0,1,float16,float16,2047,0.031104000906149547
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,64,64,64,0,1,float16,fp8,2047,0.03259200106064478
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,64,64,64,0,1,float16,float16,4095,0.04678399860858917
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,64,64,64,0,1,float16,float16,8191,0.0846026639143626
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,64,64,64,0,1,float16,float16,16383,0.14587733149528503
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,64,64,64,0,1,float16,float16,1,0.04307200014591217
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,64,64,64,0,1,float16,fp8,1,0.044069334864616394
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,64,64,64,0,1,float16,fp8,3,0.043882668018341064
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,64,64,64,0,1,float16,float16,7,0.04494933287302653
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,64,64,64,0,1,float16,fp8,7,0.04590400060017904
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,64,64,64,0,1,float16,float16,31,0.05796800057093302
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,64,64,64,0,1,float16,fp8,31,0.059088001648585
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,64,64,64,0,1,float16,float16,63,0.059402664502461754
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,64,64,64,0,1,float16,fp8,63,0.05972266693909963
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,64,64,64,0,1,float16,fp8,4095,0.04078399886687597
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,64,64,64,0,1,float16,float16,127,0.06563733518123627
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,64,64,64,0,1,float16,fp8,16383,0.11353066563606262
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,64,64,64,0,1,float16,fp8,8191,0.064560001095136
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,64,64,64,0,1,float16,fp8,127,0.060693333546320595
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,64,64,64,0,1,float16,fp8,15,0.04724800089995066
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,64,64,64,0,1,float16,float16,3,0.04307200014591217
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,64,64,64,0,1,float16,float16,255,0.08075733482837677
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,64,64,64,0,1,float16,fp8,255,0.07951466739177704
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,64,64,64,0,1,float16,float16,15,0.046223998069763184
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,64,64,64,0,1,float16,float16,511,0.12423466642697652
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,64,64,64,0,1,float16,fp8,511,0.12099200487136841
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,64,64,64,0,1,float16,float16,1023,0.25882667303085327
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,64,64,64,0,1,float16,fp8,1023,0.20339733362197876
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,64,64,64,0,1,float16,float16,2047,0.5019199848175049
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,64,64,64,0,1,float16,float16,1,0.008639999975760778
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,64,64,64,0,1,float16,fp8,2047,0.39671464761098224
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,64,64,64,0,1,float16,fp8,1,0.008570666735370954
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,64,64,64,0,1,float16,float16,3,0.008570666735370954
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,64,64,64,0,1,float16,fp8,3,0.00843733362853527
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,64,64,64,0,1,float16,float16,7,0.008869333192706108
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,64,64,64,0,1,float16,fp8,7,0.008767999708652496
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,64,64,64,0,1,float16,float16,15,0.008885333314538002
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,64,64,64,0,1,float16,fp8,15,0.008816000074148178
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,64,64,64,0,1,float16,float16,31,0.009328000247478485
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,64,64,64,0,1,float16,fp8,31,0.009535999968647957
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,64,64,64,0,1,float16,float16,63,0.01099733387430509
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,64,64,64,0,1,float16,fp8,63,0.011258666714032492
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,64,64,64,0,1,float16,float16,127,0.01139733319481214
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,64,64,64,0,1,float16,fp8,127,0.011328000575304031
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,64,64,64,0,1,float16,float16,1023,0.01863466699918111
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,64,64,64,0,1,float16,float16,255,0.011418666690587997
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,64,64,64,0,1,float16,fp8,255,0.011173332730929056
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,64,64,64,0,1,float16,float16,511,0.013290667285521826
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,64,64,64,0,1,float16,float16,4095,0.07127999762694041
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,64,64,64,0,1,float16,fp8,511,0.012874666601419449
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,64,64,64,0,1,float16,fp8,1023,0.01915733392039935
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,64,64,64,0,1,float16,float16,8191,0.13159466783205667
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,64,64,64,0,1,float16,float16,2047,0.04037333279848099
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,64,64,64,0,1,float16,fp8,2047,0.029215998947620392
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,64,64,64,0,1,float16,fp8,4095,0.05672533313433329
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,64,64,64,0,1,float16,fp8,8191,0.10341866811116536
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,64,64,64,0,1,float16,float16,16383,0.2571786642074585
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,64,64,64,0,1,float16,float16,1,0.08063466846942902
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,64,64,64,0,1,float16,fp8,16383,0.19779199361801147
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,64,64,64,0,1,float16,fp8,1,0.0824533353249232
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,64,64,64,0,1,float16,float16,3,0.08080533146858215
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,64,64,64,0,1,float16,fp8,3,0.08243733147780101
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,64,64,64,0,1,float16,float16,7,0.08401067058245341
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,64,64,64,0,1,float16,fp8,7,0.0860640009244283
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,64,64,64,0,1,float16,float16,15,0.08691733082135518
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,64,64,64,0,1,float16,fp8,15,0.08872000376383464
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,64,64,64,0,1,float16,float16,31,0.11196800072987874
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,64,64,64,0,1,float16,fp8,31,0.11204800009727478
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,64,64,64,0,1,float16,float16,63,0.12016533811887105
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,64,64,64,0,1,float16,fp8,63,0.11493333180745442
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,64,64,64,0,1,float16,float16,127,0.12315733234087627
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,64,64,64,0,1,float16,fp8,127,0.12231466174125671
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,64,64,64,0,1,float16,float16,255,0.15345600247383118
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,64,64,64,0,1,float16,fp8,255,0.15019733707110086
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,64,64,64,0,1,float16,float16,511,0.23802665869394937
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,64,64,64,0,1,float16,fp8,511,0.23246934016545615
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,64,64,64,0,1,float16,float16,1023,0.4970719814300537
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,64,64,64,0,1,float16,fp8,1023,0.39266665776570636
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,64,64,64,0,1,float16,float16,1,0.1553439994653066
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,64,64,64,0,1,float16,fp8,1,0.1591253379980723
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,64,64,64,0,1,float16,float16,3,0.15568533539772034
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,64,64,64,0,1,float16,fp8,3,0.15938133001327515
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,64,64,64,0,1,float16,float16,7,0.16269333163897196
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,64,64,64,0,1,float16,fp8,7,0.16703999042510986
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,64,64,64,0,1,float16,float16,15,0.1787733236948649
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,64,64,64,0,1,float16,fp8,15,0.17256534099578857
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,64,64,64,0,1,float16,float16,31,0.2247999906539917
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,64,64,64,0,1,float16,float16,63,0.23253866036732992
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,64,64,64,0,1,float16,fp8,31,0.2232266664505005
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,64,64,64,0,1,float16,fp8,63,0.23448532819747925
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,64,64,64,0,1,float16,float16,127,0.23771733045578003
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,64,64,64,0,1,float16,fp8,127,0.23734933137893677
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,64,64,64,0,1,float16,float16,255,0.2964266737302144
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,64,64,64,0,1,float16,fp8,255,0.2924693425496419
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,64,64,64,0,1,float16,float16,1,0.3048693339029948
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,64,64,64,0,1,float16,fp8,1,0.3142506678899129
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,64,64,64,0,1,float16,float16,3,0.3102239966392517
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,64,64,64,0,1,float16,fp8,3,0.312879999478658
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,64,64,64,0,1,float16,float16,7,0.34275201956431073
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,64,64,64,0,1,float16,fp8,7,0.33155200878779095
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,64,64,64,0,1,float16,float16,15,0.36345601081848145
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,64,64,64,0,1,float16,fp8,15,0.3649173180262248
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,64,64,64,0,1,float16,float16,31,0.44203734397888184
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,64,64,64,0,1,float16,fp8,31,0.448794682820638
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,64,64,64,0,1,float16,float16,63,0.4577440023422241
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,64,64,64,0,1,float16,fp8,63,0.4614986578623454
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,64,64,64,0,1,float16,float16,127,0.4671146472295125
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,64,64,64,0,1,float16,float16,7,0.014378666877746582
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,64,64,64,0,1,float16,float16,1,0.014064000298579534
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,64,64,64,0,1,float16,float16,15,0.0144213338692983
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,64,64,64,0,1,float16,fp8,1,0.014010666559139887
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,64,64,64,0,1,float16,fp8,127,0.46712533632914227
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,64,64,64,0,1,float16,float16,3,0.01423466702302297
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,64,64,64,0,1,float16,fp8,3,0.01422400027513504
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,64,64,64,0,1,float16,fp8,7,0.014495999862750372
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,64,64,64,0,1,float16,fp8,15,0.014432000617186228
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,64,64,64,0,1,float16,float16,31,0.015562667200962702
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,64,64,64,0,1,float16,fp8,31,0.015557333827018738
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,64,64,64,0,1,float16,float16,63,0.018725333114465077
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,64,64,64,0,1,float16,fp8,63,0.01889066646496455
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,64,64,64,0,1,float16,float16,127,0.019002666076024372
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,64,64,64,0,1,float16,fp8,127,0.01907733331123988
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,64,64,64,0,1,float16,float16,255,0.019354666272799175
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,64,64,64,0,1,float16,fp8,255,0.019061333189407986
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,64,64,64,0,1,float16,float16,511,0.023311999936898548
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,64,64,64,0,1,float16,fp8,511,0.02248000105222066
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,64,64,64,0,1,float16,float16,1023,0.03887466589609782
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,64,64,64,0,1,float16,fp8,1023,0.03306133300065994
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,64,64,64,0,1,float16,float16,2047,0.07426666716734569
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,64,64,64,0,1,float16,fp8,2047,0.058335999647776283
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,64,64,64,0,1,float16,float16,4095,0.13525333007176718
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,64,64,64,0,1,float16,fp8,4095,0.108106662829717
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,64,64,64,0,1,float16,float16,8191,0.2550080021222432
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,64,64,64,0,1,float16,fp8,8191,0.19983466466267905
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,64,64,64,0,1,float16,float16,16383,0.5046240091323853
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,64,64,64,0,1,float16,fp8,16383,0.38356268405914307
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,64,64,64,0,1,float16,float16,1,0.6585386594136556
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,64,64,64,0,1,float16,fp8,1,0.6471786499023438
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,64,64,64,0,1,float16,float16,3,0.6684479713439941
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,64,64,64,0,1,float16,fp8,3,0.6700479984283447
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,64,64,64,0,1,float16,float16,7,0.6866986751556396
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,64,64,64,0,1,float16,fp8,7,0.6987573305765787
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,64,64,64,0,1,float16,float16,15,0.7199573516845703
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,64,64,64,0,1,float16,fp8,15,0.7311626275380453
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,64,64,64,0,1,float16,float16,31,0.8773760000864664
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,64,64,64,0,1,float16,fp8,31,0.890442689259847
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,64,64,64,0,1,float16,float16,63,0.9076000054677328
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,64,64,64,0,1,float16,fp8,63,0.9160266717274984
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,64,64,64,0,1,float16,float16,1,1.3252800305684407
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,64,64,64,0,1,float16,fp8,1,1.3529225985209148
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,64,64,64,0,1,float16,float16,3,1.3298933506011963
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,64,64,64,0,1,float16,fp8,3,1.3530826568603516
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,64,64,64,0,1,float16,float16,7,1.364847977956136
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,64,64,64,0,1,float16,float16,1,0.013909333695967993
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,64,64,64,0,1,float16,fp8,1,0.014122666170199713
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,64,64,64,0,1,float16,fp8,7,1.390442689259847
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,64,64,64,0,1,float16,float16,3,0.013882666826248169
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,64,64,64,0,1,float16,fp8,3,0.014058666924635569
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,64,64,64,0,1,float16,float16,7,0.014389333625634512
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,64,64,64,0,1,float16,float16,15,1.433087984720866
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,64,64,64,0,1,float16,fp8,15,1.4560640652974446
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,64,64,64,0,1,float16,fp8,7,0.014677333335081736
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,64,64,64,0,1,float16,float16,15,0.014874666929244995
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,64,64,64,0,1,float16,float16,31,1.7470347086588542
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,64,64,64,0,1,float16,fp8,15,0.01504533365368843
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,64,64,64,0,1,float16,fp8,31,1.775546709696452
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,64,64,64,0,1,float16,float16,31,0.01820266619324684
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,64,64,64,0,1,float16,fp8,31,0.018570666511853535
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,64,64,64,0,1,float16,float16,63,0.018378666291634243
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,64,64,64,0,1,float16,fp8,63,0.018735999862353008
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,64,64,64,0,1,float16,float16,127,0.018624000251293182
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,64,64,64,0,1,float16,fp8,127,0.01894933357834816
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,64,64,64,0,1,float16,float16,255,0.02274666726589203
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,64,64,64,0,1,float16,fp8,255,0.02201066662867864
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,64,64,64,0,1,float16,float16,511,0.037530665596326195
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,64,64,64,0,1,float16,fp8,511,0.03340800106525421
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,64,64,64,0,1,float16,float16,1023,0.07364266614119212
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,64,64,64,0,1,float16,fp8,1023,0.057536001006762184
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,64,64,64,0,1,float16,float16,2047,0.1346666713555654
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,64,64,64,0,1,float16,fp8,2047,0.10949866970380147
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,64,64,64,0,1,float16,float16,4095,0.25492266813913983
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,64,64,64,0,1,float16,fp8,4095,0.20451732476552328
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,64,64,64,0,1,float16,float16,8191,0.5256746610005697
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,64,64,64,0,1,float16,fp8,8191,0.4031306505203247
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,64,64,128,0,1,float16,float16,1,0.023546665906906128
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,64,64,128,0,1,float16,fp8,1,0.024266667664051056
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,64,64,128,0,1,float16,float16,3,0.02441066751877467
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,64,64,128,0,1,float16,fp8,3,0.025242666403452556
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,64,64,128,0,1,float16,float16,7,0.0252960001428922
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,64,64,128,0,1,float16,float16,31,0.03200000027815501
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,64,64,128,0,1,float16,fp8,31,0.03199466566244761
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,64,64,128,0,1,float16,fp8,7,0.026005332668622334
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,64,64,128,0,1,float16,float16,63,0.03278933217128118
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,64,64,128,0,1,float16,float16,15,0.03189333279927572
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,64,64,128,0,1,float16,fp8,63,0.032287999987602234
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,64,64,128,0,1,float16,fp8,15,0.03197333216667175
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,64,64,128,0,1,float16,float16,127,0.04437866806983948
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,64,64,128,0,1,float16,fp8,127,0.03950933367013931
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,64,64,128,0,1,float16,float16,255,0.06766400237878163
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,64,64,128,0,1,float16,fp8,255,0.06477333108584087
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,64,64,128,0,1,float16,float16,511,0.13646399974822998
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,64,64,128,0,1,float16,fp8,511,0.10755733648935954
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,64,64,128,0,1,float16,float16,1023,0.2586560050646464
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,64,64,128,0,1,float16,fp8,1023,0.20693333943684897
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,64,64,128,0,1,float16,float16,2047,0.49820268154144287
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,64,64,128,0,1,float16,fp8,2047,0.39370667934417725
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,64,64,128,0,1,float16,float16,1,0.008661333471536636
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,64,64,128,0,1,float16,fp8,1,0.008954666554927826
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,64,64,128,0,1,float16,float16,4095,0.9810986518859863
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,64,64,128,0,1,float16,fp8,4095,0.7660533587137858
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,64,64,128,0,1,float16,float16,3,0.008837333569924036
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,64,64,128,0,1,float16,fp8,3,0.009189333145817121
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,64,64,128,0,1,float16,float16,63,0.011285333583752314
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,64,64,128,0,1,float16,float16,7,0.008821333448092142
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,64,64,128,0,1,float16,fp8,7,0.008832000195980072
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,64,64,128,0,1,float16,float16,15,0.008789333204428354
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,64,64,128,0,1,float16,fp8,15,0.009002666920423508
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,64,64,128,0,1,float16,float16,31,0.00984533317387104
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,64,64,128,0,1,float16,fp8,31,0.00979200005531311
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,64,64,128,0,1,float16,fp8,63,0.011434666812419891
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,64,64,128,0,1,float16,float16,127,0.011418666690587997
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,64,64,128,0,1,float16,fp8,127,0.011296000331640244
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,64,64,128,0,1,float16,float16,255,0.011440000186363855
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,64,64,128,0,1,float16,fp8,255,0.011231999844312668
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,64,64,128,0,1,float16,float16,511,0.0161013330022494
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,64,64,128,0,1,float16,fp8,511,0.016058667252461117
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,64,64,128,0,1,float16,float16,1023,0.027637332677841187
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,64,64,128,0,1,float16,fp8,1023,0.027445333699385326
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,64,64,128,0,1,float16,float16,2047,0.03537066777547201
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,64,64,128,0,1,float16,fp8,2047,0.034389334420363106
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,64,64,128,0,1,float16,float16,4095,0.0529013325770696
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,64,64,128,0,1,float16,fp8,4095,0.047968000173568726
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,64,64,128,0,1,float16,float16,8191,0.09701866904894511
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,64,64,128,0,1,float16,fp8,8191,0.08169599870840709
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,64,64,128,0,1,float16,float16,16383,0.16369600097338358
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,64,64,128,0,1,float16,float16,1,0.009119999905427298
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,64,64,128,0,1,float16,fp8,16383,0.14362133542696634
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,64,64,128,0,1,float16,fp8,1,0.009141333401203156
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,64,64,128,0,1,float16,float16,3,0.008837333569924036
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,64,64,128,0,1,float16,fp8,3,0.009205333267649015
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,64,64,128,0,1,float16,float16,7,0.009088000282645226
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,64,64,128,0,1,float16,fp8,7,0.009285333255926767
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,64,64,128,0,1,float16,float16,15,0.009285333255926767
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,64,64,128,0,1,float16,fp8,15,0.009328000247478485
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,64,64,128,0,1,float16,float16,31,0.009818666925032934
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,64,64,128,0,1,float16,fp8,31,0.009877333417534828
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,64,64,128,0,1,float16,float16,63,0.011578666667143503
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,64,64,128,0,1,float16,fp8,255,0.011600000162919363
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,64,64,128,0,1,float16,fp8,63,0.01156266654531161
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,64,64,128,0,1,float16,float16,127,0.011642667154471079
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,64,64,128,0,1,float16,fp8,127,0.0116799995303154
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,64,64,128,0,1,float16,float16,255,0.012037333101034164
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,64,64,128,0,1,float16,float16,511,0.026778665681680042
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,64,64,128,0,1,float16,fp8,511,0.02603200078010559
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,64,64,128,0,1,float16,float16,1023,0.032272001107533775
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,64,64,128,0,1,float16,fp8,1023,0.03030933439731598
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,64,64,128,0,1,float16,float16,2047,0.04586133360862732
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,64,64,128,0,1,float16,fp8,2047,0.04119999955097834
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,64,64,128,0,1,float16,float16,4095,0.08346666892369588
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,64,64,128,0,1,float16,fp8,4095,0.06712000072002411
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,64,64,128,0,1,float16,float16,8191,0.1431893308957418
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,64,64,128,0,1,float16,fp8,8191,0.11985066533088684
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,64,64,128,0,1,float16,float16,16383,0.26239999135335285
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,64,64,128,0,1,float16,float16,1,0.042837331692377724
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,64,64,128,0,1,float16,fp8,16383,0.2143626610438029
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,64,64,128,0,1,float16,fp8,1,0.04414399961630503
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,64,64,128,0,1,float16,float16,3,0.04470933477083842
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,64,64,128,0,1,float16,fp8,3,0.04572799801826477
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,64,64,128,0,1,float16,float16,7,0.046037331223487854
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,64,64,128,0,1,float16,fp8,7,0.047397335370381675
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,64,64,128,0,1,float16,fp8,63,0.05982933441797892
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,64,64,128,0,1,float16,float16,15,0.05821333328882853
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,64,64,128,0,1,float16,fp8,15,0.05835733314355215
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,64,64,128,0,1,float16,float16,255,0.12524799505869547
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,64,64,128,0,1,float16,fp8,255,0.12166933218638103
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,64,64,128,0,1,float16,float16,31,0.059978668888409935
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,64,64,128,0,1,float16,fp8,31,0.05871466795603434
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,64,64,128,0,1,float16,float16,63,0.06549866497516632
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,64,64,128,0,1,float16,float16,127,0.08106666803359985
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,64,64,128,0,1,float16,fp8,127,0.0795306662718455
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,64,64,128,0,1,float16,float16,511,0.25867732365926105
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,64,64,128,0,1,float16,fp8,511,0.20542933543523154
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,64,64,128,0,1,float16,float16,2047,0.9850719769795736
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,64,64,128,0,1,float16,float16,1023,0.504261334737142
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,64,64,128,0,1,float16,fp8,1023,0.39689600467681885
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,64,64,128,0,1,float16,float16,1,0.00850133349498113
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,64,64,128,0,1,float16,fp8,1,0.008416000132759413
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,64,64,128,0,1,float16,fp8,2047,0.7630346616109213
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,64,64,128,0,1,float16,float16,3,0.008746666833758354
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,64,64,128,0,1,float16,fp8,3,0.00871999996403853
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,64,64,128,0,1,float16,float16,7,0.008853333070874214
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,64,64,128,0,1,float16,fp8,7,0.008778666456540426
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,64,64,128,0,1,float16,float16,15,0.009359999870260557
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,64,64,128,0,1,float16,fp8,15,0.009461333354314169
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,64,64,128,0,1,float16,float16,31,0.010944000134865442
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,64,64,128,0,1,float16,fp8,31,0.011215999722480774
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,64,64,128,0,1,float16,float16,63,0.011381333072980246
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,64,64,128,0,1,float16,fp8,63,0.011061333119869232
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,64,64,128,0,1,float16,float16,127,0.011445333560307821
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,64,64,128,0,1,float16,float16,1023,0.04012800008058548
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,64,64,128,0,1,float16,fp8,127,0.01146666705608368
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,64,64,128,0,1,float16,float16,255,0.013301332791646322
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,64,64,128,0,1,float16,fp8,255,0.013194666554530462
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,64,64,128,0,1,float16,float16,511,0.018298666924238205
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,64,64,128,0,1,float16,fp8,511,0.018250666558742523
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,64,64,128,0,1,float16,fp8,1023,0.028560000161329906
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,64,64,128,0,1,float16,float16,2047,0.07088533540566762
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,64,64,128,0,1,float16,fp8,2047,0.05547733108202616
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,64,64,128,0,1,float16,float16,4095,0.1318826675415039
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,64,64,128,0,1,float16,float16,1,0.08037333190441132
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,64,64,128,0,1,float16,fp8,4095,0.10202667117118835
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,64,64,128,0,1,float16,float16,8191,0.2582719922065735
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,64,64,128,0,1,float16,fp8,8191,0.19196800390879312
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,64,64,128,0,1,float16,float16,16383,0.5037866830825806
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,64,64,128,0,1,float16,fp8,16383,0.3754719893137614
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,64,64,128,0,1,float16,fp8,1,0.08321600159009297
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,64,64,128,0,1,float16,float16,3,0.08356799681981404
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,64,64,128,0,1,float16,fp8,3,0.08641067147254944
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,64,64,128,0,1,float16,float16,7,0.087226668993632
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,64,64,128,0,1,float16,fp8,7,0.08962133526802063
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,64,64,128,0,1,float16,float16,15,0.114138662815094
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,64,64,128,0,1,float16,fp8,15,0.11157866319020589
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,64,64,128,0,1,float16,float16,31,0.12071999907493591
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,64,64,128,0,1,float16,fp8,31,0.11593600114186604
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,64,64,128,0,1,float16,float16,63,0.12361066540082295
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,64,64,128,0,1,float16,fp8,63,0.12290133039156596
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,64,64,128,0,1,float16,float16,127,0.1534773310025533
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,64,64,128,0,1,float16,fp8,127,0.1516586641470591
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,64,64,128,0,1,float16,float16,255,0.23858133951822916
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,64,64,128,0,1,float16,fp8,255,0.234442671140035
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,64,64,128,0,1,float16,float16,511,0.49775465329488117
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,64,64,128,0,1,float16,fp8,511,0.395957350730896
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,64,64,128,0,1,float16,float16,1,0.15505066514015198
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,64,64,128,0,1,float16,float16,1023,0.9851840337117513
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,64,64,128,0,1,float16,fp8,1023,0.7698720296223959
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,64,64,128,0,1,float16,fp8,1,0.15980266531308493
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,64,64,128,0,1,float16,float16,3,0.1648319959640503
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,64,64,128,0,1,float16,fp8,3,0.16742932796478271
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,64,64,128,0,1,float16,float16,7,0.18312533696492514
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,64,64,128,0,1,float16,fp8,7,0.17790400981903076
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,64,64,128,0,1,float16,float16,15,0.22824533780415854
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,64,64,128,0,1,float16,fp8,15,0.22873065869013467
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,64,64,128,0,1,float16,float16,31,0.23450666666030884
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,64,64,128,0,1,float16,fp8,31,0.23438400030136108
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,64,64,128,0,1,float16,float16,63,0.23876800139745077
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,64,64,128,0,1,float16,fp8,63,0.2382026712099711
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,64,64,128,0,1,float16,float16,127,0.29768532514572144
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,64,64,128,0,1,float16,fp8,127,0.2955999970436096
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,64,64,128,0,1,float16,float16,255,0.46579734484354657
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,64,64,128,0,1,float16,fp8,255,0.4581120014190674
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,64,64,128,0,1,float16,float16,3,0.34759998321533203
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,64,64,128,0,1,float16,float16,1,0.3344373305638631
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,64,64,128,0,1,float16,fp8,1,0.33217066526412964
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,64,64,128,0,1,float16,fp8,3,0.3547360102335612
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,64,64,128,0,1,float16,float16,7,0.36510932445526123
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,64,64,128,0,1,float16,fp8,7,0.3753439982732137
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,64,64,128,0,1,float16,float16,15,0.44997866948445636
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,64,64,128,0,1,float16,fp8,15,0.4564906756083171
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,64,64,128,0,1,float16,float16,31,0.4617439905802409
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,64,64,128,0,1,float16,fp8,31,0.4610186815261841
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,64,64,128,0,1,float16,fp8,63,0.4698293209075928
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,64,64,128,0,1,float16,float16,63,0.46909332275390625
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,64,64,128,0,1,float16,float16,1,0.013728000223636627
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,64,64,128,0,1,float16,fp8,1,0.014261333892742792
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,64,64,128,0,1,float16,float16,3,0.01403733342885971
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,64,64,128,0,1,float16,float16,127,0.5860106547673544
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,64,64,128,0,1,float16,fp8,127,0.5827680031458536
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,64,64,128,0,1,float16,fp8,3,0.014303999642531076
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,64,64,128,0,1,float16,float16,7,0.014181333283583323
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,64,64,128,0,1,float16,fp8,7,0.01431999976436297
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,64,64,128,0,1,float16,float16,15,0.015205333630243937
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,64,64,128,0,1,float16,fp8,15,0.01573866605758667
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,64,64,128,0,1,float16,float16,31,0.018725333114465077
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,64,64,128,0,1,float16,fp8,31,0.01884799947341283
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,64,64,128,0,1,float16,fp8,255,0.02253866692384084
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,64,64,128,0,1,float16,float16,63,0.01871466636657715
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,64,64,128,0,1,float16,fp8,63,0.018885333091020584
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,64,64,128,0,1,float16,float16,127,0.019194666296243668
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,64,64,128,0,1,float16,fp8,127,0.019237333287795384
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,64,64,128,0,1,float16,float16,255,0.022997332115968067
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,64,64,128,0,1,float16,float16,511,0.038506666819254555
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,64,64,128,0,1,float16,fp8,511,0.03338133295377096
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,64,64,128,0,1,float16,float16,1023,0.07326933244864146
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,64,64,128,0,1,float16,fp8,1023,0.05840000013510386
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,64,64,128,0,1,float16,float16,2047,0.134853333234787
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,64,64,128,0,1,float16,fp8,2047,0.10605866710344951
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,64,64,128,0,1,float16,float16,4095,0.25547732909520465
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,64,64,128,0,1,float16,fp8,4095,0.1969333291053772
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,64,64,128,0,1,float16,float16,8191,0.49823466936747235
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,64,64,128,0,1,float16,fp8,8191,0.3768746852874756
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,64,64,128,0,1,float16,fp8,16383,0.7405386765797933
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,64,64,128,0,1,float16,float16,16383,0.9956160386403402
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,64,64,128,0,1,float16,float16,1,0.6698773701985677
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,64,64,128,0,1,float16,fp8,1,0.6892106533050537
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,64,64,128,0,1,float16,float16,3,0.6884960333506266
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,64,64,128,0,1,float16,fp8,3,0.7081279754638672
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,64,64,128,0,1,float16,float16,7,0.7247733275095621
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,64,64,128,0,1,float16,fp8,7,0.7442239920298258
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,64,64,128,0,1,float16,float16,15,0.8931626478830973
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,64,64,128,0,1,float16,fp8,15,0.9066239992777506
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,64,64,128,0,1,float16,float16,31,0.9158559640248617
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,64,64,128,0,1,float16,float16,63,0.9304426511128744
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,64,64,128,0,1,float16,fp8,31,0.9157333374023438
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,64,64,128,0,1,float16,fp8,63,0.9299680391947428
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,64,64,128,0,1,float16,float16,1,1.3357386589050293
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,64,64,128,0,1,float16,fp8,1,1.370741367340088
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,64,64,128,0,1,float16,float16,3,1.3698933919270833
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,64,64,128,0,1,float16,fp8,3,1.4090612729390461
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,64,64,128,0,1,float16,float16,1,0.013658666362365087
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,64,64,128,0,1,float16,float16,7,1.4403626124064128
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,64,64,128,0,1,float16,fp8,1,0.014229333649079004
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,64,64,128,0,1,float16,fp8,7,1.4821972846984863
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,64,64,128,0,1,float16,float16,3,0.013962666193644205
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,64,64,128,0,1,float16,float16,15,1.7802240053812664
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,64,64,128,0,1,float16,float16,15,0.017903999735911686
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,64,64,128,0,1,float16,fp8,15,1.8059412638346355
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,64,64,128,0,1,float16,fp8,31,1.8256533940633137
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,64,64,128,0,1,float16,fp8,3,0.014378666877746582
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,64,64,128,0,1,float16,float16,7,0.014650666465361914
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,64,64,128,0,1,float16,fp8,7,0.015077333897352219
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,64,64,128,0,1,float16,float16,31,1.8291999499003093
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,64,64,128,0,1,float16,fp8,15,0.018288000176350277
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,64,64,128,0,1,float16,float16,31,0.018021332720915478
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,64,64,128,0,1,float16,fp8,31,0.018122666825850803
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,64,64,128,0,1,float16,float16,63,0.018181333939234417
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,64,64,128,0,1,float16,fp8,63,0.018351999421914417
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,64,64,128,0,1,float16,float16,127,0.02207999924818675
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,64,64,128,0,1,float16,fp8,127,0.021642667551835377
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,64,64,128,0,1,float16,fp8,255,0.03323733309904734
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,64,64,128,0,1,float16,float16,255,0.03702933341264725
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,64,64,128,0,1,float16,float16,511,0.07382399837176006
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,64,64,128,0,1,float16,fp8,511,0.05723733206590017
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,64,64,128,0,1,float16,float16,1023,0.1341973344484965
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,64,64,128,0,1,float16,fp8,1023,0.10899733503659566
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,64,64,128,0,1,float16,float16,2047,0.25677865743637085
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,64,64,128,0,1,float16,fp8,2047,0.2043786644935608
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,64,64,128,0,1,float16,float16,4095,0.4973013401031494
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,64,64,128,0,1,float16,fp8,4095,0.3933866818745931
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,48,48,64,0,1,float16,float16,3,0.01905599981546402
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,48,48,64,0,1,float16,float16,1,0.018810667097568512
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,64,64,128,0,1,float16,float16,8191,1.0354026953379314
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,64,64,128,0,1,float16,fp8,8191,0.7872426509857178
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,48,48,64,0,1,float16,fp8,1,0.01945066700379054
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,48,48,64,0,1,float16,fp8,3,0.01941866676012675
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,48,48,64,0,1,float16,float16,7,0.019381333142518997
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,48,48,64,0,1,float16,fp8,7,0.020165332903464634
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,48,48,64,0,1,float16,float16,15,0.020069333414236706
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,48,48,64,0,1,float16,fp8,15,0.020538666596015293
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,48,48,64,0,1,float16,float16,127,0.025706666211287182
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,48,48,64,0,1,float16,float16,31,0.025098666548728943
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,48,48,64,0,1,float16,fp8,31,0.025626666843891144
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,48,48,64,0,1,float16,float16,63,0.02533866713444392
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,48,48,64,0,1,float16,fp8,63,0.025493333737055462
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,48,48,64,0,1,float16,fp8,511,0.04997866849104563
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,48,48,64,0,1,float16,fp8,127,0.025621332228183746
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,48,48,64,0,1,float16,float16,255,0.034458667039871216
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,48,48,64,0,1,float16,fp8,255,0.030559999247392017
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,48,48,64,0,1,float16,float16,511,0.05275199810663859
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,48,48,64,0,1,float16,float16,1023,0.1048959990342458
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,48,48,64,0,1,float16,fp8,1023,0.08268799881140391
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,48,48,64,0,1,float16,float16,2047,0.19806400934855142
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,48,48,64,0,1,float16,fp8,2047,0.15799466768900552
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,48,48,64,0,1,float16,float16,4095,0.3771680196126302
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,48,48,64,0,1,float16,float16,1,0.007823999971151352
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,48,48,64,0,1,float16,fp8,4095,0.29926933844884235
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,48,48,64,0,1,float16,fp8,1,0.008986666798591614
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,48,48,64,0,1,float16,float16,3,0.007626666376988093
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,48,48,64,0,1,float16,fp8,3,0.00901333304742972
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,48,48,64,0,1,float16,float16,7,0.00789866658548514
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,48,48,64,0,1,float16,fp8,7,0.008879999940594038
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,48,48,64,0,1,float16,float16,127,0.008965333302815756
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,48,48,64,0,1,float16,float16,15,0.007807999849319458
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,48,48,64,0,1,float16,fp8,15,0.00895999992887179
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,48,48,64,0,1,float16,float16,31,0.0080960001796484
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,48,48,64,0,1,float16,fp8,31,0.009050666665037474
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,48,48,64,0,1,float16,float16,63,0.008559999987483025
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,48,48,64,0,1,float16,fp8,63,0.009829333052039146
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,48,48,64,0,1,float16,fp8,127,0.011168000598748526
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,48,48,64,0,1,float16,float16,255,0.00897066667675972
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,48,48,64,0,1,float16,fp8,255,0.011445333560307821
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,48,48,64,0,1,float16,float16,511,0.010015999898314476
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,48,48,64,0,1,float16,fp8,511,0.011445333560307821
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,48,48,64,0,1,float16,float16,1023,0.016085332880417507
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,48,48,64,0,1,float16,fp8,1023,0.015669333438078564
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,48,48,64,0,1,float16,float16,2047,0.027434666951497395
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,48,48,64,0,1,float16,fp8,2047,0.026965332527955372
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,48,48,64,0,1,float16,float16,4095,0.034815999368826546
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,48,48,64,0,1,float16,fp8,4095,0.03459733227888743
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,48,48,64,0,1,float16,float16,8191,0.051039998730023704
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,48,48,64,0,1,float16,fp8,8191,0.048991998036702476
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,48,48,64,0,1,float16,float16,16383,0.09115733702977498
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,48,48,64,0,1,float16,float16,1,0.007781333600481351
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,48,48,64,0,1,float16,fp8,16383,0.07772799829641978
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,48,48,64,0,1,float16,fp8,1,0.009002666920423508
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,48,48,64,0,1,float16,float16,3,0.007861333588759104
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,48,48,64,0,1,float16,fp8,3,0.009141333401203156
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,48,48,64,0,1,float16,float16,7,0.007936000203092894
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,48,48,64,0,1,float16,fp8,7,0.00902399979531765
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,48,48,64,0,1,float16,float16,15,0.007850666840871176
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,48,48,64,0,1,float16,float16,127,0.009098666409651438
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,48,48,64,0,1,float16,fp8,15,0.00903466654320558
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,48,48,64,0,1,float16,float16,31,0.007983999947706858
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,48,48,64,0,1,float16,fp8,31,0.009354666496316591
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,48,48,64,0,1,float16,float16,63,0.008703999842206636
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,48,48,64,0,1,float16,fp8,63,0.010058666889866194
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,48,48,64,0,1,float16,fp8,127,0.011450666934251785
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,48,48,64,0,1,float16,float16,255,0.009119999905427298
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,48,48,64,0,1,float16,fp8,255,0.011605333536863327
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,48,48,64,0,1,float16,float16,511,0.0100853331387043
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,48,48,64,0,1,float16,fp8,511,0.011711999773979187
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,48,48,64,0,1,float16,float16,1023,0.02605866640806198
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,48,48,64,0,1,float16,fp8,1023,0.025759999950726826
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,48,48,64,0,1,float16,float16,2047,0.030495998760064442
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,48,48,64,0,1,float16,fp8,2047,0.030229332546393078
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,48,48,64,0,1,float16,float16,4095,0.0432533323764801
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,48,48,64,0,1,float16,fp8,4095,0.04029333343108495
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,48,48,64,0,1,float16,float16,8191,0.07384000221888225
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,48,48,64,0,1,float16,fp8,8191,0.06302399933338165
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,48,48,64,0,1,float16,float16,16383,0.12997333208719888
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,48,48,64,0,1,float16,fp8,16383,0.10838933785756429
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,48,48,64,0,1,float16,float16,1,0.03387733300526937
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,48,48,64,0,1,float16,fp8,1,0.034474665919939675
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,48,48,64,0,1,float16,float16,3,0.03392533212900162
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,48,48,64,0,1,float16,fp8,3,0.034416000048319496
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,48,48,64,0,1,float16,float16,7,0.03498133271932602
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,48,48,64,0,1,float16,fp8,7,0.035690667728583016
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,48,48,64,0,1,float16,float16,15,0.036229332288106285
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,48,48,64,0,1,float16,fp8,15,0.036874666810035706
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,48,48,64,0,1,float16,float16,31,0.04504533112049103
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,48,48,64,0,1,float16,fp8,31,0.04595200220743815
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,48,48,64,0,1,float16,float16,63,0.04560533165931702
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,48,48,64,0,1,float16,fp8,63,0.046256000796953835
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,48,48,64,0,1,float16,float16,127,0.05092266698678335
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,48,48,64,0,1,float16,fp8,127,0.04649066428343455
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,48,48,64,0,1,float16,float16,255,0.06322133541107178
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,48,48,64,0,1,float16,fp8,255,0.06173333525657654
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,48,48,64,0,1,float16,float16,511,0.09692266583442688
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,48,48,64,0,1,float16,fp8,511,0.0936959981918335
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,48,48,64,0,1,float16,float16,1023,0.1970720092455546
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,48,48,64,0,1,float16,fp8,1023,0.1560533344745636
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,48,48,64,0,1,float16,float16,2047,0.3821653525034587
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,48,48,64,0,1,float16,fp8,2047,0.3027679920196533
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,48,48,64,0,1,float16,float16,1,0.00867733359336853
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,48,48,64,0,1,float16,fp8,1,0.00877333308259646
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,48,48,64,0,1,float16,float16,3,0.00874133345981439
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,48,48,64,0,1,float16,fp8,3,0.008832000195980072
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,48,48,64,0,1,float16,float16,7,0.008896000062425932
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,48,48,64,0,1,float16,fp8,7,0.008826666822036108
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,48,48,64,0,1,float16,float16,15,0.008853333070874214
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,48,48,64,0,1,float16,fp8,15,0.008746666833758354
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,48,48,64,0,1,float16,float16,31,0.009541333342591921
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,48,48,64,0,1,float16,fp8,31,0.009679999823371569
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,48,48,64,0,1,float16,float16,63,0.01126933346192042
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,48,48,64,0,1,float16,fp8,63,0.01138666644692421
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,48,48,64,0,1,float16,float16,127,0.011264000087976456
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,48,48,64,0,1,float16,fp8,127,0.01126933346192042
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,48,48,64,0,1,float16,float16,255,0.011482667177915573
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,48,48,64,0,1,float16,fp8,255,0.011359999577204386
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,48,48,64,0,1,float16,float16,511,0.013338666409254074
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,48,48,64,0,1,float16,fp8,511,0.013061333447694778
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,48,48,64,0,1,float16,float16,1023,0.018437333405017853
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,48,48,64,0,1,float16,fp8,1023,0.018330667167901993
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,48,48,64,0,1,float16,float16,2047,0.03018666555484136
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,48,48,64,0,1,float16,fp8,2047,0.028970666229724884
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,48,48,64,0,1,float16,float16,4095,0.060821334520975746
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,48,48,64,0,1,float16,fp8,4095,0.05068266888459524
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,48,48,64,0,1,float16,float16,8191,0.11198400457700093
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,48,48,64,0,1,float16,fp8,8191,0.10274133086204529
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,48,48,64,0,1,float16,float16,16383,0.2159093419710795
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,48,48,64,0,1,float16,float16,1,0.06214933097362518
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,48,48,64,0,1,float16,fp8,16383,0.1925119956334432
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,48,48,64,0,1,float16,fp8,1,0.06358933448791504
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,48,48,64,0,1,float16,float16,3,0.06225066880385081
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,48,48,64,0,1,float16,fp8,3,0.06372266511122386
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,48,48,64,0,1,float16,float16,7,0.06469333171844482
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,48,48,64,0,1,float16,fp8,7,0.06669866542021434
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,48,48,64,0,1,float16,fp8,63,0.08658666412035625
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,48,48,64,0,1,float16,float16,15,0.06694399813810985
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,48,48,64,0,1,float16,fp8,15,0.06811200082302094
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,48,48,64,0,1,float16,float16,31,0.08422399560610454
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,48,48,64,0,1,float16,fp8,31,0.08540266752243042
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,48,48,64,0,1,float16,float16,63,0.09186666210492452
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,48,48,64,0,1,float16,float16,127,0.09450133641560872
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,48,48,64,0,1,float16,fp8,127,0.09388267000516255
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,48,48,64,0,1,float16,float16,255,0.11698666214942932
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,48,48,64,0,1,float16,fp8,255,0.11518933375676473
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,48,48,64,0,1,float16,float16,511,0.18158932526906332
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,48,48,64,0,1,float16,fp8,511,0.17669866482416788
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,48,48,64,0,1,float16,float16,1023,0.37593599160512287
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,48,48,64,0,1,float16,fp8,1023,0.2977120081583659
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,48,48,64,0,1,float16,float16,1,0.11828800042470296
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,48,48,64,0,1,float16,fp8,1,0.12124266227086385
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,48,48,64,0,1,float16,float16,3,0.11850133538246155
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,48,48,64,0,1,float16,fp8,3,0.12146666646003723
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,48,48,64,0,1,float16,float16,7,0.12354666988054912
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,48,48,64,0,1,float16,fp8,7,0.1269599994023641
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,48,48,64,0,1,float16,float16,15,0.13099199533462524
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,48,48,64,0,1,float16,fp8,15,0.13057600458463034
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,48,48,64,0,1,float16,float16,31,0.17037866512934366
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,48,48,64,0,1,float16,fp8,127,0.1798293391863505
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,48,48,64,0,1,float16,fp8,31,0.16564800341924033
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,48,48,64,0,1,float16,float16,63,0.17674134174982706
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,48,48,64,0,1,float16,fp8,63,0.17805866400400797
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,48,48,64,0,1,float16,float16,127,0.18022932608922324
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,48,48,64,0,1,float16,float16,255,0.2246826688448588
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,48,48,64,0,1,float16,fp8,255,0.222378671169281
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,48,48,64,0,1,float16,fp8,1,0.2362933357556661
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,48,48,64,0,1,float16,float16,1,0.23005332549413046
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,48,48,64,0,1,float16,float16,3,0.2312426765759786
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,48,48,64,0,1,float16,fp8,3,0.23679467042287192
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,48,48,64,0,1,float16,float16,7,0.24919466177622476
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,48,48,64,0,1,float16,fp8,7,0.24896534283955893
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,48,48,64,0,1,float16,float16,15,0.2742133339246114
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,48,48,64,0,1,float16,fp8,15,0.2643946607907613
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,48,48,64,0,1,float16,float16,31,0.333679993947347
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,48,48,64,0,1,float16,fp8,127,0.3526560068130493
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,48,48,64,0,1,float16,fp8,31,0.33879999319712323
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,48,48,64,0,1,float16,float16,3,0.013679999858140945
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,48,48,64,0,1,float16,float16,63,0.34508800506591797
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,48,48,64,0,1,float16,fp8,63,0.3482666810353597
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,48,48,64,0,1,float16,float16,127,0.3524906635284424
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,48,48,64,0,1,float16,float16,1,0.013722666849692663
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,48,48,64,0,1,float16,fp8,1,0.013760000467300415
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,48,48,64,0,1,float16,fp8,31,0.014042666802803675
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,48,48,64,0,1,float16,fp8,3,0.013728000223636627
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,48,48,64,0,1,float16,float16,7,0.013701333353916803
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,48,48,64,0,1,float16,fp8,7,0.01404800017674764
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,48,48,64,0,1,float16,float16,15,0.01370666672786077
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,48,48,64,0,1,float16,fp8,15,0.014250667144854864
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,48,48,64,0,1,float16,float16,31,0.013951999445756277
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,48,48,64,0,1,float16,float16,63,0.015365333606799444
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,48,48,64,0,1,float16,fp8,63,0.01563199982047081
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,48,48,64,0,1,float16,float16,127,0.015722667177518208
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,48,48,64,0,1,float16,fp8,127,0.015685333559910457
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,48,48,64,0,1,float16,float16,255,0.01607999950647354
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,48,48,64,0,1,float16,fp8,255,0.015967999895413715
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,48,48,64,0,1,float16,float16,511,0.01897066707412402
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,48,48,64,0,1,float16,fp8,511,0.0183146670460701
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,48,48,64,0,1,float16,float16,1023,0.03050133337577184
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,48,48,64,0,1,float16,fp8,1023,0.02587199956178665
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,48,48,64,0,1,float16,float16,2047,0.06434133152167003
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,48,48,64,0,1,float16,fp8,2047,0.04682666560014089
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,48,48,64,0,1,float16,float16,4095,0.11399466792742412
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,48,48,64,0,1,float16,fp8,4095,0.08731733759244283
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,48,48,64,0,1,float16,float16,8191,0.21839465697606406
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,48,48,64,0,1,float16,fp8,8191,0.16819200913111368
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,48,48,64,0,1,float16,float16,16383,0.44272534052530926
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,48,48,64,0,1,float16,fp8,16383,0.32223467032114667
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,48,48,64,0,1,float16,float16,1,0.47116267681121826
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,48,48,64,0,1,float16,fp8,1,0.46860265731811523
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,48,48,64,0,1,float16,float16,3,0.5011306603749593
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,48,48,64,0,1,float16,fp8,3,0.4804106553395589
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,48,48,64,0,1,float16,float16,7,0.5176533460617065
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,48,48,64,0,1,float16,fp8,7,0.5233279863993326
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,48,48,64,0,1,float16,float16,15,0.5427093505859375
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,48,48,64,0,1,float16,fp8,15,0.5510186751683553
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,48,48,64,0,1,float16,float16,31,0.6608159939448038
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,48,48,64,0,1,float16,fp8,31,0.6711093584696451
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,48,48,64,0,1,float16,float16,63,0.6823786894480387
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,48,48,64,0,1,float16,fp8,63,0.6887839635213217
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,48,48,64,0,1,float16,float16,1,0.9973386923472086
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,48,48,64,0,1,float16,fp8,1,1.0157279968261719
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,48,48,64,0,1,float16,float16,3,1.0004053115844727
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,48,48,64,0,1,float16,float16,7,1.0277547041575115
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,48,48,64,0,1,float16,fp8,3,1.018448034922282
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,48,48,64,0,1,float16,fp8,7,1.0475467046101887
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,48,48,64,0,1,float16,float16,1,0.013418667018413544
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,48,48,64,0,1,float16,float16,15,1.0784213542938232
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,48,48,64,0,1,float16,fp8,1,0.013242666920026144
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,48,48,64,0,1,float16,float16,3,0.013093333691358566
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,48,48,64,0,1,float16,fp8,15,1.095578670501709
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,48,48,64,0,1,float16,fp8,3,0.013210666676362356
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,48,48,64,0,1,float16,float16,7,0.01368533323208491
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,48,48,64,0,1,float16,fp8,7,0.013264000415802002
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,48,48,64,0,1,float16,float16,15,0.013237333546082178
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,48,48,64,0,1,float16,float16,31,1.3143680095672607
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,48,48,64,0,1,float16,fp8,15,0.013258667041858038
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,48,48,64,0,1,float16,float16,31,0.014975999792416891
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,48,48,64,0,1,float16,fp8,31,0.01522133375207583
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,48,48,64,0,1,float16,fp8,31,1.3345920244852703
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,48,48,64,0,1,float16,float16,63,0.015184000134468079
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,48,48,64,0,1,float16,fp8,63,0.01532799998919169
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,48,48,64,0,1,float16,float16,127,0.015610666324694952
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,48,48,64,0,1,float16,fp8,127,0.015370666980743408
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,48,48,64,0,1,float16,float16,255,0.017903999735911686
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,48,48,64,0,1,float16,fp8,255,0.017818666994571686
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,48,48,64,0,1,float16,float16,511,0.03013866643110911
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,48,48,64,0,1,float16,fp8,511,0.025722667574882507
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,48,48,64,0,1,float16,float16,1023,0.062314664324124656
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,48,48,64,0,1,float16,fp8,1023,0.045567999283472695
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,48,48,64,0,1,float16,float16,2047,0.11489066481590271
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,48,48,64,0,1,float16,float16,8191,0.4406239986419678
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,48,48,64,0,1,float16,fp8,2047,0.08649599552154541
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,48,48,64,0,1,float16,float16,4095,0.21602133909861246
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,48,48,64,0,1,float16,fp8,4095,0.16992533206939697
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,48,48,128,0,1,float16,float16,7,0.020010666300853092
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,48,48,128,0,1,float16,fp8,7,0.020629333953062694
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,48,48,128,0,1,float16,float16,15,0.024911999702453613
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,48,48,128,0,1,float16,float16,1,0.018559999763965607
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,48,48,64,0,1,float16,fp8,8191,0.3359786669413249
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,48,48,128,0,1,float16,fp8,1,0.019109333554903667
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,48,48,128,0,1,float16,float16,3,0.019386666516462963
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,48,48,128,0,1,float16,fp8,3,0.019920000185569126
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,48,48,128,0,1,float16,fp8,15,0.02514133354028066
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,48,48,128,0,1,float16,float16,31,0.025055999557177227
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,48,48,128,0,1,float16,fp8,31,0.025087999800841015
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,48,48,128,0,1,float16,float16,63,0.025546667476495106
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,48,48,128,0,1,float16,fp8,63,0.025429333249727886
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,48,48,128,0,1,float16,float16,127,0.034346667428811394
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,48,48,128,0,1,float16,fp8,127,0.03046400099992752
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,48,48,128,0,1,float16,float16,255,0.052282666166623436
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,48,48,128,0,1,float16,fp8,255,0.04985066751639048
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,48,48,128,0,1,float16,float16,2047,0.37744001547495526
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,48,48,128,0,1,float16,float16,511,0.10492266217867534
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,48,48,128,0,1,float16,fp8,511,0.08253333469231923
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,48,48,128,0,1,float16,float16,1023,0.19806933403015137
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,48,48,128,0,1,float16,fp8,1023,0.15774400035540262
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,48,48,128,0,1,float16,fp8,2047,0.30088533957799274
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,48,48,128,0,1,float16,float16,1,0.008757333581646284
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,48,48,128,0,1,float16,fp8,1,0.008869333192706108
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,48,48,128,0,1,float16,float16,4095,0.7397119998931885
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,48,48,128,0,1,float16,fp8,4095,0.5823573271433512
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,48,48,128,0,1,float16,float16,3,0.008714666590094566
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,48,48,128,0,1,float16,fp8,3,0.009008000294367472
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,48,48,128,0,1,float16,fp8,31,0.009759999811649323
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,48,48,128,0,1,float16,float16,7,0.008789333204428354
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,48,48,128,0,1,float16,fp8,7,0.008992000172535578
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,48,48,128,0,1,float16,float16,15,0.008901333436369896
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,48,48,128,0,1,float16,fp8,15,0.009103999783595404
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,48,48,128,0,1,float16,float16,31,0.00966933307548364
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,48,48,128,0,1,float16,float16,63,0.011173332730929056
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,48,48,128,0,1,float16,fp8,63,0.011519999553759893
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,48,48,128,0,1,float16,float16,127,0.011253333340088526
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,48,48,128,0,1,float16,fp8,127,0.011424000064531961
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,48,48,128,0,1,float16,float16,255,0.011541333049535751
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,48,48,128,0,1,float16,fp8,255,0.01156266654531161
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,48,48,128,0,1,float16,float16,511,0.01573333392540614
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,48,48,128,0,1,float16,fp8,511,0.015557333827018738
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,48,48,128,0,1,float16,float16,1023,0.027322667340437572
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,48,48,128,0,1,float16,fp8,1023,0.02719466636578242
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,48,48,128,0,1,float16,float16,2047,0.03474666674931844
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,48,48,128,0,1,float16,fp8,2047,0.03412266572316488
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,48,48,128,0,1,float16,float16,4095,0.051221330960591636
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,48,48,128,0,1,float16,fp8,4095,0.047269334395726524
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,48,48,128,0,1,float16,float16,8191,0.0918986697991689
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,48,48,128,0,1,float16,fp8,8191,0.07922666768232982
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,48,48,128,0,1,float16,float16,16383,0.15967999895413718
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,48,48,128,0,1,float16,fp8,3,0.009008000294367472
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,48,48,128,0,1,float16,float16,1,0.008863999818762144
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,48,48,128,0,1,float16,fp8,16383,0.14172266920407614
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,48,48,128,0,1,float16,fp8,1,0.009253333633144697
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,48,48,128,0,1,float16,float16,3,0.008885333314538002
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,48,48,128,0,1,float16,float16,7,0.008863999818762144
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,48,48,128,0,1,float16,fp8,7,0.009088000282645226
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,48,48,128,0,1,float16,float16,15,0.00902399979531765
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,48,48,128,0,1,float16,fp8,63,0.011498666057984034
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,48,48,128,0,1,float16,fp8,15,0.009381333366036415
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,48,48,128,0,1,float16,float16,31,0.009797333429257074
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,48,48,128,0,1,float16,fp8,31,0.01002133327225844
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,48,48,128,0,1,float16,float16,63,0.011503999431928
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,48,48,128,0,1,float16,float16,127,0.011450666934251785
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,48,48,128,0,1,float16,fp8,127,0.011461333682139715
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,48,48,128,0,1,float16,float16,255,0.011850666254758835
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,48,48,128,0,1,float16,fp8,255,0.01163200040658315
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,48,48,128,0,1,float16,float16,511,0.02595199892918269
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,48,48,128,0,1,float16,fp8,511,0.025818665822347004
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,48,48,128,0,1,float16,float16,1023,0.03014933317899704
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,48,48,128,0,1,float16,fp8,1023,0.030005333324273426
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,48,48,128,0,1,float16,float16,2047,0.043162668744723
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,48,48,128,0,1,float16,fp8,2047,0.04029333343108495
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,48,48,128,0,1,float16,float16,4095,0.07423999905586243
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,48,48,128,0,1,float16,fp8,4095,0.06465066472689311
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,48,48,128,0,1,float16,float16,8191,0.12737066547075906
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,48,48,128,0,1,float16,fp8,8191,0.11345066626866658
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,48,48,128,0,1,float16,float16,16383,0.23165865739186606
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,48,48,128,0,1,float16,float16,1,0.03335466732581457
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,48,48,128,0,1,float16,fp8,16383,0.20829866329828897
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,48,48,128,0,1,float16,fp8,1,0.034645333886146545
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,48,48,128,0,1,float16,float16,3,0.03488533447186152
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,48,48,128,0,1,float16,fp8,3,0.03586666782697042
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,48,48,128,0,1,float16,float16,7,0.03595199932654699
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,48,48,128,0,1,float16,fp8,7,0.03700266778469086
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,48,48,128,0,1,float16,float16,15,0.04552533229192098
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,48,48,128,0,1,float16,fp8,15,0.04554666578769684
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,48,48,128,0,1,float16,float16,31,0.0458133320013682
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,48,48,128,0,1,float16,fp8,31,0.04554666578769684
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,48,48,128,0,1,float16,fp8,255,0.09368000427881877
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,48,48,128,0,1,float16,float16,63,0.0509493350982666
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,48,48,128,0,1,float16,fp8,63,0.04633066554864248
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,48,48,128,0,1,float16,float16,127,0.06299200157324474
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,48,48,128,0,1,float16,fp8,127,0.06156266729036967
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,48,48,128,0,1,float16,float16,255,0.09745066364606221
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,48,48,128,0,1,float16,float16,511,0.19777599970499674
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,48,48,128,0,1,float16,fp8,511,0.15711999932924905
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,48,48,128,0,1,float16,float16,1023,0.3817439874013265
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,48,48,128,0,1,float16,fp8,1023,0.3031146725018819
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,48,48,128,0,1,float16,float16,1,0.008463999877373377
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,48,48,128,0,1,float16,float16,2047,0.7419733206431071
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,48,48,128,0,1,float16,fp8,2047,0.5808053414026896
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,48,48,128,0,1,float16,fp8,1,0.008650666723648706
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,48,48,128,0,1,float16,float16,3,0.00871999996403853
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,48,48,128,0,1,float16,fp8,3,0.008922666932145754
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,48,48,128,0,1,float16,float16,7,0.00847999999920527
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,48,48,128,0,1,float16,fp8,7,0.008858666444818178
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,48,48,128,0,1,float16,float16,15,0.009429333110650381
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,48,48,128,0,1,float16,fp8,15,0.00921066664159298
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,48,48,128,0,1,float16,float16,31,0.011071999867757162
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,48,48,128,0,1,float16,fp8,31,0.011152000476916632
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,48,48,128,0,1,float16,float16,63,0.01129066695769628
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,48,48,128,0,1,float16,fp8,63,0.011285333583752314
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,48,48,128,0,1,float16,float16,127,0.011168000598748526
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,48,48,128,0,1,float16,fp8,127,0.011343999455372492
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,48,48,128,0,1,float16,float16,255,0.01314666618903478
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,48,48,128,0,1,float16,fp8,255,0.012975999464591345
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,48,48,128,0,1,float16,float16,511,0.018272000054518383
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,48,48,128,0,1,float16,fp8,511,0.01828266680240631
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,48,48,128,0,1,float16,float16,1023,0.029279999434947968
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,48,48,128,0,1,float16,fp8,1023,0.028688001135985058
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,48,48,128,0,1,float16,float16,2047,0.060645331939061485
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,48,48,128,0,1,float16,fp8,2047,0.050581331054369606
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,48,48,128,0,1,float16,float16,4095,0.1113813320795695
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,48,48,128,0,1,float16,float16,16383,0.4186240037282308
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,48,48,128,0,1,float16,fp8,4095,0.10034666458765666
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,48,48,128,0,1,float16,float16,8191,0.2127093275388082
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,48,48,128,0,1,float16,fp8,8191,0.18982400496800741
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,48,48,128,0,1,float16,float16,1,0.06191466748714447
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,48,48,128,0,1,float16,fp8,16383,0.3723520040512085
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,48,48,128,0,1,float16,fp8,1,0.06399466594060262
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,48,48,128,0,1,float16,float16,3,0.0643039991458257
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,48,48,128,0,1,float16,fp8,3,0.06651199857393901
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,48,48,128,0,1,float16,float16,7,0.06674133241176605
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,48,48,128,0,1,float16,fp8,7,0.06871999800205231
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,48,48,128,0,1,float16,float16,15,0.08518933256467183
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,48,48,128,0,1,float16,fp8,15,0.08503466844558716
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,48,48,128,0,1,float16,float16,31,0.09195733070373535
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,48,48,128,0,1,float16,fp8,31,0.08622933427492778
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,48,48,128,0,1,float16,float16,63,0.09450133641560872
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,48,48,128,0,1,float16,fp8,255,0.17733865976333618
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,48,48,128,0,1,float16,fp8,63,0.0937600036462148
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,48,48,128,0,1,float16,float16,511,0.3766719897588094
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,48,48,128,0,1,float16,float16,127,0.11726400256156921
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,48,48,128,0,1,float16,fp8,127,0.11593066652615865
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,48,48,128,0,1,float16,float16,255,0.18209600448608398
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,48,48,128,0,1,float16,fp8,511,0.2993333339691162
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,48,48,128,0,1,float16,float16,1023,0.7425706386566162
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,48,48,128,0,1,float16,fp8,1023,0.5820266803105673
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,48,48,128,0,1,float16,float16,1,0.11771200100580852
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,48,48,128,0,1,float16,fp8,1,0.12139733632405598
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,48,48,128,0,1,float16,float16,3,0.12341333429018657
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,48,48,128,0,1,float16,float16,15,0.17414933443069458
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,48,48,128,0,1,float16,fp8,3,0.12743999560674033
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,48,48,128,0,1,float16,float16,31,0.1776533325513204
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,48,48,128,0,1,float16,float16,7,0.13379200299580893
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,48,48,128,0,1,float16,fp8,7,0.1328266660372416
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,48,48,128,0,1,float16,fp8,15,0.1681013305981954
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,48,48,128,0,1,float16,fp8,31,0.1782133380572001
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,48,48,128,0,1,float16,float16,63,0.18136000633239746
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,48,48,128,0,1,float16,fp8,63,0.18082133928934732
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,48,48,128,0,1,float16,float16,127,0.22589333852132162
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,48,48,128,0,1,float16,fp8,127,0.22454933325449625
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,48,48,128,0,1,float16,float16,255,0.3529866536458333
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,48,48,128,0,1,float16,fp8,255,0.34623467922210693
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,48,48,128,0,1,float16,float16,3,0.2603893280029297
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,48,48,128,0,1,float16,float16,1,0.23751999934514365
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,48,48,128,0,1,float16,fp8,1,0.23922133445739746
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,48,48,128,0,1,float16,fp8,3,0.25697600841522217
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,48,48,128,0,1,float16,float16,7,0.27641065915425617
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,48,48,128,0,1,float16,fp8,7,0.28088533878326416
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,48,48,128,0,1,float16,float16,15,0.3397279977798462
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,48,48,128,0,1,float16,fp8,15,0.3443839947382609
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,48,48,128,0,1,float16,float16,31,0.3484799861907959
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,48,48,128,0,1,float16,fp8,31,0.34889066219329834
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,48,48,128,0,1,float16,float16,63,0.35438398520151776
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,48,48,128,0,1,float16,fp8,63,0.35417600472768146
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,48,48,128,0,1,float16,fp8,3,0.013258667041858038
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,48,48,128,0,1,float16,float16,1,0.013546666751305262
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,48,48,128,0,1,float16,float16,127,0.44179733594258624
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,48,48,128,0,1,float16,fp8,127,0.43906664848327637
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,48,48,128,0,1,float16,fp8,1,0.013898666948080063
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,48,48,128,0,1,float16,float16,3,0.012853333105643591
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,48,48,128,0,1,float16,float16,7,0.01322666679819425
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,48,48,128,0,1,float16,fp8,7,0.013989333063364029
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,48,48,128,0,1,float16,float16,15,0.013647999614477158
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,48,48,128,0,1,float16,fp8,15,0.013818666338920593
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,48,48,128,0,1,float16,float16,31,0.015311999867359797
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,48,48,128,0,1,float16,fp8,31,0.015599999576807022
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,48,48,128,0,1,float16,float16,63,0.01590399940808614
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,48,48,128,0,1,float16,fp8,63,0.01562133307258288
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,48,48,128,0,1,float16,float16,127,0.016106666376193363
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,48,48,128,0,1,float16,float16,1023,0.06587733328342438
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,48,48,128,0,1,float16,fp8,127,0.015989333391189575
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,48,48,128,0,1,float16,float16,255,0.018837332725524902
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,48,48,128,0,1,float16,fp8,255,0.01869333287080129
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,48,48,128,0,1,float16,float16,511,0.03109866629044215
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,48,48,128,0,1,float16,fp8,511,0.0264533335963885
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,48,48,128,0,1,float16,fp8,1023,0.04810666541258494
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,48,48,128,0,1,float16,float16,2047,0.11214933792750041
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,48,48,128,0,1,float16,fp8,2047,0.08609599868456523
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,48,48,128,0,1,float16,float16,4095,0.22670932610829672
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,48,48,128,0,1,float16,fp8,4095,0.16885866721471152
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,48,48,128,0,1,float16,float16,8191,0.4087466796239217
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,48,48,128,0,1,float16,fp8,8191,0.3229493300120036
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,48,48,128,0,1,float16,float16,16383,0.8713653087615967
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,48,48,128,0,1,float16,float16,1,0.5055573383967081
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,48,48,128,0,1,float16,fp8,16383,0.6366026798884074
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,48,48,128,0,1,float16,fp8,1,0.5203306674957275
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,48,48,128,0,1,float16,float16,3,0.5197973251342773
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,48,48,128,0,1,float16,fp8,3,0.5356533527374268
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,48,48,128,0,1,float16,float16,7,0.5459786653518677
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,48,48,128,0,1,float16,fp8,7,0.560975988705953
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,48,48,128,0,1,float16,float16,15,0.6729119618733724
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,48,48,128,0,1,float16,fp8,15,0.6824639638264974
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,48,48,128,0,1,float16,float16,31,0.6904053688049316
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,48,48,128,0,1,float16,fp8,31,0.6905013720194498
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,48,48,128,0,1,float16,float16,63,0.7003146807352701
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,48,48,128,0,1,float16,fp8,63,0.7012106577555338
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,48,48,128,0,1,float16,float16,1,1.0042826334635417
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,48,48,128,0,1,float16,fp8,1,1.0313973426818848
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,48,48,128,0,1,float16,float16,3,1.0317333539326985
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,48,48,128,0,1,float16,fp8,3,1.0610506534576416
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,48,48,128,0,1,float16,float16,7,1.0860906442006428
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,48,48,128,0,1,float16,fp8,7,1.1153066953023274
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,48,48,128,0,1,float16,float16,1,0.012938667088747025
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,48,48,128,0,1,float16,fp8,1,0.01322666679819425
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,48,48,128,0,1,float16,float16,15,1.338362693786621
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,48,48,128,0,1,float16,float16,3,0.012944000462690989
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,48,48,128,0,1,float16,fp8,3,0.013370666652917862
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,48,48,128,0,1,float16,float16,7,0.01310933381319046
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,48,48,128,0,1,float16,fp8,15,1.3608266512552898
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,48,48,128,0,1,float16,fp8,7,0.01332266628742218
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,48,48,128,0,1,float16,float16,15,0.014997333288192749
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,48,48,128,0,1,float16,fp8,15,0.014981333166360855
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,48,48,128,0,1,float16,float16,31,1.3735733032226562
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,48,48,128,0,1,float16,float16,31,0.015210667004187902
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,48,48,128,0,1,float16,fp8,31,1.3733545939127605
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,48,48,128,0,1,float16,fp8,127,0.017749333133300144
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,48,48,128,0,1,float16,fp8,31,0.015114666273196539
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,48,48,128,0,1,float16,float16,63,0.015301333119471868
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,48,48,128,0,1,float16,fp8,63,0.015376000354687372
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,48,48,128,0,1,float16,float16,127,0.01764800027012825
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,48,48,128,0,1,float16,float16,255,0.029839999973773956
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,48,48,128,0,1,float16,fp8,255,0.025610665480295818
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,48,48,128,0,1,float16,float16,2047,0.21527999639511108
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,48,48,128,0,1,float16,fp8,2047,0.17331733306248984
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,48,48,128,0,1,float16,float16,511,0.06097066899140676
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,48,48,128,0,1,float16,fp8,511,0.0462719996770223
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,48,48,128,0,1,float16,float16,1023,0.11402133107185364
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,48,48,128,0,1,float16,fp8,1023,0.08771733442942302
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,48,48,128,0,1,float16,float16,4095,0.4206240177154541
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,48,48,128,0,1,float16,float16,8191,0.8341813087463379
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,48,48,128,0,1,float16,fp8,4095,0.3327839970588684
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,40,40,64,0,1,float16,float16,1,0.017722666263580322
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,40,40,64,0,1,float16,fp8,1,0.017808000246683758
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,48,48,128,0,1,float16,fp8,8191,0.6683946450551351
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,40,40,64,0,1,float16,float16,3,0.017797333498795826
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,40,40,64,0,1,float16,fp8,3,0.017690667261679966
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,40,40,64,0,1,float16,float16,31,0.02160000056028366
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,40,40,64,0,1,float16,fp8,31,0.02199466774861018
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,40,40,64,0,1,float16,float16,7,0.018197332819302876
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,40,40,64,0,1,float16,fp8,7,0.01836266616980235
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,40,40,64,0,1,float16,float16,15,0.01854933301607768
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,40,40,64,0,1,float16,fp8,15,0.018581333259741466
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,40,40,64,0,1,float16,float16,63,0.02186666677395503
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,40,40,64,0,1,float16,fp8,63,0.02214933435122172
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,40,40,64,0,1,float16,float16,127,0.022277332842350006
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,40,40,64,0,1,float16,fp8,127,0.022389332453409832
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,40,40,64,0,1,float16,float16,255,0.0278613343834877
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,40,40,64,0,1,float16,fp8,255,0.026133333643277485
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,40,40,64,0,1,float16,float16,511,0.047354668378829956
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,40,40,64,0,1,float16,float16,4095,0.3364906708399455
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,40,40,64,0,1,float16,fp8,511,0.04079466561476389
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,40,40,64,0,1,float16,float16,1023,0.09384533762931824
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,40,40,64,0,1,float16,fp8,1023,0.07162666817506154
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,40,40,64,0,1,float16,float16,2047,0.1807253360748291
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,40,40,64,0,1,float16,fp8,2047,0.13481600085894266
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,40,40,64,0,1,float16,float16,1,0.007727999861041705
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,40,40,64,0,1,float16,fp8,7,0.009088000282645226
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,40,40,64,0,1,float16,fp8,4095,0.2606239914894104
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,40,40,64,0,1,float16,fp8,1,0.009066666786869368
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,40,40,64,0,1,float16,float16,3,0.007690666864315669
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,40,40,64,0,1,float16,fp8,3,0.008954666554927826
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,40,40,64,0,1,float16,float16,7,0.00766933336853981
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,40,40,64,0,1,float16,float16,15,0.00901333304742972
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,40,40,64,0,1,float16,fp8,15,0.008896000062425932
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,40,40,64,0,1,float16,float16,31,0.008559999987483025
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,40,40,64,0,1,float16,fp8,31,0.009088000282645226
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,40,40,64,0,1,float16,float16,63,0.008752000207702318
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,40,40,64,0,1,float16,fp8,63,0.00972800018886725
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,40,40,64,0,1,float16,float16,127,0.008746666833758354
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,40,40,64,0,1,float16,fp8,127,0.011450666934251785
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,40,40,64,0,1,float16,float16,255,0.009130666653315226
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,40,40,64,0,1,float16,fp8,255,0.011488000551859537
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,40,40,64,0,1,float16,float16,511,0.010026666646202406
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,40,40,64,0,1,float16,fp8,511,0.011312000453472137
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,40,40,64,0,1,float16,float16,1023,0.016010666886965435
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,40,40,64,0,1,float16,fp8,1023,0.015717333803574245
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,40,40,64,0,1,float16,fp8,8191,0.04009066770474116
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,40,40,64,0,1,float16,float16,2047,0.017184000462293625
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,40,40,64,0,1,float16,fp8,2047,0.016714667280515034
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,40,40,64,0,1,float16,float16,4095,0.03033066789309184
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,40,40,64,0,1,float16,fp8,4095,0.030282666285832722
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,40,40,64,0,1,float16,float16,8191,0.041178666055202484
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,40,40,64,0,1,float16,float16,16383,0.07364800075689952
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,40,40,64,0,1,float16,float16,1,0.007685333490371704
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,40,40,64,0,1,float16,fp8,16383,0.06072533130645752
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,40,40,64,0,1,float16,fp8,1,0.009072000160813332
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,40,40,64,0,1,float16,float16,3,0.00797333319981893
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,40,40,64,0,1,float16,fp8,3,0.00921066664159298
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,40,40,64,0,1,float16,float16,7,0.007925333455204964
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,40,40,64,0,1,float16,fp8,7,0.009152000149091085
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,40,40,64,0,1,float16,float16,15,0.007877333089709282
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,40,40,64,0,1,float16,fp8,15,0.008869333192706108
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,40,40,64,0,1,float16,float16,31,0.008005333443482717
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,40,40,64,0,1,float16,fp8,31,0.00915733352303505
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,40,40,64,0,1,float16,float16,63,0.008885333314538002
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,40,40,64,0,1,float16,fp8,63,0.010053333515922228
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,40,40,64,0,1,float16,float16,1023,0.0259253333012263
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,40,40,64,0,1,float16,float16,127,0.009066666786869368
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,40,40,64,0,1,float16,fp8,127,0.011472000430027643
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,40,40,64,0,1,float16,float16,255,0.00922133338948091
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,40,40,64,0,1,float16,fp8,255,0.011301333705584208
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,40,40,64,0,1,float16,float16,511,0.010037333394090334
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,40,40,64,0,1,float16,fp8,4095,0.04013866682847341
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,40,40,64,0,1,float16,fp8,511,0.011541333049535751
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,40,40,64,0,1,float16,fp8,1023,0.025775998830795288
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,40,40,64,0,1,float16,float16,2047,0.030282666285832722
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,40,40,64,0,1,float16,fp8,2047,0.030016000072161358
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,40,40,64,0,1,float16,float16,4095,0.04091199984153112
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,40,40,64,0,1,float16,float16,8191,0.07358400026957194
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,40,40,64,0,1,float16,fp8,8191,0.06108266611893972
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,40,40,64,0,1,float16,float16,16383,0.125082661708196
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,40,40,64,0,1,float16,fp8,3,0.029440000653266907
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,40,40,64,0,1,float16,fp8,16383,0.1076746682325999
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,40,40,64,0,1,float16,float16,1,0.02900800108909607
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,40,40,64,0,1,float16,fp8,1,0.02951466788848241
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,40,40,64,0,1,float16,float16,3,0.02902399996916453
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,40,40,64,0,1,float16,float16,7,0.03001066545645396
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,40,40,64,0,1,float16,fp8,7,0.03057066599527995
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,40,40,64,0,1,float16,float16,15,0.030799999833106995
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,40,40,64,0,1,float16,fp8,15,0.03137599925200144
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,40,40,64,0,1,float16,float16,31,0.03839999934037527
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,40,40,64,0,1,float16,fp8,127,0.0396373321612676
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,40,40,64,0,1,float16,fp8,31,0.03902933249870936
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,40,40,64,0,1,float16,float16,63,0.038773333032925926
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,40,40,64,0,1,float16,fp8,63,0.039520000418027244
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,40,40,64,0,1,float16,float16,127,0.04182933270931244
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,40,40,64,0,1,float16,float16,1023,0.16774400075276694
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,40,40,64,0,1,float16,float16,255,0.053727999329566956
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,40,40,64,0,1,float16,fp8,255,0.05087466537952423
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,40,40,64,0,1,float16,float16,511,0.08270933230717976
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,40,40,64,0,1,float16,fp8,511,0.07955733438332875
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,40,40,64,0,1,float16,fp8,1023,0.13184000054995218
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,40,40,64,0,1,float16,float16,2047,0.31986133257548016
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,40,40,64,0,1,float16,fp8,2047,0.25522667169570923
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,40,40,64,0,1,float16,float16,1,0.008463999877373377
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,40,40,64,0,1,float16,fp8,1,0.015103999525308609
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,40,40,64,0,1,float16,float16,3,0.008559999987483025
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,40,40,64,0,1,float16,float16,7,0.008890666688481966
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,40,40,64,0,1,float16,fp8,3,0.015125333021084467
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,40,40,64,0,1,float16,fp8,7,0.015103999525308609
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,40,40,64,0,1,float16,float16,15,0.008885333314538002
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,40,40,64,0,1,float16,fp8,15,0.015034666905800501
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,40,40,64,0,1,float16,float16,31,0.009343999748428663
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,40,40,64,0,1,float16,fp8,31,0.015583999454975128
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,40,40,64,0,1,float16,float16,63,0.011130666981140772
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,40,40,64,0,1,float16,fp8,63,0.016672000288963318
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,40,40,64,0,1,float16,float16,127,0.011215999722480774
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,40,40,64,0,1,float16,fp8,127,0.019893333315849304
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,40,40,64,0,1,float16,float16,255,0.011391999820868174
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,40,40,64,0,1,float16,fp8,255,0.01993600030740102
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,40,40,64,0,1,float16,float16,511,0.013050666699806849
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,40,40,64,0,1,float16,fp8,511,0.02035733312368393
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,40,40,64,0,1,float16,float16,1023,0.018239999810854595
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,40,40,64,0,1,float16,fp8,1023,0.023007998863856
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,40,40,64,0,1,float16,float16,2047,0.028565332293510437
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,40,40,64,0,1,float16,fp8,2047,0.03259200106064478
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,40,40,64,0,1,float16,float16,4095,0.057189335425694786
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,40,40,64,0,1,float16,fp8,4095,0.0531626691420873
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,40,40,64,0,1,float16,float16,8191,0.10479467113812764
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,40,40,64,0,1,float16,float16,3,0.05284800132115682
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,40,40,64,0,1,float16,fp8,8191,0.10086933771769206
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,40,40,64,0,1,float16,float16,16383,0.20259199539820352
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,40,40,64,0,1,float16,float16,1,0.052671998739242554
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,40,40,64,0,1,float16,fp8,16383,0.18494399388631186
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,40,40,64,0,1,float16,fp8,1,0.05399466554323832
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,40,40,64,0,1,float16,fp8,3,0.05398933092753092
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,40,40,64,0,1,float16,float16,7,0.05444266895453135
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,40,40,64,0,1,float16,fp8,7,0.05598400036493937
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,40,40,64,0,1,float16,float16,15,0.05638400216897329
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,40,40,64,0,1,float16,float16,127,0.07970133423805237
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,40,40,64,0,1,float16,fp8,15,0.05761066575845083
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,40,40,64,0,1,float16,float16,31,0.07037866612275441
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,40,40,64,0,1,float16,fp8,31,0.0717493345340093
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,40,40,64,0,1,float16,float16,63,0.07552533348401387
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,40,40,64,0,1,float16,fp8,63,0.07260266443093617
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,40,40,64,0,1,float16,float16,1023,0.31704533100128174
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,40,40,64,0,1,float16,fp8,127,0.07684266567230225
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,40,40,64,0,1,float16,float16,1,0.09968533118565877
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,40,40,64,0,1,float16,float16,255,0.09874133268992107
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,40,40,64,0,1,float16,fp8,255,0.09692266583442688
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,40,40,64,0,1,float16,float16,511,0.1527413328488668
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,40,40,64,0,1,float16,fp8,511,0.14801599582036337
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,40,40,64,0,1,float16,fp8,1023,0.24889600276947021
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,40,40,64,0,1,float16,fp8,1,0.1021066705385844
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,40,40,64,0,1,float16,float16,3,0.0999786655108134
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,40,40,64,0,1,float16,fp8,3,0.10221866766611735
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,40,40,64,0,1,float16,float16,7,0.10409067074457805
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,40,40,64,0,1,float16,fp8,31,0.1387999951839447
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,40,40,64,0,1,float16,fp8,7,0.10687999924023946
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,40,40,64,0,1,float16,float16,15,0.10838933785756429
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,40,40,64,0,1,float16,fp8,15,0.10994666814804077
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,40,40,64,0,1,float16,float16,31,0.1423733333746592
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,40,40,64,0,1,float16,float16,63,0.14843199650446573
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,40,40,64,0,1,float16,fp8,63,0.14748266339302063
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,40,40,64,0,1,float16,float16,127,0.15146133303642273
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,40,40,64,0,1,float16,fp8,127,0.15127999583880106
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,40,40,64,0,1,float16,float16,255,0.18884267409642538
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,40,40,64,0,1,float16,fp8,255,0.18597332636515299
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,40,40,64,0,1,float16,float16,1,0.1928053299585978
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,40,40,64,0,1,float16,fp8,1,0.19799466927846274
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,40,40,64,0,1,float16,float16,3,0.19331733385721842
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,40,40,64,0,1,float16,fp8,3,0.19839467604955038
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,40,40,64,0,1,float16,float16,7,0.20473599433898926
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,40,40,64,0,1,float16,fp8,7,0.20836800336837769
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,40,40,64,0,1,float16,float16,15,0.22807466983795166
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,40,40,64,0,1,float16,fp8,15,0.21792000532150269
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,40,40,64,0,1,float16,float16,31,0.2797813415527344
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,40,40,64,0,1,float16,fp8,31,0.28274667263031006
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,40,40,64,0,1,float16,float16,63,0.28904000918070477
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,40,40,64,0,1,float16,fp8,63,0.29155733187993366
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,40,40,64,0,1,float16,float16,127,0.29502934217453003
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,40,40,64,0,1,float16,float16,1,0.013034666577974955
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,40,40,64,0,1,float16,fp8,127,0.2953653335571289
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,40,40,64,0,1,float16,fp8,1,0.013061333447694778
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,40,40,64,0,1,float16,float16,3,0.013669333110253016
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,40,40,64,0,1,float16,fp8,3,0.013269333789745966
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,40,40,64,0,1,float16,float16,7,0.01301866645614306
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,40,40,64,0,1,float16,fp8,7,0.013317332913478216
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,40,40,64,0,1,float16,float16,15,0.012906666845083237
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,40,40,64,0,1,float16,fp8,15,0.014010666559139887
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,40,40,64,0,1,float16,float16,31,0.013807999591032663
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,40,40,64,0,1,float16,fp8,31,0.01357866699496905
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,40,40,64,0,1,float16,float16,63,0.015605332950750986
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,40,40,64,0,1,float16,fp8,63,0.015642666568358738
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,40,40,64,0,1,float16,float16,127,0.015813333292802174
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,40,40,64,0,1,float16,fp8,127,0.015728000551462173
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,40,40,64,0,1,float16,float16,255,0.016106666376193363
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,40,40,64,0,1,float16,fp8,1023,0.02566933383544286
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,40,40,64,0,1,float16,fp8,255,0.015749332805474598
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,40,40,64,0,1,float16,float16,511,0.018645333747069042
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,40,40,64,0,1,float16,fp8,511,0.018485333770513535
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,40,40,64,0,1,float16,float16,1023,0.02869333326816559
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,40,40,64,0,1,float16,float16,2047,0.058330665032068886
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,40,40,64,0,1,float16,fp8,2047,0.04363200068473816
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,40,40,64,0,1,float16,float16,4095,0.10340799887975057
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,40,40,64,0,1,float16,fp8,4095,0.0862613320350647
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,40,40,64,0,1,float16,float16,8191,0.19960532585779825
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,40,40,64,0,1,float16,fp8,8191,0.15734933813412985
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,40,40,64,0,1,float16,float16,16383,0.38813332716623944
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,40,40,64,0,1,float16,fp8,16383,0.31865066289901733
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,40,40,64,0,1,float16,float16,1,0.3863946596781413
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,40,40,64,0,1,float16,fp8,1,0.3901866674423218
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,40,40,64,0,1,float16,float16,3,0.40459732214609784
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,40,40,64,0,1,float16,fp8,3,0.39485331376393634
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,40,40,64,0,1,float16,float16,15,0.45500266551971436
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,40,40,64,0,1,float16,float16,7,0.4325600067774455
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,40,40,64,0,1,float16,fp8,7,0.4267946481704712
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,40,40,64,0,1,float16,fp8,15,0.4606399933497111
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,40,40,64,0,1,float16,float16,31,0.5529173215230306
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,40,40,64,0,1,float16,fp8,31,0.5610880057017008
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,40,40,64,0,1,float16,float16,63,0.5698666572570801
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,40,40,64,0,1,float16,fp8,63,0.57532799243927
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,40,40,64,0,1,float16,float16,1,0.8326666355133057
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,40,40,64,0,1,float16,fp8,1,0.8442293008168539
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,40,40,64,0,1,float16,float16,3,0.8360106945037842
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,40,40,64,0,1,float16,fp8,3,0.8501013120015463
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,40,40,64,0,1,float16,float16,7,0.8626240094502767
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,40,40,64,0,1,float16,float16,1,0.012666666259368261
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,40,40,64,0,1,float16,fp8,7,0.8752266565958658
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,40,40,64,0,1,float16,fp8,1,0.01320533330241839
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,40,40,64,0,1,float16,float16,15,0.9010186990102133
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,40,40,64,0,1,float16,fp8,15,0.9154293537139893
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,40,40,64,0,1,float16,float16,3,0.012565333396196365
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,40,40,64,0,1,float16,fp8,3,0.012736000120639801
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,40,40,64,0,1,float16,float16,7,0.012725333372751871
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,40,40,64,0,1,float16,fp8,7,0.01341333364446958
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,40,40,64,0,1,float16,float16,31,1.098357359568278
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,40,40,64,0,1,float16,fp8,63,0.014250667144854864
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,40,40,64,0,1,float16,float16,15,0.012965332716703415
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,40,40,64,0,1,float16,fp8,127,0.014282666146755219
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,40,40,64,0,1,float16,fp8,31,1.1156906286875408
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,40,40,64,0,1,float16,fp8,15,0.012944000462690989
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,40,40,64,0,1,float16,float16,31,0.013818666338920593
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,40,40,64,0,1,float16,fp8,31,0.014090667168299357
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,40,40,64,0,1,float16,float16,63,0.013914667069911957
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,40,40,64,0,1,float16,float16,127,0.014170666535695394
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,40,40,64,0,1,float16,float16,255,0.016538667182127636
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,40,40,64,0,1,float16,fp8,255,0.01646399994691213
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,40,40,64,0,1,float16,float16,511,0.026693334182103474
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,40,40,64,0,1,float16,float16,4095,0.19883733987808228
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,40,40,64,0,1,float16,fp8,511,0.023024000227451324
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,40,40,64,0,1,float16,float16,1023,0.057349334160486855
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,40,40,128,0,1,float16,float16,1,0.017360000560681026
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,40,40,64,0,1,float16,fp8,8191,0.3296320041020711
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,40,40,64,0,1,float16,fp8,1023,0.04188799858093262
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,40,40,128,0,1,float16,float16,3,0.017935999979575474
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,40,40,128,0,1,float16,fp8,3,0.018373332917690277
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,40,40,128,0,1,float16,float16,7,0.01823466643691063
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,40,40,64,0,1,float16,float16,2047,0.10231999556223552
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,40,40,64,0,1,float16,fp8,2047,0.08493866523106892
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,40,40,64,0,1,float16,fp8,4095,0.15784533818562826
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,40,40,64,0,1,float16,float16,8191,0.39507734775543213
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,40,40,128,0,1,float16,fp8,1,0.017664000391960144
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,40,40,128,0,1,float16,fp8,7,0.01842133328318596
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,40,40,128,0,1,float16,float16,15,0.021503999829292297
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,40,40,128,0,1,float16,fp8,15,0.021781332790851593
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,40,40,128,0,1,float16,float16,31,0.02162666618824005
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,40,40,128,0,1,float16,fp8,31,0.021733333667119343
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,40,40,128,0,1,float16,float16,63,0.02205866575241089
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,40,40,128,0,1,float16,fp8,63,0.022090665996074677
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,40,40,128,0,1,float16,fp8,511,0.07260799904664357
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,40,40,128,0,1,float16,float16,127,0.028565332293510437
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,40,40,128,0,1,float16,fp8,1023,0.13570132851600647
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,40,40,128,0,1,float16,fp8,127,0.025813333690166473
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,40,40,128,0,1,float16,float16,255,0.047007997830708824
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,40,40,128,0,1,float16,fp8,4095,0.5082826614379883
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,40,40,128,0,1,float16,fp8,255,0.041322665909926094
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,40,40,128,0,1,float16,float16,511,0.09341866771380107
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,40,40,128,0,1,float16,float16,1023,0.17812800407409668
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,40,40,128,0,1,float16,float16,2047,0.33605865637461346
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,40,40,128,0,1,float16,fp8,2047,0.26422399282455444
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,40,40,128,0,1,float16,float16,4095,0.6562879880269369
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,40,40,128,0,1,float16,float16,1,0.008810666700204214
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,40,40,128,0,1,float16,fp8,1,0.008778666456540426
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,40,40,128,0,1,float16,float16,3,0.008709333216150602
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,40,40,128,0,1,float16,fp8,3,0.00895999992887179
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,40,40,128,0,1,float16,float16,7,0.008736000085870424
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,40,40,128,0,1,float16,fp8,7,0.009205333267649015
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,40,40,128,0,1,float16,float16,15,0.009029333169261614
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,40,40,128,0,1,float16,float16,127,0.01116266722480456
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,40,40,128,0,1,float16,fp8,15,0.009189333145817121
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,40,40,128,0,1,float16,fp8,255,0.011301333705584208
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,40,40,128,0,1,float16,float16,31,0.009514666472872099
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,40,40,128,0,1,float16,fp8,511,0.015482666591803232
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,40,40,128,0,1,float16,fp8,31,0.00979200005531311
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,40,40,128,0,1,float16,float16,63,0.01119999960064888
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,40,40,128,0,1,float16,fp8,63,0.011237333218256632
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,40,40,128,0,1,float16,fp8,127,0.011546666423479715
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,40,40,128,0,1,float16,float16,255,0.01157333329319954
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,40,40,128,0,1,float16,float16,511,0.015562667200962702
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,40,40,128,0,1,float16,float16,1023,0.016895999511082966
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,40,40,128,0,1,float16,fp8,1023,0.016677333662907284
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,40,40,128,0,1,float16,float16,2047,0.030037333567937214
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,40,40,128,0,1,float16,fp8,2047,0.030048000315825146
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,40,40,128,0,1,float16,float16,4095,0.04138133426507314
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,40,40,128,0,1,float16,fp8,4095,0.040394666294256844
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,40,40,128,0,1,float16,float16,8191,0.07322133580843608
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,40,40,128,0,1,float16,fp8,8191,0.06169599791367849
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,40,40,128,0,1,float16,float16,16383,0.12441066900889079
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,40,40,128,0,1,float16,fp8,16383,0.11272000273068745
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,40,40,128,0,1,float16,float16,1,0.008858666444818178
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,40,40,128,0,1,float16,fp8,1,0.008997333546479544
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,40,40,128,0,1,float16,float16,3,0.008869333192706108
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,40,40,128,0,1,float16,fp8,3,0.00915733352303505
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,40,40,128,0,1,float16,float16,7,0.008912000184257826
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,40,40,128,0,1,float16,fp8,7,0.008949333180983862
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,40,40,128,0,1,float16,float16,15,0.008997333546479544
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,40,40,128,0,1,float16,fp8,15,0.00915733352303505
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,40,40,128,0,1,float16,float16,31,0.009658666948477427
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,40,40,128,0,1,float16,fp8,255,0.01163200040658315
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,40,40,128,0,1,float16,fp8,31,0.00980266680320104
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,40,40,128,0,1,float16,float16,63,0.011445333560307821
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,40,40,128,0,1,float16,fp8,63,0.011567999919255575
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,40,40,128,0,1,float16,float16,127,0.011445333560307821
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,40,40,128,0,1,float16,fp8,127,0.011391999820868174
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,40,40,128,0,1,float16,float16,255,0.011786667009194693
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,40,40,128,0,1,float16,float16,511,0.025914666553338368
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,40,40,128,0,1,float16,fp8,511,0.025450666745503742
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,40,40,128,0,1,float16,float16,1023,0.029696000119050343
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,40,40,128,0,1,float16,fp8,1023,0.02993600070476532
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,40,40,128,0,1,float16,float16,2047,0.04153066625197729
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,40,40,128,0,1,float16,fp8,2047,0.040106666584809623
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,40,40,128,0,1,float16,float16,4095,0.07341333230336507
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,40,40,128,0,1,float16,fp8,4095,0.06162666777769724
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,40,40,128,0,1,float16,float16,8191,0.12460266550381978
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,40,40,128,0,1,float16,fp8,8191,0.1125973363717397
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,40,40,128,0,1,float16,float16,16383,0.2257279952367147
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,40,40,128,0,1,float16,fp8,16383,0.20123199621836343
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,40,40,128,0,1,float16,float16,1,0.028677334388097126
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,40,40,128,0,1,float16,fp8,15,0.03896533449490865
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,40,40,128,0,1,float16,fp8,1,0.029487999776999157
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,40,40,128,0,1,float16,float16,3,0.029552000264326733
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,40,40,128,0,1,float16,float16,63,0.041637333730856575
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,40,40,128,0,1,float16,fp8,3,0.0305226668715477
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,40,40,128,0,1,float16,float16,7,0.03075733284155528
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,40,40,128,0,1,float16,fp8,7,0.03149333347876867
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,40,40,128,0,1,float16,float16,15,0.038575999438762665
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,40,40,128,0,1,float16,fp8,255,0.07950399816036224
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,40,40,128,0,1,float16,float16,31,0.03889599939187368
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,40,40,128,0,1,float16,fp8,31,0.038917332887649536
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,40,40,128,0,1,float16,float16,1023,0.32046399513880414
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,40,40,128,0,1,float16,fp8,63,0.03917866696914037
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,40,40,128,0,1,float16,float16,127,0.0537120004494985
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,40,40,128,0,1,float16,fp8,127,0.050245334704717
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,40,40,128,0,1,float16,float16,255,0.08259200056393941
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,40,40,128,0,1,float16,fp8,1,0.014778666198253632
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,40,40,128,0,1,float16,float16,511,0.16746666034062704
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,40,40,128,0,1,float16,fp8,3,0.014746667196353277
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,40,40,128,0,1,float16,fp8,511,0.13251733779907227
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,40,40,128,0,1,float16,fp8,1023,0.2553013364473979
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,40,40,128,0,1,float16,float16,2047,0.6202559868494669
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,40,40,128,0,1,float16,fp8,2047,0.48811201254526776
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,40,40,128,0,1,float16,float16,1,0.014549333602190018
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,40,40,128,0,1,float16,float16,3,0.014453332871198654
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,40,40,128,0,1,float16,float16,7,0.014346666634082794
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,40,40,128,0,1,float16,fp8,7,0.014725333700577417
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,40,40,128,0,1,float16,float16,15,0.014890667051076889
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,40,40,128,0,1,float16,fp8,127,0.01961600035429001
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,40,40,128,0,1,float16,fp8,15,0.015381333728631338
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,40,40,128,0,1,float16,float16,31,0.01646399994691213
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,40,40,128,0,1,float16,fp8,31,0.016597333053747814
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,40,40,128,0,1,float16,float16,63,0.019546666493018467
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,40,40,128,0,1,float16,fp8,63,0.019706666469573975
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,40,40,128,0,1,float16,float16,127,0.01964266722400983
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,40,40,128,0,1,float16,float16,255,0.019930666933457058
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,40,40,128,0,1,float16,fp8,255,0.019946667055288952
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,40,40,128,0,1,float16,float16,511,0.02295999974012375
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,40,40,128,0,1,float16,fp8,511,0.023221333821614582
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,40,40,128,0,1,float16,float16,1023,0.033615998923778534
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,40,40,128,0,1,float16,fp8,1023,0.03244800120592117
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,40,40,128,0,1,float16,float16,2047,0.0680213322242101
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,40,40,128,0,1,float16,float16,16383,0.4294026692708333
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,40,40,128,0,1,float16,fp8,2047,0.0532533327738444
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,40,40,128,0,1,float16,float16,4095,0.11952533324559529
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,40,40,128,0,1,float16,fp8,4095,0.1009173293908437
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,40,40,128,0,1,float16,float16,8191,0.22334933280944824
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,40,40,128,0,1,float16,fp8,8191,0.1853653391202291
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,40,40,128,0,1,float16,fp8,16383,0.35603201389312744
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,40,40,128,0,1,float16,float16,1,0.052522664268811546
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,40,40,128,0,1,float16,fp8,1,0.05470400055249532
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,40,40,128,0,1,float16,float16,31,0.07667199770609538
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,40,40,128,0,1,float16,float16,3,0.0543146679798762
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,40,40,128,0,1,float16,fp8,3,0.05602666735649109
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,40,40,128,0,1,float16,float16,7,0.0562666654586792
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,40,40,128,0,1,float16,fp8,7,0.058037335673967995
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,40,40,128,0,1,float16,fp8,127,0.09815466403961182
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,40,40,128,0,1,float16,float16,15,0.07122666637102763
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,40,40,128,0,1,float16,fp8,15,0.07167999943097432
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,40,40,128,0,1,float16,fp8,31,0.07233599821726482
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,40,40,128,0,1,float16,float16,63,0.07965333263079326
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,40,40,128,0,1,float16,fp8,63,0.0773226668437322
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,40,40,128,0,1,float16,float16,127,0.09895466764767964
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,40,40,128,0,1,float16,float16,255,0.15365866820017496
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,40,40,128,0,1,float16,fp8,255,0.14922133088111877
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,40,40,128,0,1,float16,float16,511,0.31667200724283856
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,40,40,128,0,1,float16,fp8,511,0.2505653301874797
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,40,40,128,0,1,float16,float16,1023,0.6229013204574585
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,40,40,128,0,1,float16,fp8,1023,0.48786131540934247
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,40,40,128,0,1,float16,float16,1,0.09903466701507568
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,40,40,128,0,1,float16,fp8,1,0.10249599814414978
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,40,40,128,0,1,float16,float16,3,0.10360532999038696
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,40,40,128,0,1,float16,fp8,3,0.10755733648935954
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,40,40,128,0,1,float16,float16,7,0.10966933767000835
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,40,40,128,0,1,float16,float16,63,0.1523413360118866
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,40,40,128,0,1,float16,fp8,7,0.11389866471290588
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,40,40,128,0,1,float16,float16,15,0.1455946664015452
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,40,40,128,0,1,float16,fp8,127,0.18777066469192505
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,40,40,128,0,1,float16,fp8,15,0.1404159963130951
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,40,40,128,0,1,float16,float16,31,0.14920000235239664
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,40,40,128,0,1,float16,fp8,31,0.14839466412862143
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,40,40,128,0,1,float16,fp8,63,0.15221866965293884
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,40,40,128,0,1,float16,float16,127,0.18941867351531982
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,40,40,128,0,1,float16,float16,255,0.2958400050799052
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,40,40,128,0,1,float16,float16,7,0.23196266094843546
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,40,40,128,0,1,float16,fp8,255,0.2900746663411458
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,40,40,128,0,1,float16,float16,1,0.19550933440526327
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,40,40,128,0,1,float16,fp8,1,0.19898666938145956
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,40,40,128,0,1,float16,float16,31,0.2927146752675374
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,40,40,128,0,1,float16,float16,3,0.21266667048136392
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,40,40,128,0,1,float16,fp8,3,0.21051200230916342
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,40,40,128,0,1,float16,fp8,7,0.22844799359639487
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,40,40,128,0,1,float16,float16,15,0.2847839991251628
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,40,40,128,0,1,float16,fp8,15,0.288917342821757
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,40,40,128,0,1,float16,fp8,1,0.01312000056107839
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,40,40,128,0,1,float16,fp8,31,0.29204267263412476
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,40,40,128,0,1,float16,float16,63,0.2986026604970296
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,40,40,128,0,1,float16,fp8,63,0.2965280016263326
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,40,40,128,0,1,float16,float16,127,0.37030935287475586
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,40,40,128,0,1,float16,float16,15,0.014357333381970724
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,40,40,128,0,1,float16,fp8,127,0.36762134234110516
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,40,40,128,0,1,float16,float16,31,0.016261332978804905
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,40,40,128,0,1,float16,float16,1,0.012981332838535309
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,40,40,128,0,1,float16,float16,3,0.01331199953953425
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,40,40,128,0,1,float16,fp8,3,0.01312000056107839
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,40,40,128,0,1,float16,float16,7,0.013568000247081121
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,40,40,128,0,1,float16,fp8,7,0.013818666338920593
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,40,40,128,0,1,float16,fp8,15,0.0143306665122509
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,40,40,128,0,1,float16,fp8,31,0.01565333331624667
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,40,40,128,0,1,float16,float16,63,0.01658133293191592
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,40,40,128,0,1,float16,fp8,63,0.01579733317097028
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,40,40,128,0,1,float16,float16,127,0.016762666404247284
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,40,40,128,0,1,float16,fp8,127,0.016176000237464905
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,40,40,128,0,1,float16,float16,255,0.01942933350801468
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,40,40,128,0,1,float16,fp8,255,0.019578666736682255
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,40,40,128,0,1,float16,float16,511,0.029146666328112285
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,40,40,128,0,1,float16,fp8,511,0.026560001075267792
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,40,40,128,0,1,float16,float16,1023,0.059706668059031166
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,40,40,128,0,1,float16,fp8,1023,0.04468800127506256
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,40,40,128,0,1,float16,float16,2047,0.10476266344388326
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,40,40,128,0,1,float16,fp8,2047,0.08649067083994548
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,40,40,128,0,1,float16,float16,4095,0.20055466890335083
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,40,40,128,0,1,float16,float16,1,0.42386666933695477
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,40,40,128,0,1,float16,fp8,4095,0.16077333688735962
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,40,40,128,0,1,float16,float16,3,0.43530134359995526
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,40,40,128,0,1,float16,float16,8191,0.3897973299026489
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,40,40,128,0,1,float16,fp8,8191,0.31707199414571124
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,40,40,128,0,1,float16,float16,16383,0.765775998433431
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,40,40,128,0,1,float16,fp8,16383,0.6203360160191854
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,40,40,128,0,1,float16,fp8,1,0.43350398540496826
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,40,40,128,0,1,float16,fp8,3,0.4480533202489217
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,40,40,128,0,1,float16,float16,7,0.4580320119857788
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,40,40,128,0,1,float16,fp8,7,0.47052268187205
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,40,40,128,0,1,float16,float16,15,0.5635093450546265
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,40,40,128,0,1,float16,fp8,15,0.571664015452067
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,40,40,128,0,1,float16,float16,31,0.5781173308690389
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,40,40,128,0,1,float16,fp8,1,0.8617599805196127
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,40,40,128,0,1,float16,fp8,31,0.5772693157196045
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,40,40,128,0,1,float16,float16,63,0.5855146646499634
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,40,40,128,0,1,float16,fp8,63,0.5849173466364542
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,40,40,128,0,1,float16,float16,1,0.8406453132629395
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,40,40,128,0,1,float16,float16,3,0.8632373015085856
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,40,40,128,0,1,float16,fp8,3,0.8876960277557373
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,40,40,128,0,1,float16,float16,1,0.012351999680201212
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,40,40,128,0,1,float16,float16,31,1.1480693022410076
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,40,40,128,0,1,float16,float16,7,0.9077119827270508
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,40,40,128,0,1,float16,fp8,7,0.9330613613128662
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,40,40,128,0,1,float16,float16,15,1.1189333597819011
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,40,40,128,0,1,float16,fp8,15,1.136250654856364
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,40,40,128,0,1,float16,fp8,1,0.012538666526476542
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,40,40,128,0,1,float16,fp8,15,0.014394666999578476
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,40,40,128,0,1,float16,fp8,31,1.1483200391133626
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,40,40,128,0,1,float16,float16,3,0.012655999511480331
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,40,40,128,0,1,float16,fp8,3,0.012810666114091873
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,40,40,128,0,1,float16,float16,7,0.013173333058754602
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,40,40,128,0,1,float16,fp8,7,0.012773333738247553
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,40,40,128,0,1,float16,float16,15,0.013823999712864557
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,40,40,128,0,1,float16,float16,31,0.013978666315476099
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,40,40,128,0,1,float16,fp8,255,0.023754666248957317
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,40,40,128,0,1,float16,fp8,31,0.01441066712141037
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,40,40,128,0,1,float16,float16,63,0.014848000059525171
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,40,40,128,0,1,float16,fp8,63,0.014346666634082794
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,40,40,128,0,1,float16,float16,127,0.016506666938463848
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,40,40,128,0,1,float16,fp8,127,0.01716800034046173
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,40,40,128,0,1,float16,float16,255,0.026165333886941273
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,40,40,128,0,1,float16,float16,511,0.05716800192991892
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,40,40,128,0,1,float16,fp8,4095,0.3150506615638733
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,40,40,128,0,1,float16,fp8,511,0.042378668983777366
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,40,40,128,0,1,float16,float16,1023,0.10309333602587382
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,40,40,128,0,1,float16,fp8,1023,0.08669867118199666
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,32,32,64,0,1,float16,fp8,1,0.014538666854302088
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,40,40,128,0,1,float16,float16,2047,0.1991680065790812
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,32,32,64,0,1,float16,float16,3,0.014746667196353277
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,40,40,128,0,1,float16,fp8,2047,0.16089600324630737
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,40,40,128,0,1,float16,float16,4095,0.3858400185902913
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,40,40,128,0,1,float16,float16,8191,0.7747093041737875
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,40,40,128,0,1,float16,fp8,8191,0.6285279989242554
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,32,32,64,0,1,float16,float16,31,0.01876266673207283
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,32,32,64,0,1,float16,float16,1,0.014458666245142618
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,32,32,64,0,1,float16,fp8,3,0.014560000350077948
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,32,32,64,0,1,float16,fp8,63,0.019146667172511418
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,32,32,64,0,1,float16,float16,7,0.014416000495354334
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,32,32,64,0,1,float16,fp8,7,0.015226667126019796
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,32,32,64,0,1,float16,float16,15,0.015552000453074774
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,32,32,64,0,1,float16,fp8,15,0.015722667177518208
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,32,32,64,0,1,float16,fp8,31,0.018538666268189747
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,32,32,64,0,1,float16,float16,63,0.018245333184798557
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,32,32,64,0,1,float16,fp8,511,0.03399466723203659
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,32,32,64,0,1,float16,float16,127,0.019061333189407986
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,32,32,64,0,1,float16,fp8,127,0.01937599976857503
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,32,32,64,0,1,float16,fp8,2047,0.11063466469446818
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,32,32,64,0,1,float16,float16,255,0.02250666668017705
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,32,32,64,0,1,float16,fp8,255,0.02290133386850357
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,32,32,64,0,1,float16,float16,511,0.03801066676775614
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,32,32,64,0,1,float16,float16,1023,0.07390399773915608
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,32,32,64,0,1,float16,float16,1,0.00892800030608972
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,32,32,64,0,1,float16,fp8,1023,0.05778133372465769
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,32,32,64,0,1,float16,float16,3,0.011690666278203329
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,32,32,64,0,1,float16,float16,2047,0.13571199774742126
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,32,32,64,0,1,float16,float16,4095,0.2569920023282369
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,32,32,64,0,1,float16,fp8,4095,0.20587199926376343
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,32,32,64,0,1,float16,float16,8191,0.529914657274882
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,32,32,64,0,1,float16,fp8,15,0.009818666925032934
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,32,32,64,0,1,float16,fp8,8191,0.40733333428700763
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,32,32,64,0,1,float16,fp8,1,0.009232000137368837
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,32,32,64,0,1,float16,fp8,3,0.009546666716535887
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,32,32,64,0,1,float16,float16,63,0.009525333220760027
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,32,32,64,0,1,float16,float16,7,0.008442666381597519
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,32,32,64,0,1,float16,fp8,7,0.010159999753038088
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,32,32,64,0,1,float16,float16,15,0.00956266683836778
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,32,32,64,0,1,float16,float16,31,0.008453333129485449
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,32,32,64,0,1,float16,fp8,31,0.010005333150426546
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,32,32,64,0,1,float16,fp8,63,0.010181333248813948
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,32,32,64,0,1,float16,float16,127,0.009119999905427298
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,32,32,64,0,1,float16,fp8,127,0.012319999436537424
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,32,32,64,0,1,float16,float16,255,0.009733333562811216
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,32,32,64,0,1,float16,fp8,255,0.012416000167528788
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,32,32,64,0,1,float16,float16,511,0.010762666662534079
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,32,32,64,0,1,float16,fp8,4095,0.02844800055027008
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,32,32,64,0,1,float16,fp8,511,0.011792000383138657
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,32,32,64,0,1,float16,float16,1023,0.018138666947682697
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,32,32,64,0,1,float16,fp8,1023,0.01602666700879733
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,32,32,64,0,1,float16,float16,2047,0.017994667092959087
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,32,32,64,0,1,float16,fp8,2047,0.01748266691962878
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,32,32,64,0,1,float16,float16,4095,0.028405333558718365
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,32,32,64,0,1,float16,float16,8191,0.038693333665529885
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,32,32,64,0,1,float16,fp8,8191,0.03806933263937632
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,32,32,64,0,1,float16,float16,16383,0.06492266555627187
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,32,32,64,0,1,float16,float16,3,0.00921066664159298
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,32,32,64,0,1,float16,fp8,16383,0.055770665407180786
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,32,32,64,0,1,float16,fp8,7,0.009759999811649323
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,32,32,64,0,1,float16,float16,32767,0.10619200269381206
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,32,32,64,0,1,float16,fp8,32767,0.09135466814041138
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,32,32,64,0,1,float16,float16,1,0.009418666362762451
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,32,32,64,0,1,float16,fp8,1,0.009850666547815004
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,32,32,64,0,1,float16,fp8,3,0.010191999996701876
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,32,32,64,0,1,float16,float16,7,0.008330666770537695
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,32,32,64,0,1,float16,float16,15,0.009472000102202097
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,32,32,64,0,1,float16,fp8,15,0.00978133330742518
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,32,32,64,0,1,float16,float16,31,0.0102613332370917
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,32,32,64,0,1,float16,fp8,31,0.010170666500926018
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,32,32,64,0,1,float16,float16,63,0.009610666582981745
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,32,32,64,0,1,float16,fp8,63,0.010565333068370819
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,32,32,64,0,1,float16,float16,127,0.010522666076819101
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,32,32,64,0,1,float16,fp8,127,0.01184533288081487
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,32,32,64,0,1,float16,float16,255,0.010010666524370512
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,32,32,64,0,1,float16,fp8,255,0.012362666428089142
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,32,32,64,0,1,float16,float16,511,0.01081066702802976
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,32,32,64,0,1,float16,fp8,511,0.012367999802033106
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,32,32,64,0,1,float16,float16,1023,0.01704000060757001
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,32,32,64,0,1,float16,fp8,1023,0.016901332885026932
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,32,32,64,0,1,float16,float16,2047,0.028837333122889202
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,32,32,64,0,1,float16,fp8,2047,0.028229333460330963
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,32,32,64,0,1,float16,float16,4095,0.03630933413902918
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,32,32,64,0,1,float16,fp8,4095,0.035760000348091125
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,32,32,64,0,1,float16,float16,8191,0.05505600074927012
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,32,32,64,0,1,float16,fp8,8191,0.04875733455022176
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,32,32,64,0,1,float16,float16,3,0.02492266645034154
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,32,32,64,0,1,float16,fp8,3,0.025055999557177227
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,32,32,64,0,1,float16,float16,16383,0.0957973301410675
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,32,32,64,0,1,float16,fp8,16383,0.07957866787910461
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,32,32,64,0,1,float16,float16,32767,0.16523200273513794
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,32,32,64,0,1,float16,fp8,32767,0.13952533404032388
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,32,32,64,0,1,float16,float16,1,0.024826665719350178
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,32,32,64,0,1,float16,fp8,1,0.025477332373460133
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,32,32,64,0,1,float16,float16,7,0.025568000972270966
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,32,32,64,0,1,float16,fp8,7,0.02607999990383784
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,32,32,64,0,1,float16,float16,15,0.02643733223279317
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,32,32,64,0,1,float16,fp8,15,0.027301333844661713
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,32,32,64,0,1,float16,float16,31,0.032698666055997215
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,32,32,64,0,1,float16,fp8,31,0.033029332756996155
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,32,32,64,0,1,float16,float16,511,0.06844266752401988
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,32,32,64,0,1,float16,float16,63,0.03274133304754893
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,32,32,64,0,1,float16,fp8,63,0.033530667424201965
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,32,32,64,0,1,float16,float16,127,0.03387200087308884
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,32,32,64,0,1,float16,fp8,127,0.03366933266321818
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,32,32,64,0,1,float16,float16,255,0.045791998505592346
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,32,32,64,0,1,float16,fp8,255,0.04057066639264425
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,32,32,64,0,1,float16,fp8,511,0.06566399832566579
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,32,32,64,0,1,float16,float16,1023,0.13700266679128012
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,32,32,64,0,1,float16,fp8,1023,0.10837866862614949
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,32,32,64,0,1,float16,float16,2047,0.25971200068791706
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,32,32,64,0,1,float16,fp8,2047,0.20837867259979248
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,32,32,64,0,1,float16,float16,4095,0.5034079949061075
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,32,32,64,0,1,float16,fp8,4095,0.394538680712382
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,32,32,64,0,1,float16,fp8,7,0.00996800015370051
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,32,32,64,0,1,float16,float16,1,0.010362666721145311
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,32,32,64,0,1,float16,fp8,1,0.010090666512648264
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,32,32,64,0,1,float16,float16,3,0.010128000130256018
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,32,32,64,0,1,float16,float16,63,0.009850666547815004
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,32,32,64,0,1,float16,fp8,3,0.009999999776482582
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,32,32,64,0,1,float16,float16,7,0.010090666512648264
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,32,32,64,0,1,float16,float16,15,0.009328000247478485
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,32,32,64,0,1,float16,fp8,15,0.011866666376590729
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,32,32,64,0,1,float16,float16,31,0.00961599995692571
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,32,32,64,0,1,float16,fp8,31,0.010506667196750641
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,32,32,64,0,1,float16,fp8,63,0.010922666639089584
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,32,32,64,0,1,float16,float16,127,0.00978133330742518
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,32,32,64,0,1,float16,fp8,127,0.01249066616098086
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,32,32,64,0,1,float16,float16,255,0.0102186668664217
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,32,32,64,0,1,float16,fp8,255,0.012693333129088083
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,32,32,64,0,1,float16,float16,511,0.011130666981140772
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,32,32,64,0,1,float16,fp8,511,0.01249066616098086
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,32,32,64,0,1,float16,fp8,4095,0.04174399872620901
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,32,32,64,0,1,float16,float16,1023,0.02773866554101308
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,32,32,64,0,1,float16,fp8,1023,0.026880001028378803
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,32,32,64,0,1,float16,float16,16383,0.14574933052062988
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,32,32,64,0,1,float16,float16,2047,0.03193599979082743
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,32,32,64,0,1,float16,fp8,2047,0.03161066770553589
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,32,32,64,0,1,float16,float16,4095,0.047610665361086525
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,32,32,64,0,1,float16,float16,8191,0.08515200018882751
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,32,32,64,0,1,float16,fp8,8191,0.06582933167616527
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,32,32,64,0,1,float16,fp8,16383,0.11462933818499248
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,32,32,64,0,1,float16,float16,32767,0.26825066407521564
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,32,32,64,0,1,float16,fp8,32767,0.20387732982635498
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,32,32,64,0,1,float16,float16,1,0.04409599800904592
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,32,32,64,0,1,float16,fp8,1,0.04506133496761322
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,32,32,64,0,1,float16,float16,3,0.04437333345413208
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,32,32,64,0,1,float16,fp8,3,0.04525866607824961
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,32,32,64,0,1,float16,float16,7,0.045706664522488914
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,32,32,64,0,1,float16,fp8,7,0.047055999437967934
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,32,32,64,0,1,float16,float16,15,0.0476693312327067
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,32,32,64,0,1,float16,fp8,15,0.048341333866119385
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,32,32,64,0,1,float16,fp8,127,0.06099733213583628
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,32,32,64,0,1,float16,float16,255,0.08210666477680206
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,32,32,64,0,1,float16,float16,31,0.0589279979467392
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,32,32,64,0,1,float16,fp8,31,0.060090666015942894
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,32,32,64,0,1,float16,float16,63,0.06043200194835663
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,32,32,64,0,1,float16,fp8,63,0.060736000537872314
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,32,32,64,0,1,float16,float16,127,0.06657066444555919
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,32,32,64,0,1,float16,fp8,255,0.08014933268229167
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,32,32,64,0,1,float16,float16,511,0.12562666336695352
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,32,32,64,0,1,float16,fp8,511,0.12277866403261821
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,32,32,64,0,1,float16,float16,1023,0.25702933470408124
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,32,32,64,0,1,float16,fp8,1023,0.20516266425450644
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,32,32,64,0,1,float16,float16,2047,0.503386656443278
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,32,32,64,0,1,float16,fp8,2047,0.3989280064900716
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,32,32,64,0,1,float16,float16,1,0.0820000022649765
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,32,32,64,0,1,float16,fp8,1,0.08385066191355388
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,32,32,64,0,1,float16,float16,3,0.08230933547019958
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,32,32,64,0,1,float16,fp8,3,0.0843946635723114
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,32,32,64,0,1,float16,float16,7,0.08524800340334575
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,32,32,64,0,1,float16,fp8,7,0.08742400010426839
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,32,32,64,0,1,float16,float16,15,0.08839466174443562
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,32,32,64,0,1,float16,fp8,15,0.09038933118184407
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,32,32,64,0,1,float16,fp8,63,0.11522133151690166
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,32,32,64,0,1,float16,float16,127,0.1237600048383077
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,32,32,64,0,1,float16,float16,31,0.11335999766985576
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,32,32,64,0,1,float16,fp8,31,0.11354133486747742
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,32,32,64,0,1,float16,float16,63,0.12129599849383037
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,32,32,64,0,1,float16,fp8,127,0.12387200196584065
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,32,32,64,0,1,float16,float16,255,0.15414399902025858
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,32,32,64,0,1,float16,float16,1,0.1569653352101644
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,32,32,64,0,1,float16,fp8,255,0.15198399623235068
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,32,32,64,0,1,float16,float16,511,0.2385866641998291
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,32,32,64,0,1,float16,fp8,511,0.23403199513753256
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,32,32,64,0,1,float16,fp8,1,0.16142400105794272
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,32,32,64,0,1,float16,float16,3,0.15722666184107462
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,32,32,64,0,1,float16,fp8,3,0.16127999623616537
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,32,32,64,0,1,float16,float16,7,0.16425599654515585
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,32,32,64,0,1,float16,fp8,7,0.16834133863449097
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,32,32,64,0,1,float16,float16,15,0.1808639963467916
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,32,32,64,0,1,float16,fp8,15,0.17427732547124228
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,32,32,64,0,1,float16,float16,31,0.22698666652043661
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,32,32,64,0,1,float16,fp8,31,0.22579733530680338
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,32,32,64,0,1,float16,float16,63,0.23362133900324503
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,32,32,64,0,1,float16,fp8,63,0.23568532864252725
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,32,32,64,0,1,float16,float16,127,0.23822933435440063
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,32,32,64,0,1,float16,fp8,127,0.23914132515589395
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,32,32,64,0,1,float16,float16,1,0.010346666599313417
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,32,32,64,0,1,float16,fp8,1,0.00966933307548364
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,32,32,64,0,1,float16,float16,255,0.296997328599294
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,32,32,64,0,1,float16,float16,3,0.010458666831254959
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,32,32,64,0,1,float16,fp8,255,0.2953493396441142
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,32,32,64,0,1,float16,fp8,3,0.00973866693675518
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,32,32,64,0,1,float16,float16,7,0.009642666826645533
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,32,32,64,0,1,float16,fp8,7,0.010362666721145311
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,32,32,64,0,1,float16,float16,15,0.009749333063761393
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,32,32,64,0,1,float16,fp8,15,0.009637333452701569
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,32,32,64,0,1,float16,float16,31,0.01055466632048289
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,32,32,64,0,1,float16,fp8,31,0.010501333822806677
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,32,32,64,0,1,float16,float16,63,0.01190399999419848
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,32,32,64,0,1,float16,fp8,63,0.011962667107582092
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,32,32,64,0,1,float16,float16,127,0.012223999947309494
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,32,32,64,0,1,float16,fp8,127,0.012181332955757776
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,32,32,64,0,1,float16,float16,255,0.012432000289360682
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,32,32,64,0,1,float16,fp8,255,0.012159999459981918
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,32,32,64,0,1,float16,float16,511,0.014202666779359182
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,32,32,64,0,1,float16,fp8,511,0.013989333063364029
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,32,32,64,0,1,float16,fp8,4095,0.05842133363087972
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,32,32,64,0,1,float16,float16,8191,0.132314662138621
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,32,32,64,0,1,float16,float16,1023,0.019317333896954853
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,32,32,64,0,1,float16,fp8,1023,0.01929066702723503
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,32,32,64,0,1,float16,float16,2047,0.04090133309364319
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,32,32,64,0,1,float16,fp8,2047,0.03012266755104065
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,32,32,64,0,1,float16,float16,4095,0.07166933516661327
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,32,32,64,0,1,float16,fp8,8191,0.10505599776903789
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,32,32,64,0,1,float16,float16,16383,0.2579519947369893
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,32,32,64,0,1,float16,fp8,16383,0.19926400979359946
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,32,32,64,0,1,float16,float16,32767,0.6394826571146647
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,32,32,64,0,1,float16,fp8,32767,0.5796053409576416
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,32,32,64,0,1,float16,float16,1,0.3065920074780782
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,32,32,64,0,1,float16,fp8,1,0.3149440089861552
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,32,32,64,0,1,float16,float16,3,0.31405333677927655
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,32,32,64,0,1,float16,fp8,3,0.315226674079895
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,32,32,64,0,1,float16,float16,7,0.34507731596628827
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,32,32,64,0,1,float16,fp8,7,0.334330677986145
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,32,32,64,0,1,float16,fp8,15,0.3678826491038005
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,32,32,64,0,1,float16,float16,15,0.3659466505050659
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,32,32,64,0,1,float16,float16,31,0.4451306660970052
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,32,32,64,0,1,float16,fp8,31,0.451530655225118
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,32,32,64,0,1,float16,float16,63,0.458352009455363
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,32,32,64,0,1,float16,fp8,63,0.4628213246663411
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,32,32,64,0,1,float16,float16,127,0.46775468190511066
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,32,32,64,0,1,float16,fp8,127,0.47094933191935223
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,32,32,64,0,1,float16,float16,1,0.660314679145813
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,32,32,64,0,1,float16,fp8,1,0.6504106521606445
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,32,32,64,0,1,float16,float16,3,0.6713600158691406
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,32,32,64,0,1,float16,fp8,3,0.6779680252075195
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,32,32,64,0,1,float16,float16,7,0.6903733412424723
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,32,32,64,0,1,float16,fp8,7,0.7029120127360026
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,32,32,64,0,1,float16,float16,1,0.014677333335081736
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,32,32,64,0,1,float16,float16,3,0.014741333822409311
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,32,32,64,0,1,float16,fp8,1,0.015029333531856537
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,32,32,64,0,1,float16,fp8,31,0.8949546813964844
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,32,32,64,0,1,float16,float16,15,0.7238933245340983
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,32,32,64,0,1,float16,fp8,3,0.01481066644191742
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,32,32,64,0,1,float16,fp8,15,0.7348372936248779
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,32,32,64,0,1,float16,float16,7,0.015184000134468079
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,32,32,64,0,1,float16,float16,31,0.8818026383717855
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,32,32,64,0,1,float16,float16,63,0.9079039891560873
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,32,32,64,0,1,float16,fp8,7,0.014554666976133982
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,32,32,64,0,1,float16,fp8,63,0.9165120124816895
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,32,32,64,0,1,float16,float16,15,0.014991999914248785
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,32,32,64,0,1,float16,fp8,15,0.015050667027632395
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,32,32,64,0,1,float16,float16,31,0.016538667182127636
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,32,32,64,0,1,float16,fp8,31,0.016336000214020412
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,32,32,64,0,1,float16,float16,63,0.0194560003777345
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,32,32,64,0,1,float16,float16,511,0.024085332949956257
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,32,32,64,0,1,float16,fp8,63,0.01972266659140587
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,32,32,64,0,1,float16,float16,127,0.01923199991385142
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,32,32,64,0,1,float16,fp8,127,0.019776000330845516
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,32,32,64,0,1,float16,float16,255,0.020154666155576706
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,32,32,64,0,1,float16,fp8,255,0.01989866668979327
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,32,32,64,0,1,float16,fp8,511,0.02332799881696701
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,32,32,64,0,1,float16,float16,1023,0.03908266623814901
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,32,32,64,0,1,float16,fp8,1023,0.034517332911491394
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,32,32,64,0,1,float16,float16,2047,0.07474666833877563
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,32,32,64,0,1,float16,fp8,2047,0.05909866591294607
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,32,32,64,0,1,float16,float16,4095,0.13577600320180258
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,32,32,64,0,1,float16,fp8,4095,0.1093280017375946
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,32,32,64,0,1,float16,float16,8191,0.2568053404490153
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,32,32,64,0,1,float16,fp8,8191,0.20040533939997354
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,32,32,128,0,1,float16,float16,1,0.014565333724021912
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,32,32,128,0,1,float16,fp8,1,0.014922666052977243
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,32,32,64,0,1,float16,float16,16383,0.5072319904963175
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,32,32,64,0,1,float16,fp8,16383,0.38683199882507324
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,32,32,128,0,1,float16,float16,3,0.014896000425020853
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,32,32,128,0,1,float16,fp8,3,0.01533866673707962
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,32,32,128,0,1,float16,float16,7,0.015509333461523056
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,32,32,128,0,1,float16,fp8,7,0.015743999431530636
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,32,32,128,0,1,float16,float16,15,0.018981333822011948
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,32,32,128,0,1,float16,float16,31,0.01851733277241389
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,32,32,128,0,1,float16,fp8,15,0.019194666296243668
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,32,32,128,0,1,float16,fp8,31,0.01916266605257988
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,32,32,128,0,1,float16,float16,63,0.019167999426523846
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,32,32,128,0,1,float16,fp8,63,0.018965333700180054
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,32,32,128,0,1,float16,float16,511,0.07445333401362102
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,32,32,128,0,1,float16,float16,127,0.023141334454218548
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,32,32,128,0,1,float16,float16,1023,0.13527466853459677
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,32,32,128,0,1,float16,fp8,127,0.02254933367172877
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,32,32,128,0,1,float16,float16,2047,0.2572266658147176
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,32,32,128,0,1,float16,float16,255,0.03742400060097376
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,32,32,128,0,1,float16,fp8,255,0.03375466664632162
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,32,32,128,0,1,float16,fp8,511,0.0580266664425532
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,32,32,128,0,1,float16,float16,1,0.010522666076819101
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,32,32,128,0,1,float16,float16,8191,1.038688023885091
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,32,32,128,0,1,float16,fp8,1023,0.11026666561762492
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,32,32,128,0,1,float16,fp8,2047,0.20614933967590332
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,32,32,128,0,1,float16,float16,4095,0.4987786610921224
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,32,32,128,0,1,float16,fp8,4095,0.3954240083694458
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,32,32,128,0,1,float16,fp8,8191,0.7906826337178549
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,32,32,128,0,1,float16,fp8,1,0.009872000043590864
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,32,32,128,0,1,float16,float16,3,0.009770666559537252
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,32,32,128,0,1,float16,fp8,3,0.009743999689817429
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,32,32,128,0,1,float16,float16,7,0.009648000200589498
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,32,32,128,0,1,float16,float16,63,0.012042666474978128
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,32,32,128,0,1,float16,fp8,7,0.010453333457310995
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,32,32,128,0,1,float16,float16,15,0.00983466642598311
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,32,32,128,0,1,float16,fp8,15,0.010234666367371878
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,32,32,128,0,1,float16,float16,255,0.012629333883523941
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,32,32,128,0,1,float16,float16,31,0.009893333539366722
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,32,32,128,0,1,float16,fp8,511,0.016597333053747814
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,32,32,128,0,1,float16,fp8,31,0.01090666651725769
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,32,32,128,0,1,float16,fp8,63,0.011909333368142446
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,32,32,128,0,1,float16,float16,127,0.01221866657336553
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,32,32,128,0,1,float16,fp8,127,0.012282667060693106
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,32,32,128,0,1,float16,fp8,2047,0.028389332195123036
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,32,32,128,0,1,float16,fp8,255,0.012309333930412928
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,32,32,128,0,1,float16,float16,511,0.016549333930015564
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,32,32,128,0,1,float16,float16,1023,0.01708799973130226
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,32,32,128,0,1,float16,fp8,1023,0.01762666677435239
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,32,32,128,0,1,float16,float16,2047,0.02864533414443334
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,32,32,128,0,1,float16,float16,4095,0.03871466716130575
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,32,32,128,0,1,float16,fp8,4095,0.03809600075085958
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,32,32,128,0,1,float16,float16,8191,0.0651093324025472
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,32,32,128,0,1,float16,fp8,8191,0.0555626650651296
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,32,32,128,0,1,float16,float16,16383,0.10671466588973999
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,32,32,128,0,1,float16,fp8,16383,0.0941546658674876
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,32,32,128,0,1,float16,float16,1,0.01202133297920227
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,32,32,128,0,1,float16,float16,32767,0.18750399351119995
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,32,32,128,0,1,float16,fp8,1,0.010122666756312052
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,32,32,128,0,1,float16,fp8,32767,0.16569599509239197
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,32,32,128,0,1,float16,float16,3,0.009679999823371569
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,32,32,128,0,1,float16,fp8,3,0.00973866693675518
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,32,32,128,0,1,float16,float16,7,0.009653333574533463
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,32,32,128,0,1,float16,fp8,7,0.00938666673998038
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,32,32,128,0,1,float16,float16,127,0.012159999459981918
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,32,32,128,0,1,float16,float16,15,0.009829333052039146
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,32,32,128,0,1,float16,fp8,15,0.010106666634480158
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,32,32,128,0,1,float16,float16,31,0.010591999938090643
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,32,32,128,0,1,float16,fp8,31,0.013477332890033722
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,32,32,128,0,1,float16,float16,511,0.01725333308180173
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,32,32,128,0,1,float16,float16,63,0.01219733307758967
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,32,32,128,0,1,float16,fp8,63,0.012330666184425354
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,32,32,128,0,1,float16,fp8,127,0.011567999919255575
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,32,32,128,0,1,float16,float16,255,0.012256000190973282
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,32,32,128,0,1,float16,fp8,255,0.012330666184425354
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,32,32,128,0,1,float16,float16,4095,0.054602667689323425
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,32,32,128,0,1,float16,fp8,511,0.016762666404247284
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,32,32,128,0,1,float16,float16,8191,0.09450133641560872
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,32,32,128,0,1,float16,fp8,8191,0.08171733220418294
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,32,32,128,0,1,float16,float16,1023,0.028751999139785767
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,32,32,128,0,1,float16,float16,16383,0.1656053364276886
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,32,32,128,0,1,float16,fp8,1023,0.028336000939210255
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,32,32,128,0,1,float16,float16,2047,0.03617066641648611
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,32,32,128,0,1,float16,float16,1,0.024671999116738636
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,32,32,128,0,1,float16,fp8,1,0.02521066615978877
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,32,32,128,0,1,float16,float16,32767,0.3025919993718465
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,32,32,128,0,1,float16,fp8,2047,0.03524799893299738
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,32,32,128,0,1,float16,fp8,4095,0.048709332942962646
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,32,32,128,0,1,float16,fp8,16383,0.14457066853841147
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,32,32,128,0,1,float16,fp8,7,0.026933332284291584
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,32,32,128,0,1,float16,fp8,32767,0.2685439984003703
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,32,32,128,0,1,float16,float16,3,0.025424001117547352
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,32,32,128,0,1,float16,fp8,3,0.026399999856948853
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,32,32,128,0,1,float16,float16,7,0.026464000344276428
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,32,32,128,0,1,float16,float16,15,0.03268266717592875
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,32,32,128,0,1,float16,fp8,15,0.03310933212439219
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,32,32,128,0,1,float16,float16,31,0.03279466678698858
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,32,32,128,0,1,float16,fp8,127,0.0400693342089653
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,32,32,128,0,1,float16,fp8,31,0.03310399999221166
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,32,32,128,0,1,float16,float16,63,0.03369066615899404
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,32,32,128,0,1,float16,fp8,63,0.03329066683848699
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,32,32,128,0,1,float16,float16,127,0.04562133550643921
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,32,32,128,0,1,float16,float16,255,0.06844266752401988
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,32,32,128,0,1,float16,fp8,255,0.06577066580454509
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,32,32,128,0,1,float16,float16,511,0.13724799950917563
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,32,32,128,0,1,float16,fp8,511,0.10884799559911092
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,32,32,128,0,1,float16,float16,1023,0.26150399446487427
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,32,32,128,0,1,float16,fp8,1023,0.20801067352294922
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,32,32,128,0,1,float16,float16,2047,0.4994239807128906
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,32,32,128,0,1,float16,fp8,2047,0.3965173165003459
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,32,32,128,0,1,float16,float16,1,0.010250666489203772
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,32,32,128,0,1,float16,fp8,3,0.010298666854699453
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,32,32,128,0,1,float16,float16,4095,0.9820693333943685
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,32,32,128,0,1,float16,fp8,4095,0.7683626810709635
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,32,32,128,0,1,float16,fp8,1,0.01009599988659223
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,32,32,128,0,1,float16,float16,3,0.010112000008424124
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,32,32,128,0,1,float16,float16,7,0.009888000165422758
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,32,32,128,0,1,float16,fp8,7,0.010522666076819101
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,32,32,128,0,1,float16,float16,15,0.010405333091815313
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,32,32,128,0,1,float16,fp8,15,0.010501333822806677
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,32,32,128,0,1,float16,float16,255,0.012815999488035837
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,32,32,128,0,1,float16,fp8,127,0.01309866706530253
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,32,32,128,0,1,float16,float16,31,0.010698666175206503
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,32,32,128,0,1,float16,fp8,31,0.011077333241701126
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,32,32,128,0,1,float16,float16,63,0.012554666648308435
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,32,32,128,0,1,float16,fp8,63,0.012479999413092932
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,32,32,128,0,1,float16,float16,127,0.012768000364303589
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,32,32,128,0,1,float16,fp8,255,0.012698666503032049
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,32,32,128,0,1,float16,float16,511,0.0273333340883255
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,32,32,128,0,1,float16,float16,4095,0.084714670976003
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,32,32,128,0,1,float16,fp8,511,0.027002667387326557
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,32,32,128,0,1,float16,float16,1023,0.03159466634194056
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,32,32,128,0,1,float16,float16,8191,0.14403200149536133
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,32,32,128,0,1,float16,fp8,1023,0.03134933362404505
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,32,32,128,0,1,float16,float16,2047,0.04726399978001913
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,32,32,128,0,1,float16,fp8,2047,0.041850666205088295
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,32,32,128,0,1,float16,fp8,4095,0.06817066669464111
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,32,32,128,0,1,float16,fp8,8191,0.12087466319402058
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,32,32,128,0,1,float16,float16,16383,0.262992004553477
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,32,32,128,0,1,float16,fp8,16383,0.2146186629931132
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,32,32,128,0,1,float16,float16,1,0.043893332282702126
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,32,32,128,0,1,float16,fp8,1,0.045007998744646706
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,32,32,128,0,1,float16,float16,3,0.04541866481304169
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,32,32,128,0,1,float16,float16,32767,0.5034399827321371
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,32,32,128,0,1,float16,fp8,32767,0.40677865346272785
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,32,32,128,0,1,float16,fp8,3,0.04709866642951965
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,32,32,128,0,1,float16,float16,7,0.04747200012207031
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,32,32,128,0,1,float16,fp8,7,0.04797866443792979
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,32,32,128,0,1,float16,float16,15,0.059664001067479454
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,32,32,128,0,1,float16,fp8,15,0.059546664357185364
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,32,32,128,0,1,float16,float16,31,0.06133866806825002
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,32,32,128,0,1,float16,fp8,31,0.059994667768478394
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,32,32,128,0,1,float16,float16,63,0.06608533362547557
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,32,32,128,0,1,float16,fp8,63,0.061493332187334694
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,32,32,128,0,1,float16,float16,127,0.0817493349313736
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,32,32,128,0,1,float16,fp8,127,0.08011733492215474
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,32,32,128,0,1,float16,float16,255,0.12633599837621054
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,32,32,128,0,1,float16,fp8,255,0.12299733360608418
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,32,32,128,0,1,float16,float16,511,0.2578666607538859
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,32,32,128,0,1,float16,fp8,511,0.20621333519617716
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,32,32,128,0,1,float16,float16,1023,0.5050293207168579
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,32,32,128,0,1,float16,fp8,1023,0.39846400419871014
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,32,32,128,0,1,float16,fp8,2047,0.7643893559773763
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,32,32,128,0,1,float16,float16,2047,0.9842133522033691
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,32,32,128,0,1,float16,float16,1,0.0815413345893224
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,32,32,128,0,1,float16,fp8,1,0.0846453309059143
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,32,32,128,0,1,float16,float16,3,0.084906667470932
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,32,32,128,0,1,float16,fp8,3,0.08752000331878662
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,32,32,128,0,1,float16,float16,7,0.08861866593360901
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,32,32,128,0,1,float16,fp8,7,0.09145599603652954
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,32,32,128,0,1,float16,fp8,63,0.12365333239237468
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,32,32,128,0,1,float16,float16,15,0.11504000425338745
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,32,32,128,0,1,float16,fp8,15,0.11307199796040852
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,32,32,128,0,1,float16,float16,31,0.1222773293654124
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,32,32,128,0,1,float16,fp8,31,0.11771200100580852
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,32,32,128,0,1,float16,float16,63,0.12427199880282085
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,32,32,128,0,1,float16,float16,127,0.15470932920773825
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,32,32,128,0,1,float16,fp8,127,0.1530346671740214
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,32,32,128,0,1,float16,float16,255,0.24017600218454996
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,32,32,128,0,1,float16,fp8,255,0.23436800638834634
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,32,32,128,0,1,float16,float16,511,0.49500266710917157
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,32,32,128,0,1,float16,fp8,511,0.39690665404001874
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,32,32,128,0,1,float16,float16,1,0.15633066495259604
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,32,32,128,0,1,float16,fp8,1,0.1673439939816793
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,32,32,128,0,1,float16,float16,3,0.1662506659825643
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,32,32,128,0,1,float16,fp8,3,0.1693920095761617
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,32,32,128,0,1,float16,float16,7,0.1853920022646586
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,32,32,128,0,1,float16,fp8,7,0.17803732554117838
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,32,32,128,0,1,float16,float16,15,0.23056000471115112
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,32,32,128,0,1,float16,fp8,15,0.2310826579729716
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,32,32,128,0,1,float16,float16,31,0.2366080085436503
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,32,32,128,0,1,float16,fp8,31,0.2360693415006002
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,32,32,128,0,1,float16,float16,63,0.23972799380620322
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,32,32,128,0,1,float16,fp8,63,0.23931199312210083
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,32,32,128,0,1,float16,float16,127,0.29872000217437744
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,32,32,128,0,1,float16,fp8,127,0.29690666993459064
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,32,32,128,0,1,float16,float16,255,0.46676798661549884
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,32,32,128,0,1,float16,fp8,3,0.009296000003814697
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,32,32,128,0,1,float16,float16,1,0.009637333452701569
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,32,32,128,0,1,float16,fp8,255,0.4599306583404541
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,32,32,128,0,1,float16,fp8,1,0.009381333366036415
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,32,32,128,0,1,float16,float16,3,0.009914666414260864
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,32,32,128,0,1,float16,float16,31,0.012015999605258306
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,32,32,128,0,1,float16,float16,7,0.009935999910036722
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,32,32,128,0,1,float16,float16,15,0.009706666693091393
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,32,32,128,0,1,float16,fp8,7,0.0099093330403169
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,32,32,128,0,1,float16,fp8,15,0.010277333358923594
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,32,32,128,0,1,float16,fp8,31,0.012037333101034164
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,32,32,128,0,1,float16,float16,63,0.012266666938861212
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,32,32,128,0,1,float16,fp8,63,0.012128000458081564
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,32,32,128,0,1,float16,float16,127,0.012400000045696894
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,32,32,128,0,1,float16,fp8,127,0.011711999773979187
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,32,32,128,0,1,float16,float16,255,0.014069333672523499
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,32,32,128,0,1,float16,fp8,255,0.013850666582584381
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,32,32,128,0,1,float16,float16,511,0.019178666174411774
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,32,32,128,0,1,float16,fp8,511,0.019178666174411774
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,32,32,128,0,1,float16,float16,1023,0.041135999063650765
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,32,32,128,0,1,float16,fp8,1023,0.029557332396507263
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,32,32,128,0,1,float16,float16,2047,0.07120533287525177
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,32,32,128,0,1,float16,fp8,2047,0.057034666339556374
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,32,32,128,0,1,float16,float16,4095,0.1323253313700358
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,32,32,128,0,1,float16,fp8,4095,0.10311999917030334
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,32,32,128,0,1,float16,float16,8191,0.2540160020192464
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,32,32,128,0,1,float16,fp8,8191,0.19445333878199259
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,32,32,128,0,1,float16,float16,16383,0.5044053395589193
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,32,32,128,0,1,float16,fp8,16383,0.3790293137232463
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,32,32,128,0,1,float16,float16,32767,1.248698631922404
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,32,32,128,0,1,float16,fp8,32767,1.1422773202260335
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,32,32,128,0,1,float16,float16,1,0.33749866485595703
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,32,32,128,0,1,float16,fp8,1,0.3354133367538452
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,32,32,128,0,1,float16,float16,3,0.35075732072194415
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,32,32,128,0,1,float16,fp8,3,0.358026663462321
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,32,32,128,0,1,float16,fp8,7,0.37806932131449383
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,32,32,128,0,1,float16,float16,7,0.3683946530024211
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,32,32,128,0,1,float16,float16,15,0.4553440014521281
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,32,32,128,0,1,float16,fp8,15,0.4606240193049113
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,32,32,128,0,1,float16,float16,31,0.4646986722946167
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,32,32,128,0,1,float16,fp8,127,0.5848533312479655
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,32,32,128,0,1,float16,fp8,31,0.46489067872365314
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,32,32,128,0,1,float16,float16,63,0.47148799896240234
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,32,32,128,0,1,float16,fp8,63,0.47046399116516113
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,32,32,128,0,1,float16,float16,127,0.5871200164159139
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,32,32,128,0,1,float16,float16,1,0.6755572954813639
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,32,32,128,0,1,float16,fp8,1,0.6938666502634684
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,32,32,128,0,1,float16,float16,3,0.6932373046875
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,32,32,128,0,1,float16,fp8,3,0.7134186426798502
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,32,32,128,0,1,float16,float16,7,0.7296160062154134
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,32,32,128,0,1,float16,fp8,7,0.7504159609476725
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,32,32,128,0,1,float16,float16,1,0.014767999450365702
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,32,32,128,0,1,float16,fp8,1,0.015263999501864115
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,32,32,128,0,1,float16,float16,15,0.8995359738667806
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,32,32,128,0,1,float16,float16,3,0.014949332922697067
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,32,32,128,0,1,float16,fp8,3,0.015360000232855478
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,32,32,128,0,1,float16,fp8,15,0.9141440391540527
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,32,32,128,0,1,float16,float16,31,0.9219679832458496
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,32,32,128,0,1,float16,float16,7,0.015216000378131866
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,32,32,128,0,1,float16,fp8,7,0.015178666760524115
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,32,32,128,0,1,float16,fp8,31,0.9218453566233317
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,32,32,128,0,1,float16,float16,63,0.9328906536102295
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,32,32,128,0,1,float16,float16,15,0.0162773331006368
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,32,32,128,0,1,float16,fp8,63,0.9320106506347656
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,32,32,128,0,1,float16,fp8,15,0.016362667083740234
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,32,32,128,0,1,float16,float16,31,0.019648000597953796
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,32,32,128,0,1,float16,fp8,31,0.020026666422684986
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,32,32,128,0,1,float16,float16,63,0.019600000232458115
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,32,32,128,0,1,float16,fp8,63,0.020058666666348774
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,32,32,128,0,1,float16,float16,127,0.019871999820073444
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,32,32,128,0,1,float16,fp8,127,0.019914666811625164
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,32,32,128,0,1,float16,float16,255,0.023887999355793
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,32,32,128,0,1,float16,fp8,255,0.02327466756105423
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,32,32,128,0,1,float16,float16,511,0.03926933308442434
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,32,32,128,0,1,float16,fp8,511,0.034448000291983284
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,32,32,128,0,1,float16,float16,1023,0.07507733503977458
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,32,32,128,0,1,float16,fp8,1023,0.05884266893068949
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,32,32,128,0,1,float16,float16,2047,0.1357439955075582
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,32,32,128,0,1,float16,fp8,2047,0.10744000474611919
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,32,32,128,0,1,float16,float16,4095,0.2556053400039673
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,32,32,128,0,1,float16,fp8,4095,0.19829867283503214
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,32,32,128,0,1,float16,float16,8191,0.4973599910736084
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,32,32,128,0,1,float16,fp8,8191,0.3779253164927165
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,24,24,64,0,1,float16,float16,1,0.014373333503802618
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,24,24,64,0,1,float16,fp8,1,0.014287999520699183
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,32,32,128,0,1,float16,float16,16383,0.9980426629384359
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,24,24,64,0,1,float16,float16,3,0.014090667168299357
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,24,24,64,0,1,float16,fp8,3,0.014021333307027817
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,32,32,128,0,1,float16,fp8,16383,0.7450559933980306
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,24,24,64,0,1,float16,float16,7,0.014335999886194864
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,24,24,64,0,1,float16,fp8,7,0.01461333284775416
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,24,24,64,0,1,float16,float16,15,0.014426667243242264
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,24,24,64,0,1,float16,fp8,15,0.014335999886194864
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,24,24,64,0,1,float16,float16,31,0.01602666700879733
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,24,24,64,0,1,float16,fp8,31,0.0161920003592968
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,24,24,64,0,1,float16,float16,63,0.016154666741689045
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,24,24,64,0,1,float16,fp8,63,0.01635733370979627
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,24,24,64,0,1,float16,float16,127,0.01642666632930438
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,24,24,64,0,1,float16,fp8,127,0.016229332735141117
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,24,24,64,0,1,float16,float16,255,0.018719999740521114
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,24,24,64,0,1,float16,fp8,255,0.018751999984184902
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,24,24,64,0,1,float16,float16,511,0.03179733455181122
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,24,24,64,0,1,float16,fp8,511,0.02638400097688039
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,24,24,64,0,1,float16,float16,1023,0.06200533111890157
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,24,24,64,0,1,float16,fp8,1023,0.047370667258898415
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,24,24,64,0,1,float16,float16,2047,0.1211893359820048
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,24,24,64,0,1,float16,fp8,2047,0.08796800176302592
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,24,24,64,0,1,float16,float16,4095,0.21624533335367838
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,24,24,64,0,1,float16,fp8,4095,0.17108800013860068
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,24,24,64,0,1,float16,float16,1,0.010090666512648264
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,24,24,64,0,1,float16,float16,8191,0.43214933077494305
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,24,24,64,0,1,float16,fp8,1,0.009679999823371569
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,24,24,64,0,1,float16,fp8,8191,0.33668800195058185
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,24,24,64,0,1,float16,float16,3,0.009253333633144697
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,24,24,64,0,1,float16,fp8,3,0.009765333185593287
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,24,24,64,0,1,float16,float16,7,0.010384000216921171
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,24,24,64,0,1,float16,fp8,7,0.010341333225369453
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,24,24,64,0,1,float16,float16,15,0.00933333362142245
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,24,24,64,0,1,float16,float16,127,0.010202666744589806
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,24,24,64,0,1,float16,fp8,15,0.009888000165422758
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,24,24,64,0,1,float16,float16,31,0.010197333370645842
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,24,24,64,0,1,float16,fp8,31,0.009984000275532404
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,24,24,64,0,1,float16,float16,63,0.009578666960199675
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,24,24,64,0,1,float16,fp8,63,0.010709332923094431
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,24,24,64,0,1,float16,fp8,127,0.012106666962305704
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,24,24,64,0,1,float16,float16,255,0.012015999605258306
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,24,24,64,0,1,float16,fp8,1023,0.016704000532627106
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,24,24,64,0,1,float16,fp8,255,0.012181332955757776
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,24,24,64,0,1,float16,fp8,2047,0.01766933376590411
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,24,24,64,0,1,float16,float16,511,0.010714666297038397
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,24,24,64,0,1,float16,fp8,511,0.012175999581813812
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,24,24,64,0,1,float16,float16,1023,0.016522667060295742
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,24,24,64,0,1,float16,float16,2047,0.017535999417304993
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,24,24,64,0,1,float16,fp8,16383,0.04826133449872335
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,24,24,64,0,1,float16,float16,16383,0.05177066723505656
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,24,24,64,0,1,float16,float16,1,0.00903466654320558
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,24,24,64,0,1,float16,fp8,32767,0.07825066645940144
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,24,24,64,0,1,float16,float16,32767,0.09212799866994222
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,24,24,64,0,1,float16,float16,4095,0.018277333428462345
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,24,24,64,0,1,float16,float16,7,0.00943999985853831
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,24,24,64,0,1,float16,fp8,3,0.01003200002014637
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,24,24,64,0,1,float16,float16,15,0.009423999736706415
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,24,24,64,0,1,float16,fp8,4095,0.01754133279124896
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,24,24,64,0,1,float16,float16,8191,0.035775999228159584
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,24,24,64,0,1,float16,fp8,8191,0.035504000882307686
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,24,24,64,0,1,float16,fp8,1,0.009919999788204828
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,24,24,64,0,1,float16,fp8,63,0.01081066702802976
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,24,24,64,0,1,float16,float16,3,0.009098666409651438
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,24,24,64,0,1,float16,fp8,7,0.010751999914646149
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,24,24,64,0,1,float16,fp8,15,0.010373333469033241
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,24,24,64,0,1,float16,float16,31,0.009632000078757605
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,24,24,64,0,1,float16,fp8,31,0.010122666756312052
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,24,24,64,0,1,float16,float16,63,0.013786666095256805
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,24,24,64,0,1,float16,float16,127,0.009808000177145004
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,24,24,64,0,1,float16,fp8,127,0.01232533281048139
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,24,24,64,0,1,float16,float16,255,0.010015999898314476
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,24,24,64,0,1,float16,fp8,2047,0.028181334336598713
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,24,24,64,0,1,float16,fp8,255,0.012543999900420507
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,24,24,64,0,1,float16,fp8,511,0.012165332833925882
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,24,24,64,0,1,float16,float16,511,0.010842667271693548
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,24,24,64,0,1,float16,float16,1023,0.016943999876578648
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,24,24,64,0,1,float16,fp8,1023,0.016917333006858826
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,24,24,64,0,1,float16,float16,2047,0.028325334191322327
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,24,24,64,0,1,float16,float16,4095,0.035877334574858345
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,24,24,64,0,1,float16,fp8,4095,0.03693866729736328
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,24,24,64,0,1,float16,float16,8191,0.05151999990145365
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,24,24,64,0,1,float16,fp8,8191,0.04822933177153269
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,24,24,64,0,1,float16,fp8,16383,0.0775786687930425
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,24,24,64,0,1,float16,float16,16383,0.09237866600354512
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,24,24,64,0,1,float16,float16,32767,0.16055466731389365
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,24,24,64,0,1,float16,fp8,32767,0.13831999897956848
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,24,24,64,0,1,float16,float16,1,0.019765333582957584
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,24,24,64,0,1,float16,fp8,1,0.020303999384244282
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,24,24,64,0,1,float16,float16,3,0.019946667055288952
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,24,24,64,0,1,float16,fp8,3,0.020197333147128422
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,24,24,64,0,1,float16,float16,7,0.020453333854675293
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,24,24,64,0,1,float16,fp8,7,0.02085866779088974
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,24,24,64,0,1,float16,float16,15,0.021290667355060577
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,24,24,64,0,1,float16,fp8,15,0.021514666577180225
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,24,24,64,0,1,float16,float16,31,0.02584533393383026
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,24,24,64,0,1,float16,fp8,31,0.026378666361172993
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,24,24,64,0,1,float16,float16,63,0.02609066665172577
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,24,24,64,0,1,float16,fp8,63,0.026394667724768322
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,24,24,64,0,1,float16,float16,127,0.02640533447265625
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,24,24,64,0,1,float16,fp8,127,0.026575999955336254
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,24,24,64,0,1,float16,float16,255,0.03530666728814443
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,24,24,64,0,1,float16,fp8,255,0.031290667752424874
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,24,24,64,0,1,float16,float16,511,0.05382933219273885
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,24,24,64,0,1,float16,fp8,511,0.05093333125114441
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,24,24,64,0,1,float16,float16,1023,0.10613333185513814
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,24,24,64,0,1,float16,fp8,1023,0.08332266906897227
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,24,24,64,0,1,float16,float16,2047,0.1990293264389038
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,24,24,64,0,1,float16,fp8,2047,0.1590559979279836
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,24,24,64,0,1,float16,float16,1,0.009423999736706415
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,24,24,64,0,1,float16,float16,4095,0.3776906728744507
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,24,24,64,0,1,float16,fp8,4095,0.30057599147160846
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,24,24,64,0,1,float16,fp8,1,0.010101333260536194
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,24,24,64,0,1,float16,float16,15,0.010149333626031876
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,24,24,64,0,1,float16,float16,3,0.009679999823371569
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,24,24,64,0,1,float16,fp8,3,0.010053333515922228
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,24,24,64,0,1,float16,float16,63,0.009941333283980688
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,24,24,64,0,1,float16,float16,7,0.009957333405812582
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,24,24,64,0,1,float16,fp8,7,0.009919999788204828
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,24,24,64,0,1,float16,fp8,15,0.009946666657924652
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,24,24,64,0,1,float16,fp8,127,0.012165332833925882
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,24,24,64,0,1,float16,float16,31,0.010293333480755487
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,24,24,64,0,1,float16,fp8,31,0.010309333602587381
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,24,24,64,0,1,float16,fp8,63,0.010949333508809408
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,24,24,64,0,1,float16,float16,127,0.009818666925032934
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,24,24,64,0,1,float16,float16,255,0.010186666622757912
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,24,24,64,0,1,float16,fp8,255,0.012410666793584824
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,24,24,64,0,1,float16,float16,511,0.010879999647537867
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,24,24,64,0,1,float16,fp8,511,0.01251199965675672
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,24,24,64,0,1,float16,float16,4095,0.04357333481311798
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,24,24,64,0,1,float16,float16,1023,0.02720000098148982
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,24,24,64,0,1,float16,fp8,1023,0.02678400029738744
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,24,24,64,0,1,float16,float16,2047,0.030954666435718536
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,24,24,64,0,1,float16,fp8,2047,0.031162666777769726
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,24,24,64,0,1,float16,fp8,4095,0.04123199979464213
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,24,24,64,0,1,float16,fp8,8191,0.06380266447861989
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,24,24,64,0,1,float16,float16,8191,0.07568533221880595
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,24,24,64,0,1,float16,float16,1,0.03461333364248276
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,24,24,64,0,1,float16,float16,16383,0.12962133685747781
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,24,24,64,0,1,float16,fp8,16383,0.10942400495211284
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,24,24,64,0,1,float16,float16,32767,0.23278933763504028
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,24,24,64,0,1,float16,fp8,1,0.03544000039498011
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,24,24,64,0,1,float16,float16,7,0.03642666588226954
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,24,24,64,0,1,float16,fp8,32767,0.19798400004704794
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,24,24,64,0,1,float16,fp8,15,0.03788266579310099
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,24,24,64,0,1,float16,float16,31,0.04636266827583313
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,24,24,64,0,1,float16,float16,3,0.03493333359559377
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,24,24,64,0,1,float16,fp8,3,0.035402665535608925
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,24,24,64,0,1,float16,fp8,7,0.03677333394686381
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,24,24,64,0,1,float16,float16,15,0.037248000502586365
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,24,24,64,0,1,float16,fp8,31,0.046906664967536926
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,24,24,64,0,1,float16,float16,63,0.04628799855709076
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,24,24,64,0,1,float16,fp8,63,0.04748799900213877
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,24,24,64,0,1,float16,float16,127,0.05193600058555603
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,24,24,64,0,1,float16,fp8,127,0.04749333361784617
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,24,24,64,0,1,float16,float16,255,0.06392533580462138
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,24,24,64,0,1,float16,fp8,255,0.06218666831652323
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,24,24,64,0,1,float16,float16,511,0.097871998945872
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,24,24,64,0,1,float16,fp8,511,0.09438932935396831
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,24,24,64,0,1,float16,float16,1023,0.19820266962051392
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,24,24,64,0,1,float16,fp8,1023,0.15763200322786966
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,24,24,64,0,1,float16,fp8,2047,0.30291199684143066
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,24,24,64,0,1,float16,float16,2047,0.3824426730473836
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,24,24,64,0,1,float16,float16,1,0.06342400113741557
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,24,24,64,0,1,float16,fp8,1,0.06469866633415222
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,24,24,64,0,1,float16,float16,3,0.06351999938488007
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,24,24,64,0,1,float16,float16,31,0.08530666430791219
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,24,24,64,0,1,float16,fp8,3,0.06502399841944377
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,24,24,64,0,1,float16,float16,7,0.06560533245404561
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,24,24,64,0,1,float16,fp8,7,0.06746133168538411
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,24,24,64,0,1,float16,float16,15,0.06797866523265839
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,24,24,64,0,1,float16,fp8,15,0.06949333349863689
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,24,24,64,0,1,float16,fp8,31,0.08649067083994548
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,24,24,64,0,1,float16,float16,63,0.09261866410573323
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,24,24,64,0,1,float16,fp8,63,0.08721066514650981
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,24,24,64,0,1,float16,fp8,127,0.09473066528638203
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,24,24,64,0,1,float16,float16,127,0.09532800316810608
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,24,24,64,0,1,float16,float16,255,0.11826133728027344
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,24,24,64,0,1,float16,fp8,255,0.11585066715876262
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,24,24,64,0,1,float16,float16,511,0.18232532342274985
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,24,24,64,0,1,float16,fp8,511,0.17748266458511353
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,24,24,64,0,1,float16,float16,1,0.11987732847531636
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,24,24,64,0,1,float16,fp8,1,0.12270933389663696
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,24,24,64,0,1,float16,float16,3,0.1206719974676768
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,24,24,64,0,1,float16,fp8,3,0.12285332878430684
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,24,24,64,0,1,float16,float16,7,0.12461333473523457
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,24,24,64,0,1,float16,fp8,7,0.12796266873677573
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,24,24,64,0,1,float16,float16,15,0.13216533263524374
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,24,24,64,0,1,float16,fp8,15,0.13220266501108804
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,24,24,64,0,1,float16,float16,31,0.17171732584635416
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,24,24,64,0,1,float16,fp8,31,0.16736533244450888
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,24,24,64,0,1,float16,float16,63,0.17772799730300903
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,24,24,64,0,1,float16,fp8,63,0.17891200383504233
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,24,24,64,0,1,float16,float16,127,0.18138132492701212
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,24,24,64,0,1,float16,fp8,127,0.1811359922091166
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,24,24,64,0,1,float16,float16,1,0.009568000212311745
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,24,24,64,0,1,float16,float16,255,0.2251360019048055
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,24,24,64,0,1,float16,fp8,7,0.010165333126982054
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,24,24,64,0,1,float16,float16,15,0.009765333185593287
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,24,24,64,0,1,float16,fp8,255,0.2232053279876709
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,24,24,64,0,1,float16,fp8,1,0.009653333574533463
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,24,24,64,0,1,float16,float16,3,0.009583999713261923
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,24,24,64,0,1,float16,fp8,3,0.010250666489203772
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,24,24,64,0,1,float16,float16,7,0.010666667173306147
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,24,24,64,0,1,float16,fp8,15,0.010239999741315842
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,24,24,64,0,1,float16,float16,31,0.010549332946538925
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,24,24,64,0,1,float16,fp8,31,0.010405333091815313
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,24,24,64,0,1,float16,float16,63,0.012015999605258306
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,24,24,64,0,1,float16,fp8,63,0.01219733307758967
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,24,24,64,0,1,float16,fp8,511,0.013834666460752487
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,24,24,64,0,1,float16,float16,127,0.012341332932313284
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,24,24,64,0,1,float16,fp8,127,0.012330666184425354
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,24,24,64,0,1,float16,float16,255,0.012538666526476542
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,24,24,64,0,1,float16,fp8,255,0.012170666207869848
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,24,24,64,0,1,float16,float16,511,0.014058666924635569
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,24,24,64,0,1,float16,float16,1023,0.019146667172511418
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,24,24,64,0,1,float16,fp8,1023,0.01932266727089882
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,24,24,64,0,1,float16,float16,2047,0.03054933249950409
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,24,24,64,0,1,float16,fp8,2047,0.029866665601730347
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,24,24,64,0,1,float16,float16,4095,0.06172800064086914
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,24,24,64,0,1,float16,fp8,4095,0.0521919975678126
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,24,24,64,0,1,float16,float16,8191,0.11213333408037822
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,24,24,64,0,1,float16,fp8,8191,0.10272533694903056
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,24,24,64,0,1,float16,fp8,16383,0.19569599628448486
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,24,24,64,0,1,float16,float16,16383,0.21634666124979654
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,24,24,64,0,1,float16,fp8,32767,0.5626240173975626
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,24,24,64,0,1,float16,float16,32767,0.5897226730982462
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,24,24,64,0,1,float16,float16,1,0.23268266518910727
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,24,24,64,0,1,float16,fp8,1,0.23880000909169516
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,24,24,64,0,1,float16,float16,3,0.23336533705393472
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,24,24,64,0,1,float16,fp8,3,0.23965867360432944
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,24,24,64,0,1,float16,float16,7,0.2512106696764628
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,24,24,64,0,1,float16,fp8,7,0.2493706742922465
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,24,24,64,0,1,float16,float16,15,0.27642132838567096
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,24,24,64,0,1,float16,float16,31,0.3356800079345703
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,24,24,64,0,1,float16,fp8,31,0.3406453529993693
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,24,24,64,0,1,float16,fp8,15,0.26708799600601196
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,24,24,64,0,1,float16,float16,63,0.34590399265289307
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,24,24,64,0,1,float16,fp8,63,0.3487786849339803
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,24,24,64,0,1,float16,float16,127,0.3531680107116699
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,24,24,64,0,1,float16,fp8,127,0.35366400082906085
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,24,24,64,0,1,float16,float16,1,0.47490668296813965
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,24,24,64,0,1,float16,fp8,1,0.47177600860595703
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,24,24,64,0,1,float16,float16,3,0.5011519988377889
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,24,24,64,0,1,float16,fp8,3,0.4842613140741984
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,24,24,64,0,1,float16,float16,7,0.5207253297170004
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,24,24,64,0,1,float16,fp8,7,0.5256746610005697
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,24,24,64,0,1,float16,float16,15,0.5458559989929199
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,24,24,64,0,1,float16,float16,1,0.014752000570297241
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,24,24,64,0,1,float16,fp8,1,0.014917333920796713
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,24,24,64,0,1,float16,float16,3,0.014896000425020853
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,24,24,64,0,1,float16,fp8,3,0.014831999937693277
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,24,24,64,0,1,float16,fp8,15,0.5538133382797241
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,24,24,64,0,1,float16,float16,7,0.014965333044528961
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,24,24,64,0,1,float16,float16,31,0.6637333234151205
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,24,24,64,0,1,float16,fp8,7,0.015040000279744467
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,24,24,64,0,1,float16,float16,15,0.014303999642531076
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,24,24,64,0,1,float16,fp8,31,0.673904021581014
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,24,24,64,0,1,float16,float16,63,0.6832799911499023
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,24,24,64,0,1,float16,fp8,15,0.015029333531856537
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,24,24,64,0,1,float16,float16,31,0.014762666076421738
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,24,24,64,0,1,float16,fp8,63,0.6895039876302084
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,24,24,64,0,1,float16,fp8,31,0.014789332946141561
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,24,24,64,0,1,float16,float16,63,0.016544000556071598
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,24,24,64,0,1,float16,fp8,63,0.01634666696190834
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,24,24,64,0,1,float16,float16,127,0.01676799977819125
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,24,24,64,0,1,float16,float16,255,0.01699200024207433
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,24,24,64,0,1,float16,fp8,1023,0.027056001126766205
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,24,24,64,0,1,float16,fp8,127,0.016842667013406754
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,24,24,64,0,1,float16,fp8,255,0.016917333006858826
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,24,24,64,0,1,float16,float16,511,0.019989332805077236
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,24,24,64,0,1,float16,fp8,511,0.019285333653291065
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,24,24,64,0,1,float16,float16,1023,0.032287999987602234
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,24,24,64,0,1,float16,float16,2047,0.06464533507823944
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,24,24,64,0,1,float16,fp8,2047,0.04764799773693085
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,24,24,64,0,1,float16,float16,4095,0.11318932970364888
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,24,24,64,0,1,float16,fp8,4095,0.08849066495895386
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,24,24,64,0,1,float16,float16,8191,0.21285333236058554
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,24,24,64,0,1,float16,fp8,8191,0.1692426602045695
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,24,24,64,0,1,float16,float16,16383,0.44512001673380536
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,24,24,128,0,1,float16,float16,1,0.013829333086808523
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,24,24,128,0,1,float16,fp8,1,0.013893333574136099
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,24,24,64,0,1,float16,fp8,16383,0.3248160084088643
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,24,24,128,0,1,float16,float16,3,0.013925333817799887
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,24,24,128,0,1,float16,fp8,3,0.01443733274936676
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,24,24,128,0,1,float16,float16,7,0.013989333063364029
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,24,24,128,0,1,float16,fp8,7,0.014405333747466406
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,24,24,128,0,1,float16,float16,15,0.015978666643301647
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,24,24,128,0,1,float16,fp8,15,0.015989333391189575
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,24,24,128,0,1,float16,float16,31,0.01579733317097028
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,24,24,128,0,1,float16,fp8,31,0.016282666474580765
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,24,24,128,0,1,float16,float16,63,0.016063999384641647
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,24,24,128,0,1,float16,fp8,63,0.016282666474580765
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,24,24,128,0,1,float16,float16,127,0.018613333503405254
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,24,24,128,0,1,float16,fp8,127,0.018901333212852478
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,24,24,128,0,1,float16,float16,255,0.03128000100453695
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,24,24,128,0,1,float16,fp8,255,0.026517334083716076
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,24,24,128,0,1,float16,float16,511,0.06181866427262624
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,24,24,128,0,1,float16,fp8,511,0.04763199885686239
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,24,24,128,0,1,float16,float16,1023,0.11478933691978455
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,24,24,128,0,1,float16,fp8,1023,0.08890133102734883
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,24,24,128,0,1,float16,float16,2047,0.21609065930048624
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,24,24,128,0,1,float16,fp8,2047,0.17443732420603433
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,24,24,128,0,1,float16,float16,4095,0.4485386610031128
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,24,24,128,0,1,float16,fp8,4095,0.33349335193634033
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,24,24,128,0,1,float16,float16,1,0.009695999945203463
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,24,24,128,0,1,float16,fp8,1,0.009877333417534828
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,24,24,128,0,1,float16,float16,8191,0.8439413706461588
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,24,24,128,0,1,float16,fp8,8191,0.6634399890899658
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,24,24,128,0,1,float16,float16,3,0.010538666198650995
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,24,24,128,0,1,float16,fp8,3,0.009935999910036722
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,24,24,128,0,1,float16,float16,7,0.009461333354314169
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,24,24,128,0,1,float16,fp8,7,0.00980266680320104
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,24,24,128,0,1,float16,float16,15,0.009749333063761393
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,24,24,128,0,1,float16,fp8,15,0.009946666657924652
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,24,24,128,0,1,float16,fp8,127,0.012138667205969492
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,24,24,128,0,1,float16,float16,31,0.010415999839703241
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,24,24,128,0,1,float16,float16,255,0.012266666938861212
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,24,24,128,0,1,float16,fp8,31,0.010565333068370819
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,24,24,128,0,1,float16,float16,63,0.012181332955757776
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,24,24,128,0,1,float16,fp8,63,0.012138667205969492
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,24,24,128,0,1,float16,float16,127,0.011968000481526056
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,24,24,128,0,1,float16,fp8,255,0.012223999947309494
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,24,24,128,0,1,float16,float16,511,0.01643199970324834
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,24,24,128,0,1,float16,fp8,511,0.01621333385507266
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,24,24,128,0,1,float16,float16,1023,0.017674667139848072
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,24,24,128,0,1,float16,fp8,1023,0.01759999990463257
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,24,24,128,0,1,float16,float16,2047,0.0182239996890227
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,24,24,128,0,1,float16,fp8,2047,0.017488000293572743
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,24,24,128,0,1,float16,float16,4095,0.03569599986076355
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,24,24,128,0,1,float16,float16,16383,0.09285333752632141
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,24,24,128,0,1,float16,fp8,4095,0.03508266558249792
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,24,24,128,0,1,float16,float16,8191,0.05415999889373779
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,24,24,128,0,1,float16,fp8,8191,0.04836266736189524
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,24,24,128,0,1,float16,fp8,16383,0.07983466486136119
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,24,24,128,0,1,float16,float16,1,0.009925333162148794
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,24,24,128,0,1,float16,float16,32767,0.1602826714515686
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,24,24,128,0,1,float16,fp8,1,0.009626666704813639
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,24,24,128,0,1,float16,fp8,32767,0.14339199662208557
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,24,24,128,0,1,float16,float16,3,0.009472000102202097
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,24,24,128,0,1,float16,fp8,3,0.01090666651725769
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,24,24,128,0,1,float16,float16,7,0.009679999823371569
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,24,24,128,0,1,float16,fp8,7,0.009808000177145004
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,24,24,128,0,1,float16,float16,15,0.009392000113924345
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,24,24,128,0,1,float16,fp8,15,0.009759999811649323
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,24,24,128,0,1,float16,float16,31,0.010165333126982054
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,24,24,128,0,1,float16,fp8,31,0.01027199998497963
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,24,24,128,0,1,float16,float16,63,0.011973333855470022
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,24,24,128,0,1,float16,fp8,63,0.011839999506870905
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,24,24,128,0,1,float16,fp8,127,0.011253333340088526
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,24,24,128,0,1,float16,float16,127,0.011962667107582092
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,24,24,128,0,1,float16,fp8,1023,0.027653334041436512
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,24,24,128,0,1,float16,float16,255,0.011823999385039011
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,24,24,128,0,1,float16,fp8,255,0.012144000579913458
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,24,24,128,0,1,float16,float16,511,0.016234666109085083
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,24,24,128,0,1,float16,fp8,511,0.01621866722901662
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,24,24,128,0,1,float16,float16,8191,0.09155733386675517
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,24,24,128,0,1,float16,float16,1023,0.028079998989899952
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,24,24,128,0,1,float16,float16,2047,0.034645333886146545
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,24,24,128,0,1,float16,fp8,2047,0.034789333740870156
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,24,24,128,0,1,float16,float16,4095,0.05096533397833506
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,24,24,128,0,1,float16,fp8,4095,0.04770666857560476
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,24,24,128,0,1,float16,fp8,8191,0.07993066807587941
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,24,24,128,0,1,float16,float16,16383,0.15986133615175882
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,24,24,128,0,1,float16,fp8,16383,0.14314666390419006
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,24,24,128,0,1,float16,float16,1,0.019530666371186573
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,24,24,128,0,1,float16,float16,7,0.02082666630546252
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,24,24,128,0,1,float16,float16,32767,0.2978293299674988
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,24,24,128,0,1,float16,fp8,32767,0.2617759903271993
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,24,24,128,0,1,float16,fp8,1,0.019871999820073444
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,24,24,128,0,1,float16,float16,3,0.01977066695690155
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,24,24,128,0,1,float16,float16,63,0.02606933315594991
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,24,24,128,0,1,float16,fp8,3,0.020549333343903225
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,24,24,128,0,1,float16,fp8,63,0.025941332181294758
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,24,24,128,0,1,float16,fp8,7,0.021173333128293354
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,24,24,128,0,1,float16,float16,15,0.025802666942278545
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,24,24,128,0,1,float16,fp8,15,0.02587733417749405
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,24,24,128,0,1,float16,float16,31,0.02593066543340683
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,24,24,128,0,1,float16,fp8,31,0.025349333882331848
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,24,24,128,0,1,float16,float16,127,0.03494933247566223
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,24,24,128,0,1,float16,fp8,127,0.030938667555650074
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,24,24,128,0,1,float16,float16,255,0.05356266597906748
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,24,24,128,0,1,float16,fp8,255,0.050853331883748375
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,24,24,128,0,1,float16,float16,511,0.10620799660682678
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,24,24,128,0,1,float16,fp8,511,0.08352532982826233
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,24,24,128,0,1,float16,float16,4095,0.7409813404083252
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,24,24,128,0,1,float16,float16,1023,0.19788267215092978
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,24,24,128,0,1,float16,fp8,1023,0.1584053337574005
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,24,24,128,0,1,float16,float16,2047,0.3811093171437581
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,24,24,128,0,1,float16,fp8,2047,0.3019520044326782
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,24,24,128,0,1,float16,float16,1,0.01009599988659223
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,24,24,128,0,1,float16,fp8,1,0.009957333405812582
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,24,24,128,0,1,float16,fp8,4095,0.5822399854660034
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,24,24,128,0,1,float16,float16,3,0.00979200005531311
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,24,24,128,0,1,float16,fp8,3,0.010255999863147736
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,24,24,128,0,1,float16,float16,7,0.009312000125646591
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,24,24,128,0,1,float16,fp8,7,0.009952000031868616
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,24,24,128,0,1,float16,float16,15,0.010053333515922228
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,24,24,128,0,1,float16,fp8,15,0.010234666367371878
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,24,24,128,0,1,float16,float16,31,0.010586666564146677
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,24,24,128,0,1,float16,fp8,31,0.010885333021481832
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,24,24,128,0,1,float16,float16,63,0.012400000045696894
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,24,24,128,0,1,float16,fp8,63,0.012629333883523941
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,24,24,128,0,1,float16,float16,127,0.011706666400035223
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,24,24,128,0,1,float16,fp8,127,0.012335999558369318
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,24,24,128,0,1,float16,float16,255,0.012879999975363413
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,24,24,128,0,1,float16,fp8,255,0.012655999511480331
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,24,24,128,0,1,float16,float16,511,0.026848000784715016
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,24,24,128,0,1,float16,fp8,511,0.026613332331180573
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,24,24,128,0,1,float16,float16,1023,0.030928000807762146
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,24,24,128,0,1,float16,fp8,1023,0.031066666046778362
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,24,24,128,0,1,float16,float16,2047,0.043466667334238686
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,24,24,128,0,1,float16,fp8,2047,0.041189332803090416
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,24,24,128,0,1,float16,float16,4095,0.07506666580835979
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,24,24,128,0,1,float16,float16,16383,0.23146667083104452
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,24,24,128,0,1,float16,fp8,4095,0.06532266736030579
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,24,24,128,0,1,float16,float16,8191,0.127920001745224
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,24,24,128,0,1,float16,fp8,8191,0.1148426632086436
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,24,24,128,0,1,float16,fp8,16383,0.20919466018676758
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,24,24,128,0,1,float16,float16,1,0.03461333364248276
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,24,24,128,0,1,float16,float16,32767,0.4379306634267171
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,24,24,128,0,1,float16,float16,7,0.03629333277543386
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,24,24,128,0,1,float16,fp8,1,0.035360001027584076
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,24,24,128,0,1,float16,fp8,32767,0.3949333429336548
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,24,24,128,0,1,float16,float16,3,0.03581333408753077
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,24,24,128,0,1,float16,fp8,3,0.03681066632270813
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,24,24,128,0,1,float16,fp8,7,0.037920000652472176
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,24,24,128,0,1,float16,float16,15,0.04631466666857401
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,24,24,128,0,1,float16,fp8,15,0.04577599962552389
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,24,24,128,0,1,float16,float16,31,0.04637866715590159
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,24,24,128,0,1,float16,fp8,127,0.06273066500822704
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,24,24,128,0,1,float16,float16,255,0.09734933574994405
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,24,24,128,0,1,float16,fp8,31,0.04651733239491781
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,24,24,128,0,1,float16,float16,63,0.05198400219281515
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,24,24,128,0,1,float16,fp8,63,0.04683200021584829
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,24,24,128,0,1,float16,float16,127,0.06435200075308482
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,24,24,128,0,1,float16,fp8,255,0.09450667103131612
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,24,24,128,0,1,float16,float16,511,0.19880000750223795
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,24,24,128,0,1,float16,fp8,511,0.1574613352616628
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,24,24,128,0,1,float16,fp8,1023,0.3031146725018819
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,24,24,128,0,1,float16,float16,3,0.06523199876149495
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,24,24,128,0,1,float16,float16,1023,0.38344534238179523
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,24,24,128,0,1,float16,float16,1,0.06312533219655354
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,24,24,128,0,1,float16,float16,2047,0.7438879807790121
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,24,24,128,0,1,float16,fp8,1,0.06485866506894429
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,24,24,128,0,1,float16,fp8,2047,0.5808106660842896
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,24,24,128,0,1,float16,fp8,3,0.06684799989064534
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,24,24,128,0,1,float16,float16,7,0.06791466474533081
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,24,24,128,0,1,float16,fp8,7,0.06984533369541168
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,24,24,128,0,1,float16,float16,15,0.0863200028737386
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,24,24,128,0,1,float16,fp8,15,0.08665066957473755
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,24,24,128,0,1,float16,float16,31,0.09327999750773112
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,24,24,128,0,1,float16,fp8,31,0.08739200234413147
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,24,24,128,0,1,float16,float16,63,0.09563733140627544
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,24,24,128,0,1,float16,fp8,63,0.09469333291053772
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,24,24,128,0,1,float16,float16,127,0.11826666196187337
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,24,24,128,0,1,float16,fp8,127,0.11719466249148051
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,24,24,128,0,1,float16,float16,255,0.18336532513300577
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,24,24,128,0,1,float16,fp8,255,0.17859200636545816
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,24,24,128,0,1,float16,float16,511,0.37679465611775714
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,24,24,128,0,1,float16,fp8,511,0.30082132418950397
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,24,24,128,0,1,float16,fp8,3,0.12876799702644348
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,24,24,128,0,1,float16,float16,1,0.11890133221944173
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,24,24,128,0,1,float16,fp8,1,0.12220799922943115
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,24,24,128,0,1,float16,float16,3,0.124208003282547
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,24,24,128,0,1,float16,float16,7,0.13499733805656433
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,24,24,128,0,1,float16,fp8,7,0.13427733381589255
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,24,24,128,0,1,float16,float16,15,0.17506666978200278
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,24,24,128,0,1,float16,fp8,15,0.16967999935150146
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,24,24,128,0,1,float16,float16,31,0.17940266927083334
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,24,24,128,0,1,float16,float16,63,0.18172800540924072
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,24,24,128,0,1,float16,fp8,31,0.17915733655293783
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,24,24,128,0,1,float16,fp8,63,0.18109333515167236
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,24,24,128,0,1,float16,float16,127,0.22710933287938437
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,24,24,128,0,1,float16,fp8,127,0.2251466711362203
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,24,24,128,0,1,float16,float16,255,0.35385600725809735
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,24,24,128,0,1,float16,float16,1,0.009477333476146063
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,24,24,128,0,1,float16,fp8,1,0.00903466654320558
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,24,24,128,0,1,float16,fp8,255,0.3470613161722819
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,24,24,128,0,1,float16,float16,3,0.009461333354314169
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,24,24,128,0,1,float16,fp8,3,0.009797333429257074
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,24,24,128,0,1,float16,float16,7,0.010304000228643417
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,24,24,128,0,1,float16,fp8,7,0.010159999753038088
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,24,24,128,0,1,float16,float16,15,0.010437333335479101
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,24,24,128,0,1,float16,fp8,15,0.010405333091815313
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,24,24,128,0,1,float16,float16,31,0.011391999820868174
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,24,24,128,0,1,float16,fp8,31,0.01191466674208641
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,24,24,128,0,1,float16,float16,63,0.011855999628702799
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,24,24,128,0,1,float16,fp8,63,0.012096000214417776
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,24,24,128,0,1,float16,float16,127,0.012223999947309494
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,24,24,128,0,1,float16,fp8,127,0.012330666184425354
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,24,24,128,0,1,float16,float16,255,0.01404800017674764
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,24,24,128,0,1,float16,fp8,255,0.013877333452304205
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,24,24,128,0,1,float16,float16,511,0.01833600054184596
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,24,24,128,0,1,float16,float16,4095,0.11318932970364888
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,24,24,128,0,1,float16,fp8,511,0.018944000204404194
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,24,24,128,0,1,float16,float16,1023,0.029866665601730347
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,24,24,128,0,1,float16,fp8,1023,0.029343999922275543
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,24,24,128,0,1,float16,float16,2047,0.061381335059801735
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,24,24,128,0,1,float16,fp8,2047,0.051274667183558144
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,24,24,128,0,1,float16,fp8,4095,0.10079999764760335
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,24,24,128,0,1,float16,float16,8191,0.21380800008773804
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,24,24,128,0,1,float16,fp8,8191,0.1902666687965393
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,24,24,128,0,1,float16,float16,16383,0.42135465145111084
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,24,24,128,0,1,float16,fp8,16383,0.3685386578241984
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,24,24,128,0,1,float16,float16,32767,1.1661866505940754
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,24,24,128,0,1,float16,fp8,32767,1.1062133312225342
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,24,24,128,0,1,float16,float16,1,0.2411200006802877
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,24,24,128,0,1,float16,float16,3,0.2616906762123108
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,24,24,128,0,1,float16,fp8,1,0.2417973279953003
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,24,24,128,0,1,float16,fp8,3,0.25819732745488483
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,24,24,128,0,1,float16,float16,7,0.27856000264485675
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,24,24,128,0,1,float16,fp8,7,0.2828906575838725
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,24,24,128,0,1,float16,fp8,15,0.34755198160807294
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,24,24,128,0,1,float16,float16,15,0.34194668134053546
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,24,24,128,0,1,float16,float16,31,0.3501439889272054
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,24,24,128,0,1,float16,fp8,31,0.3511146704355876
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,24,24,128,0,1,float16,float16,63,0.3554453452428182
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,24,24,128,0,1,float16,fp8,63,0.35437333583831787
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,24,24,128,0,1,float16,float16,127,0.4432426691055298
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,24,24,128,0,1,float16,fp8,127,0.4408479928970337
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,24,24,128,0,1,float16,fp8,1,0.524288018544515
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,24,24,128,0,1,float16,float16,1,0.5102506478627523
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,24,24,128,0,1,float16,float16,3,0.5244373480478922
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,24,24,128,0,1,float16,fp8,3,0.5382879972457886
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,24,24,128,0,1,float16,float16,7,0.5498773256937662
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,24,24,128,0,1,float16,fp8,7,0.5651359955469767
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,24,24,128,0,1,float16,fp8,15,0.6868159770965576
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,24,24,128,0,1,float16,float16,15,0.6765279769897461
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,24,24,128,0,1,float16,float16,31,0.6931893030802408
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,24,24,128,0,1,float16,float16,1,0.014042666802803675
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,24,24,128,0,1,float16,fp8,1,0.013898666948080063
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,24,24,128,0,1,float16,float16,3,0.013765333841244379
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,24,24,128,0,1,float16,fp8,31,0.6962453524271647
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,24,24,128,0,1,float16,fp8,3,0.013957332819700241
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,24,24,128,0,1,float16,float16,7,0.014138666292031607
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,24,24,128,0,1,float16,fp8,7,0.013760000467300415
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,24,24,128,0,1,float16,float16,63,0.7016800244649252
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,24,24,128,0,1,float16,float16,15,0.01440000037352244
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,24,24,128,0,1,float16,fp8,63,0.7053600152333578
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,24,24,128,0,1,float16,fp8,15,0.014335999886194864
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,24,24,128,0,1,float16,float16,31,0.016261332978804905
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,24,24,128,0,1,float16,fp8,255,0.019317333896954853
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,24,24,128,0,1,float16,fp8,31,0.016384000579516094
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,24,24,128,0,1,float16,float16,63,0.016437333077192307
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,24,24,128,0,1,float16,fp8,63,0.016341333587964375
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,24,24,128,0,1,float16,float16,127,0.016773333152135212
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,24,24,128,0,1,float16,fp8,127,0.016885332763195038
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,24,24,128,0,1,float16,float16,255,0.01953599974513054
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,24,24,128,0,1,float16,float16,511,0.0322080006202062
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,24,24,128,0,1,float16,fp8,511,0.027087998886903126
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,24,24,128,0,1,float16,float16,1023,0.06502933303515117
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,24,24,128,0,1,float16,fp8,1023,0.04794133206208547
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,24,24,128,0,1,float16,float16,2047,0.11379733681678772
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,24,24,128,0,1,float16,fp8,2047,0.08779199918111165
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,24,24,128,0,1,float16,float16,4095,0.21474667390187582
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,24,24,128,0,1,float16,fp8,4095,0.16950400670369467
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,24,24,128,0,1,float16,float16,8191,0.41052265961964923
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,24,24,128,0,1,float16,fp8,8191,0.32211732864379883
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,16,16,64,0,1,float16,float16,1,0.01481066644191742
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,24,24,128,0,1,float16,float16,16383,0.8705600102742513
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,16,16,64,0,1,float16,fp8,1,0.015018666783968607
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,24,24,128,0,1,float16,fp8,16383,0.6326719919840494
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,16,16,64,0,1,float16,float16,3,0.015130666395028433
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,16,16,64,0,1,float16,fp8,3,0.015050667027632395
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,16,16,64,0,1,float16,float16,7,0.01522133375207583
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,16,16,64,0,1,float16,fp8,7,0.015306666493415833
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,16,16,64,0,1,float16,float16,15,0.015247999380032221
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,16,16,64,0,1,float16,fp8,15,0.015664000064134598
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,16,16,64,0,1,float16,float16,255,0.020096000283956528
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,16,16,64,0,1,float16,float16,31,0.01643199970324834
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,16,16,64,0,1,float16,fp8,31,0.016538667182127636
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,16,16,64,0,1,float16,float16,63,0.019194666296243668
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,16,16,64,0,1,float16,fp8,63,0.019834666202465694
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,16,16,64,0,1,float16,float16,127,0.0198186660806338
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,16,16,64,0,1,float16,fp8,127,0.01985599969824155
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,16,16,64,0,1,float16,fp8,255,0.020106667031844456
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,16,16,64,0,1,float16,float16,511,0.023728000621000927
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,16,16,64,0,1,float16,fp8,511,0.02317333221435547
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,16,16,64,0,1,float16,float16,1023,0.03938133269548416
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,16,16,64,0,1,float16,fp8,1023,0.034501334031422935
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,16,16,64,0,1,float16,float16,8191,0.25730667511622113
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,16,16,64,0,1,float16,float16,2047,0.0745119998852412
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,16,16,64,0,1,float16,fp8,2047,0.059343998630841575
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,16,16,64,0,1,float16,float16,4095,0.13619200388590494
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,16,16,64,0,1,float16,fp8,4095,0.10922132929166158
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,16,16,64,0,1,float16,fp8,8191,0.20044799645741782
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,16,16,64,0,1,float16,float16,16383,0.5076053142547607
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,16,16,64,0,1,float16,float16,7,0.010309333602587381
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,16,16,64,0,1,float16,float16,1,0.009717333440979322
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,16,16,64,0,1,float16,float16,15,0.009328000247478485
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,16,16,64,0,1,float16,float16,31,0.00984533317387104
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,16,16,64,0,1,float16,fp8,1,0.009679999823371569
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,16,16,64,0,1,float16,fp8,16383,0.384549339612325
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,16,16,64,0,1,float16,float16,3,0.009306666751702627
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,16,16,64,0,1,float16,float16,127,0.01027199998497963
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,16,16,64,0,1,float16,fp8,3,0.009813333551088968
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,16,16,64,0,1,float16,float16,255,0.010319999729593595
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,16,16,64,0,1,float16,fp8,7,0.010101333260536194
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,16,16,64,0,1,float16,fp8,15,0.010421333213647207
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,16,16,64,0,1,float16,fp8,31,0.0099093330403169
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,16,16,64,0,1,float16,float16,63,0.01293333371480306
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,16,16,64,0,1,float16,fp8,63,0.010650667051474253
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,16,16,64,0,1,float16,fp8,127,0.012181332955757776
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,16,16,64,0,1,float16,fp8,255,0.012138667205969492
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,16,16,64,0,1,float16,float16,511,0.010832000523805618
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,16,16,64,0,1,float16,fp8,511,0.01268799975514412
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,16,16,64,0,1,float16,float16,1023,0.016623999923467636
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,16,16,64,0,1,float16,fp8,1023,0.016528000434239704
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,16,16,64,0,1,float16,float16,2047,0.017370666066805523
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,16,16,64,0,1,float16,fp8,2047,0.01730666682124138
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,16,16,64,0,1,float16,float16,4095,0.017845333864291508
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,16,16,64,0,1,float16,fp8,4095,0.017727999637524288
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,16,16,64,0,1,float16,float16,8191,0.02882133424282074
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,16,16,64,0,1,float16,fp8,8191,0.02849599967400233
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,16,16,64,0,1,float16,float16,16383,0.038773333032925926
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,16,16,64,0,1,float16,fp8,16383,0.038592000802357994
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,16,16,64,0,1,float16,float16,32767,0.06834133466084798
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,16,16,64,0,1,float16,fp8,32767,0.05756799876689911
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,16,16,64,0,1,float16,float16,1,0.009503999724984169
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,16,16,64,0,1,float16,fp8,1,0.009754666437705358
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,16,16,64,0,1,float16,float16,65535,0.11646933356920879
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,16,16,64,0,1,float16,float16,3,0.009290666629870733
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,16,16,64,0,1,float16,fp8,65535,0.10200533270835876
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,16,16,64,0,1,float16,fp8,3,0.009717333440979322
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,16,16,64,0,1,float16,fp8,31,0.010149333626031876
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,16,16,64,0,1,float16,float16,7,0.00926399976015091
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,16,16,64,0,1,float16,fp8,7,0.010213333492477735
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,16,16,64,0,1,float16,float16,15,0.009349333122372627
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,16,16,64,0,1,float16,fp8,15,0.009466666728258133
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,16,16,64,0,1,float16,float16,31,0.009482666850090027
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,16,16,64,0,1,float16,float16,63,0.009493333597977957
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,16,16,64,0,1,float16,fp8,63,0.010757333288590113
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,16,16,64,0,1,float16,float16,127,0.00983466642598311
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,16,16,64,0,1,float16,fp8,127,0.01231466606259346
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,16,16,64,0,1,float16,float16,255,0.00978133330742518
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,16,16,64,0,1,float16,fp8,255,0.011834666132926941
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,16,16,64,0,1,float16,float16,511,0.010687999427318573
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,16,16,64,0,1,float16,fp8,511,0.012351999680201212
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,16,16,64,0,1,float16,float16,1023,0.016623999923467636
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,16,16,64,0,1,float16,fp8,1023,0.016490666816631954
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,16,16,64,0,1,float16,float16,2047,0.017866666118303936
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,16,16,64,0,1,float16,float16,16383,0.06629333396752675
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,16,16,64,0,1,float16,fp8,2047,0.017680000513792038
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,16,16,64,0,1,float16,float16,4095,0.02886933336655299
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,16,16,64,0,1,float16,fp8,4095,0.027893332143624622
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,16,16,64,0,1,float16,float16,8191,0.038831998904546104
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,16,16,64,0,1,float16,fp8,8191,0.03781333317359289
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,16,16,64,0,1,float16,fp8,16383,0.05575466652711233
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,16,16,64,0,1,float16,fp8,32767,0.09154133001963298
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,16,16,64,0,1,float16,float16,32767,0.10593600074450175
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,16,16,64,0,1,float16,float16,1,0.014490666488806406
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,16,16,64,0,1,float16,fp8,1,0.014965333044528961
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,16,16,64,0,1,float16,float16,3,0.014778666198253632
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,16,16,64,0,1,float16,float16,15,0.015322666615247726
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,16,16,64,0,1,float16,fp8,3,0.015087999403476715
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,16,16,64,0,1,float16,float16,7,0.015189333508412043
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,16,16,64,0,1,float16,float16,65535,0.18837332725524902
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,16,16,64,0,1,float16,fp8,7,0.01565333331624667
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,16,16,64,0,1,float16,fp8,65535,0.15793599685033163
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,16,16,64,0,1,float16,fp8,15,0.01581866666674614
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,16,16,64,0,1,float16,float16,31,0.01883200059334437
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,16,16,64,0,1,float16,fp8,31,0.01882133384545644
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,16,16,64,0,1,float16,float16,63,0.019130667050679524
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,16,16,64,0,1,float16,fp8,63,0.019386666516462963
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,16,16,64,0,1,float16,float16,127,0.01953599974513054
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,16,16,64,0,1,float16,fp8,127,0.019765333582957584
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,16,16,64,0,1,float16,float16,255,0.023530667026837666
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,16,16,64,0,1,float16,fp8,255,0.022831998765468597
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,16,16,64,0,1,float16,float16,511,0.03786666691303253
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,16,16,64,0,1,float16,fp8,511,0.033488000432650246
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,16,16,64,0,1,float16,float16,1023,0.07451733450094859
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,16,16,64,0,1,float16,fp8,1023,0.05830933153629303
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,16,16,64,0,1,float16,float16,2047,0.1363200048605601
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,16,16,64,0,1,float16,float16,1,0.009530666594703993
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,16,16,64,0,1,float16,fp8,2047,0.11040533582369487
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,16,16,64,0,1,float16,float16,4095,0.2579360008239746
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,16,16,64,0,1,float16,fp8,4095,0.20537066459655762
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,16,16,64,0,1,float16,fp8,8191,0.40414400895436603
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,16,16,64,0,1,float16,fp8,1,0.00985599992175897
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,16,16,64,0,1,float16,float16,8191,0.5288053353627523
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,16,16,64,0,1,float16,float16,3,0.009546666716535887
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,16,16,64,0,1,float16,fp8,3,0.00973866693675518
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,16,16,64,0,1,float16,float16,7,0.008597333605090777
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,16,16,64,0,1,float16,fp8,7,0.009914666414260864
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,16,16,64,0,1,float16,float16,15,0.009328000247478485
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,16,16,64,0,1,float16,fp8,15,0.011071999867757162
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,16,16,64,0,1,float16,float16,31,0.008938666433095932
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,16,16,64,0,1,float16,fp8,31,0.010112000008424124
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,16,16,64,0,1,float16,float16,63,0.009946666657924652
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,16,16,64,0,1,float16,fp8,63,0.010570666442314783
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,16,16,64,0,1,float16,float16,127,0.009957333405812582
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,16,16,64,0,1,float16,fp8,127,0.012282667060693106
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,16,16,64,0,1,float16,float16,255,0.010069333637754122
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,16,16,64,0,1,float16,fp8,255,0.011850666254758835
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,16,16,64,0,1,float16,float16,511,0.010741333166758219
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,16,16,64,0,1,float16,float16,4095,0.03640000025431315
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,16,16,64,0,1,float16,fp8,511,0.012400000045696894
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,16,16,64,0,1,float16,fp8,8191,0.04868266483147939
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,16,16,64,0,1,float16,float16,1023,0.017317333569129307
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,16,16,64,0,1,float16,fp8,1023,0.016885332763195038
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,16,16,64,0,1,float16,float16,2047,0.028730665644009907
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,16,16,64,0,1,float16,fp8,2047,0.028213332096735638
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,16,16,64,0,1,float16,fp8,4095,0.03514666606982549
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,16,16,64,0,1,float16,float16,8191,0.05561600128809611
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,16,16,64,0,1,float16,float16,16383,0.09460266431172688
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,16,16,64,0,1,float16,fp8,16383,0.0798933357000351
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,16,16,64,0,1,float16,float16,32767,0.16450132926305136
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,16,16,64,0,1,float16,fp8,32767,0.14041067163149515
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,16,16,64,0,1,float16,float16,1,0.024490666886170704
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,16,16,64,0,1,float16,float16,65535,0.30535467465718585
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,16,16,64,0,1,float16,fp8,3,0.025626666843891144
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,16,16,64,0,1,float16,fp8,1,0.025279998779296875
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,16,16,64,0,1,float16,float16,3,0.02462933212518692
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,16,16,64,0,1,float16,fp8,65535,0.2566933234532674
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,16,16,64,0,1,float16,float16,7,0.025631998976071674
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,16,16,64,0,1,float16,fp8,31,0.033088001112143196
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,16,16,64,0,1,float16,fp8,7,0.025775998830795288
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,16,16,64,0,1,float16,float16,15,0.026426665484905243
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,16,16,64,0,1,float16,fp8,15,0.027221334477265675
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,16,16,64,0,1,float16,float16,31,0.032458665470282234
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,16,16,64,0,1,float16,float16,63,0.033013333876927696
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,16,16,64,0,1,float16,fp8,63,0.03345600018898646
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,16,16,64,0,1,float16,float16,127,0.033861334125200905
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,16,16,64,0,1,float16,fp8,127,0.033189333975315094
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,16,16,64,0,1,float16,float16,255,0.04543466866016388
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,16,16,64,0,1,float16,fp8,255,0.04072533299525579
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,16,16,64,0,1,float16,float16,511,0.06863999863465627
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,16,16,64,0,1,float16,fp8,511,0.06569600105285645
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,16,16,64,0,1,float16,float16,1023,0.13784533739089966
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,16,16,64,0,1,float16,fp8,1023,0.10771200060844421
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,16,16,64,0,1,float16,float16,2047,0.25973333915074664
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,16,16,64,0,1,float16,fp8,2047,0.2074986696243286
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,16,16,64,0,1,float16,float16,4095,0.5012746651967367
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,16,16,64,0,1,float16,fp8,4095,0.3930826584498088
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,16,16,64,0,1,float16,float16,1,0.04428266485532125
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,16,16,64,0,1,float16,fp8,1,0.0451200008392334
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,16,16,64,0,1,float16,float16,3,0.04423999786376953
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,16,16,64,0,1,float16,fp8,3,0.044879997769991554
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,16,16,64,0,1,float16,float16,7,0.04582933088143667
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,16,16,64,0,1,float16,fp8,63,0.06035733222961426
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,16,16,64,0,1,float16,fp8,7,0.047194664676984154
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,16,16,64,0,1,float16,float16,15,0.04764799773693085
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,16,16,64,0,1,float16,fp8,15,0.04821866750717163
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,16,16,64,0,1,float16,float16,31,0.0591893345117569
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,16,16,64,0,1,float16,fp8,31,0.060138667623202004
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,16,16,64,0,1,float16,float16,63,0.05956799785296122
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,16,16,64,0,1,float16,float16,127,0.06635733445485432
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,16,16,64,0,1,float16,fp8,127,0.060693333546320595
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,16,16,64,0,1,float16,float16,255,0.0817333310842514
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,16,16,64,0,1,float16,fp8,255,0.08012266457080841
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,16,16,64,0,1,float16,fp8,1023,0.20436267058054605
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,16,16,64,0,1,float16,float16,511,0.12563733259836832
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,16,16,64,0,1,float16,fp8,511,0.12242666880289714
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,16,16,64,0,1,float16,float16,1023,0.2584373354911804
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,16,16,64,0,1,float16,fp8,1,0.0837653378645579
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,16,16,64,0,1,float16,float16,1,0.08229866623878479
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,16,16,64,0,1,float16,float16,3,0.08270933230717976
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,16,16,64,0,1,float16,fp8,3,0.08410666386286418
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,16,16,64,0,1,float16,float16,7,0.08542399605115254
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,16,16,64,0,1,float16,fp8,7,0.08746133248011272
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,16,16,64,0,1,float16,float16,15,0.0884320040543874
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,16,16,64,0,1,float16,float16,31,0.11329066753387451
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,16,16,64,0,1,float16,fp8,15,0.09005332986513774
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,16,16,64,0,1,float16,fp8,31,0.11365866661071777
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,16,16,64,0,1,float16,float16,63,0.12120532989501953
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,16,16,64,0,1,float16,fp8,63,0.11565867066383362
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,16,16,64,0,1,float16,float16,1,0.009770666559537252
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,16,16,64,0,1,float16,float16,127,0.12378133336702983
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,16,16,64,0,1,float16,fp8,127,0.12333333492279053
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,16,16,64,0,1,float16,fp8,255,0.15151466925938925
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,16,16,64,0,1,float16,float16,255,0.15361600120862326
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,16,16,64,0,1,float16,float16,511,0.23831466833750406
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,16,16,64,0,1,float16,fp8,1,0.010064000263810158
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,16,16,64,0,1,float16,fp8,511,0.23366934061050415
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,16,16,64,0,1,float16,float16,3,0.0102186668664217
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,16,16,64,0,1,float16,fp8,3,0.010255999863147736
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,16,16,64,0,1,float16,float16,7,0.009482666850090027
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,16,16,64,0,1,float16,fp8,7,0.009754666437705358
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,16,16,64,0,1,float16,float16,15,0.009829333052039146
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,16,16,64,0,1,float16,fp8,15,0.010079999764760336
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,16,16,64,0,1,float16,float16,31,0.009354666496316591
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,16,16,64,0,1,float16,fp8,31,0.010181333248813948
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,16,16,64,0,1,float16,float16,63,0.010549332946538925
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,16,16,64,0,1,float16,fp8,63,0.010725333044926325
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,16,16,64,0,1,float16,float16,127,0.009994666402538618
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,16,16,64,0,1,float16,fp8,127,0.012175999581813812
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,16,16,64,0,1,float16,float16,255,0.010175999874869982
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,16,16,64,0,1,float16,fp8,255,0.012421333541472753
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,16,16,64,0,1,float16,float16,511,0.011178666104873022
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,16,16,64,0,1,float16,fp8,511,0.012741333494583765
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,16,16,64,0,1,float16,float16,1023,0.027877333263556164
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,16,16,64,0,1,float16,fp8,1023,0.027615999182065327
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,16,16,64,0,1,float16,float16,2047,0.032655999064445496
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,16,16,64,0,1,float16,fp8,2047,0.030933332939942677
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,16,16,64,0,1,float16,float16,4095,0.048325334986050926
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,16,16,64,0,1,float16,fp8,4095,0.04155733436346054
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,16,16,64,0,1,float16,float16,8191,0.08504000306129456
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,16,16,64,0,1,float16,fp8,8191,0.06551999847094218
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,16,16,64,0,1,float16,float16,16383,0.14618133505185446
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,16,16,64,0,1,float16,fp8,16383,0.11499733726183574
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,16,16,64,0,1,float16,float16,32767,0.2662079930305481
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,16,16,64,0,1,float16,fp8,32767,0.2041706641515096
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,16,16,64,0,1,float16,float16,1,0.15785066286722818
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,16,16,64,0,1,float16,fp8,1,0.16141333182652792
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,16,16,64,0,1,float16,float16,65535,0.5168319940567017
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,16,16,64,0,1,float16,fp8,65535,0.387114683787028
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,16,16,64,0,1,float16,float16,3,0.15804266929626465
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,16,16,64,0,1,float16,fp8,3,0.16167466839154562
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,16,16,64,0,1,float16,float16,7,0.16389866669972739
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,16,16,64,0,1,float16,fp8,7,0.16775999466578165
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,16,16,64,0,1,float16,float16,15,0.18078933159510294
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,16,16,64,0,1,float16,fp8,15,0.17436800400416055
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,16,16,64,0,1,float16,float16,31,0.22608532508214316
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,16,16,64,0,1,float16,fp8,31,0.22601600488026938
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,16,16,64,0,1,float16,float16,63,0.23330666621526083
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,16,16,64,0,1,float16,float16,127,0.23790399233500162
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,16,16,64,0,1,float16,fp8,63,0.23464532693227133
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,16,16,64,0,1,float16,fp8,127,0.23822933435440063
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,16,16,64,0,1,float16,float16,255,0.29630400737126666
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,16,16,64,0,1,float16,fp8,255,0.29401065905888873
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,16,16,64,0,1,float16,float16,3,0.31437865893046063
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,16,16,64,0,1,float16,float16,1,0.3084266583124797
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,16,16,64,0,1,float16,fp8,1,0.3162933389345805
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,16,16,64,0,1,float16,fp8,3,0.31626667579015094
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,16,16,64,0,1,float16,float16,7,0.34517868359883624
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,16,16,64,0,1,float16,fp8,15,0.36655465761820477
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,16,16,64,0,1,float16,fp8,7,0.3344213167826335
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,16,16,64,0,1,float16,float16,15,0.3663146495819092
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,16,16,64,0,1,float16,float16,1,0.009002666920423508
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,16,16,64,0,1,float16,fp8,1,0.009098666409651438
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,16,16,64,0,1,float16,float16,31,0.4447253147761027
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,16,16,64,0,1,float16,float16,127,0.4671093225479126
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,16,16,64,0,1,float16,fp8,31,0.45140798886617023
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,16,16,64,0,1,float16,float16,3,0.009082666908701261
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,16,16,64,0,1,float16,float16,63,0.4578506549199422
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,16,16,64,0,1,float16,fp8,63,0.4620853265126546
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,16,16,64,0,1,float16,fp8,3,0.009242666885256767
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,16,16,64,0,1,float16,float16,7,0.008954666554927826
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,16,16,64,0,1,float16,fp8,127,0.46850132942199707
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,16,16,64,0,1,float16,fp8,7,0.009232000137368837
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,16,16,64,0,1,float16,float16,15,0.009328000247478485
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,16,16,64,0,1,float16,fp8,15,0.009317333499590555
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,16,16,64,0,1,float16,float16,31,0.010112000008424124
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,16,16,64,0,1,float16,fp8,31,0.009946666657924652
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,16,16,64,0,1,float16,float16,63,0.01163200040658315
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,16,16,64,0,1,float16,fp8,63,0.011306667079528173
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,16,16,64,0,1,float16,float16,127,0.011920000116030375
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,16,16,64,0,1,float16,fp8,127,0.011701333026091257
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,16,16,64,0,1,float16,float16,255,0.012047999848922094
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,16,16,64,0,1,float16,fp8,255,0.011770666887362799
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,16,16,64,0,1,float16,float16,511,0.013754667093356451
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,16,16,64,0,1,float16,fp8,511,0.013514666507641474
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,16,16,64,0,1,float16,float16,1023,0.019066666563351948
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,16,16,64,0,1,float16,fp8,1023,0.018170667191346485
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,16,16,64,0,1,float16,float16,2047,0.04058133314053217
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,16,16,64,0,1,float16,fp8,2047,0.029525332152843475
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,16,16,64,0,1,float16,float16,4095,0.07128533224264781
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,16,16,64,0,1,float16,fp8,16383,0.19685333967208862
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,16,16,64,0,1,float16,fp8,4095,0.057861333092053734
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,16,16,64,0,1,float16,float16,8191,0.13476799925168356
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,16,16,64,0,1,float16,fp8,8191,0.10410133004188538
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,16,16,64,0,1,float16,float16,16383,0.2584639986356099
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,16,16,128,0,1,float16,float16,1,0.01444799949725469
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,16,16,128,0,1,float16,fp8,1,0.01441066712141037
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,16,16,128,0,1,float16,float16,3,0.014783999572197596
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,16,16,64,0,1,float16,float16,32767,0.6435306469599406
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,16,16,64,0,1,float16,fp8,32767,0.5714826583862305
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,16,16,128,0,1,float16,fp8,3,0.015013333410024643
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,16,16,128,0,1,float16,float16,7,0.01470400020480156
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,16,16,128,0,1,float16,fp8,7,0.015103999525308609
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,16,16,128,0,1,float16,float16,15,0.01587733378012975
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,16,16,128,0,1,float16,fp8,15,0.015813333292802174
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,16,16,128,0,1,float16,fp8,127,0.01972266659140587
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,16,16,128,0,1,float16,float16,31,0.019296000401178997
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,16,16,128,0,1,float16,fp8,31,0.019424000134070713
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,16,16,128,0,1,float16,float16,63,0.019365333020687103
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,16,16,128,0,1,float16,fp8,63,0.019343999524911244
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,16,16,128,0,1,float16,float16,127,0.019674666225910187
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,16,16,128,0,1,float16,float16,255,0.023567999402681988
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,16,16,128,0,1,float16,fp8,255,0.02271466702222824
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,16,16,128,0,1,float16,float16,511,0.038949333131313324
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,16,16,128,0,1,float16,fp8,511,0.03417066733042399
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,16,16,128,0,1,float16,fp8,1023,0.059061333537101746
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,16,16,128,0,1,float16,float16,1023,0.0739519993464152
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,16,16,128,0,1,float16,float16,2047,0.1360319952170054
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,16,16,128,0,1,float16,fp8,2047,0.10724799831708272
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,16,16,128,0,1,float16,float16,4095,0.2561066746711731
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,16,16,128,0,1,float16,fp8,4095,0.1974666714668274
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,16,16,128,0,1,float16,float16,8191,0.49823999404907227
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,16,16,128,0,1,float16,fp8,8191,0.3774079879124959
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,16,16,128,0,1,float16,float16,1,0.009285333255926767
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,16,16,128,0,1,float16,fp8,1,0.009503999724984169
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,16,16,128,0,1,float16,float16,16383,0.9910826683044434
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,16,16,128,0,1,float16,fp8,16383,0.7462186813354492
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,16,16,128,0,1,float16,float16,3,0.009546666716535887
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,16,16,128,0,1,float16,fp8,3,0.009205333267649015
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,16,16,128,0,1,float16,float16,7,0.009722666814923286
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,16,16,128,0,1,float16,fp8,7,0.009658666948477427
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,16,16,128,0,1,float16,float16,15,0.009466666728258133
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,16,16,128,0,1,float16,fp8,15,0.009685333197315535
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,16,16,128,0,1,float16,float16,31,0.01027199998497963
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,16,16,128,0,1,float16,fp8,31,0.010480000327030817
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,16,16,128,0,1,float16,float16,63,0.012074666718641916
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,16,16,128,0,1,float16,fp8,63,0.011642667154471079
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,16,16,128,0,1,float16,float16,127,0.012063999970753988
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,16,16,128,0,1,float16,fp8,127,0.012080000092585882
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,16,16,128,0,1,float16,float16,255,0.012069333344697952
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,16,16,128,0,1,float16,fp8,255,0.011989332735538483
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,16,16,128,0,1,float16,float16,511,0.016106666376193363
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,16,16,128,0,1,float16,fp8,511,0.01637866720557213
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,16,16,128,0,1,float16,float16,1023,0.01740266631046931
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,16,16,128,0,1,float16,fp8,1023,0.017258666455745697
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,16,16,128,0,1,float16,float16,2047,0.017717332889636356
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,16,16,128,0,1,float16,fp8,2047,0.017509333789348602
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,16,16,128,0,1,float16,float16,4095,0.02824000020821889
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,16,16,128,0,1,float16,fp8,4095,0.027930667002995808
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,16,16,128,0,1,float16,float16,8191,0.03904533386230469
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,16,16,128,0,1,float16,fp8,8191,0.037978666524092354
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,16,16,128,0,1,float16,float16,16383,0.06860266625881195
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,16,16,128,0,1,float16,fp8,16383,0.05754133562246958
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,16,16,128,0,1,float16,float16,32767,0.11534399787584941
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,16,16,128,0,1,float16,fp8,32767,0.10515200098355611
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,16,16,128,0,1,float16,float16,1,0.009285333255926767
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,16,16,128,0,1,float16,fp8,1,0.009663999701539675
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,16,16,128,0,1,float16,float16,3,0.009381333366036415
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,16,16,128,0,1,float16,float16,65535,0.20725866158803305
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,16,16,128,0,1,float16,fp8,3,0.009381333366036415
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,16,16,128,0,1,float16,fp8,65535,0.18363199631373087
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,16,16,128,0,1,float16,float16,7,0.009290666629870733
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,16,16,128,0,1,float16,fp8,7,0.009413333609700203
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,16,16,128,0,1,float16,float16,15,0.009626666704813639
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,16,16,128,0,1,float16,fp8,15,0.00978133330742518
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,16,16,128,0,1,float16,float16,31,0.009941333283980688
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,16,16,128,0,1,float16,fp8,31,0.01009599988659223
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,16,16,128,0,1,float16,float16,63,0.011829332758982977
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,16,16,128,0,1,float16,fp8,63,0.011754666765530905
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,16,16,128,0,1,float16,float16,127,0.011685332904259363
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,16,16,128,0,1,float16,fp8,127,0.011941333611806234
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,16,16,128,0,1,float16,float16,255,0.0120319997270902
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,16,16,128,0,1,float16,fp8,255,0.0116799995303154
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,16,16,128,0,1,float16,float16,511,0.01603200038274129
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,16,16,128,0,1,float16,fp8,511,0.016063999384641647
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,16,16,128,0,1,float16,float16,1023,0.017231999586025875
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,16,16,128,0,1,float16,fp8,1023,0.01681600014368693
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,16,16,128,0,1,float16,float16,2047,0.02775999903678894
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,16,16,128,0,1,float16,fp8,2047,0.028079998989899952
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,16,16,128,0,1,float16,float16,4095,0.03832533210515976
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,16,16,128,0,1,float16,fp8,4095,0.037263999382654824
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,16,16,128,0,1,float16,float16,8191,0.06406933565934499
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,16,16,128,0,1,float16,fp8,8191,0.05518400172392527
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,16,16,128,0,1,float16,fp8,32767,0.16555733482042947
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,16,16,128,0,1,float16,float16,16383,0.10549867153167725
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,16,16,128,0,1,float16,fp8,16383,0.09521067142486572
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,16,16,128,0,1,float16,float16,32767,0.18692266941070557
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,16,16,128,0,1,float16,float16,1,0.01431999976436297
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,16,16,128,0,1,float16,fp8,1,0.014560000350077948
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,16,16,128,0,1,float16,float16,3,0.014789332946141561
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,16,16,128,0,1,float16,float16,65535,0.35391998291015625
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,16,16,128,0,1,float16,fp8,3,0.015029333531856537
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,16,16,128,0,1,float16,fp8,65535,0.3110026717185974
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,16,16,128,0,1,float16,float16,7,0.01509333277742068
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,16,16,128,0,1,float16,fp8,63,0.018885333091020584
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,16,16,128,0,1,float16,fp8,7,0.015024000157912573
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,16,16,128,0,1,float16,float16,15,0.018629333625237148
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,16,16,128,0,1,float16,fp8,15,0.018858666221300762
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,16,16,128,0,1,float16,float16,31,0.01863466699918111
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,16,16,128,0,1,float16,fp8,31,0.018725333114465077
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,16,16,128,0,1,float16,float16,63,0.018842666099468868
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,16,16,128,0,1,float16,float16,127,0.02277333289384842
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,16,16,128,0,1,float16,fp8,127,0.022309333086013794
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,16,16,128,0,1,float16,float16,255,0.037151999771595
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,16,16,128,0,1,float16,fp8,255,0.033530667424201965
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,16,16,128,0,1,float16,float16,511,0.0743146687746048
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,16,16,128,0,1,float16,fp8,511,0.058373332023620605
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,16,16,128,0,1,float16,float16,1023,0.13551466663678488
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,16,16,128,0,1,float16,fp8,1023,0.10989333192507426
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,16,16,128,0,1,float16,float16,2047,0.256602664788564
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,16,16,128,0,1,float16,fp8,2047,0.20586133003234863
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,16,16,128,0,1,float16,float16,4095,0.4981066783269246
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,16,16,128,0,1,float16,fp8,4095,0.39470934867858887
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,16,16,128,0,1,float16,float16,1,0.009328000247478485
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,16,16,128,0,1,float16,fp8,1,0.00956266683836778
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,16,16,128,0,1,float16,float16,3,0.009322666873534521
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,16,16,128,0,1,float16,fp8,3,0.009514666472872099
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,16,16,128,0,1,float16,float16,8191,1.036688009897868
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,16,16,128,0,1,float16,float16,7,0.008912000184257826
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,16,16,128,0,1,float16,fp8,8191,0.7914613087972006
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,16,16,128,0,1,float16,fp8,63,0.011765333513418833
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,16,16,128,0,1,float16,float16,15,0.00956266683836778
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,16,16,128,0,1,float16,fp8,7,0.009754666437705358
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,16,16,128,0,1,float16,fp8,15,0.009808000177145004
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,16,16,128,0,1,float16,float16,31,0.010144000252087912
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,16,16,128,0,1,float16,fp8,31,0.010362666721145311
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,16,16,128,0,1,float16,float16,63,0.012015999605258306
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,16,16,128,0,1,float16,float16,127,0.012037333101034164
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,16,16,128,0,1,float16,fp8,127,0.011567999919255575
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,16,16,128,0,1,float16,float16,255,0.012186666329701742
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,16,16,128,0,1,float16,fp8,255,0.01209066684047381
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,16,16,128,0,1,float16,float16,511,0.016783999900023144
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,16,16,128,0,1,float16,fp8,511,0.3116746743520101
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,16,16,128,0,1,float16,fp8,4095,0.0487360010544459
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,16,16,128,0,1,float16,float16,1023,0.028229333460330963
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,16,16,128,0,1,float16,fp8,1023,0.027957332630952198
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,16,16,128,0,1,float16,float16,2047,0.035962666074434914
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,16,16,128,0,1,float16,fp8,2047,0.034634667138258614
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,16,16,128,0,1,float16,float16,4095,0.054042667150497437
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,16,16,128,0,1,float16,float16,8191,0.09490133325258891
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,16,16,128,0,1,float16,fp8,8191,0.08299200236797333
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,16,16,128,0,1,float16,float16,16383,0.17340266704559326
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,16,16,128,0,1,float16,fp8,16383,0.14497599999109903
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,16,16,128,0,1,float16,float16,32767,0.3014400005340576
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,16,16,128,0,1,float16,fp8,32767,0.2688800096511841
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,16,16,128,0,1,float16,float16,1,0.024453334510326385
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,16,16,128,0,1,float16,fp8,1,0.02534399926662445
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,16,16,128,0,1,float16,fp8,3,0.026213333010673523
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,16,16,128,0,1,float16,float16,3,0.025424001117547352
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,16,16,128,0,1,float16,float16,7,0.025834667185942333
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,16,16,128,0,1,float16,fp8,7,0.026922665536403656
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,16,16,128,0,1,float16,float16,65535,0.5766933361689249
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,16,16,128,0,1,float16,float16,15,0.032602667808532715
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,16,16,128,0,1,float16,fp8,15,0.03297066688537598
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,16,16,128,0,1,float16,float16,31,0.03271999955177307
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,16,16,128,0,1,float16,fp8,65535,0.5080533425013224
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,16,16,128,0,1,float16,fp8,127,0.04045866678158442
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,16,16,128,0,1,float16,fp8,31,0.032416000962257385
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,16,16,128,0,1,float16,float16,63,0.03369066615899404
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,16,16,128,0,1,float16,fp8,63,0.03299733251333237
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,16,16,128,0,1,float16,float16,127,0.045328001181284584
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,16,16,128,0,1,float16,float16,255,0.0687253326177597
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,16,16,128,0,1,float16,fp8,255,0.06566399832566579
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,16,16,128,0,1,float16,float16,511,0.1376106639703115
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,16,16,128,0,1,float16,fp8,511,0.10815466443697612
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,16,16,128,0,1,float16,float16,1023,0.26027733087539673
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,16,16,128,0,1,float16,fp8,1023,0.20806399981180826
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,16,16,128,0,1,float16,float16,2047,0.5046986738840739
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,16,16,128,0,1,float16,fp8,2047,0.3964373270670573
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,16,16,128,0,1,float16,float16,1,0.04330666859944662
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,16,16,128,0,1,float16,fp8,4095,0.7677013079325358
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,16,16,128,0,1,float16,fp8,1,0.04524800181388855
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,16,16,128,0,1,float16,float16,4095,0.9838773409525553
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,16,16,128,0,1,float16,float16,3,0.04552533229192098
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,16,16,128,0,1,float16,fp8,3,0.046821330984433494
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,16,16,128,0,1,float16,float16,7,0.04713066418965658
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,16,16,128,0,1,float16,fp8,7,0.04809066653251648
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,16,16,128,0,1,float16,float16,15,0.059392000238100685
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,16,16,128,0,1,float16,fp8,15,0.059658666451772056
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,16,16,128,0,1,float16,float16,31,0.061349332332611084
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,16,16,128,0,1,float16,fp8,31,0.06030400097370148
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,16,16,128,0,1,float16,float16,63,0.0665280024210612
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,16,16,128,0,1,float16,fp8,63,0.060965334375699363
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,16,16,128,0,1,float16,float16,127,0.08177599807580312
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,16,16,128,0,1,float16,fp8,127,0.08062933385372162
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,16,16,128,0,1,float16,float16,255,0.12640000383059183
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,16,16,128,0,1,float16,fp8,255,0.12284800410270691
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,16,16,128,0,1,float16,float16,511,0.25890133778254193
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,16,16,128,0,1,float16,fp8,511,0.20665599902470908
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,16,16,128,0,1,float16,fp8,1023,0.3975093364715576
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,16,16,128,0,1,float16,fp8,1,0.08436266581217448
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,16,16,128,0,1,float16,float16,1023,0.5052586793899536
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,16,16,128,0,1,float16,float16,1,0.08193066716194153
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,16,16,128,0,1,float16,float16,3,0.08459200461705525
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,16,16,128,0,1,float16,fp8,3,0.08769599596659343
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,16,16,128,0,1,float16,float16,7,0.08847467104593913
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,16,16,128,0,1,float16,fp8,31,0.11780266960461934
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,16,16,128,0,1,float16,fp8,7,0.09066667159398396
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,16,16,128,0,1,float16,float16,15,0.11552000045776367
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,16,16,128,0,1,float16,float16,31,0.12223999698956807
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,16,16,128,0,1,float16,float16,63,0.12454400459925334
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,16,16,128,0,1,float16,fp8,15,0.11341866850852966
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,16,16,128,0,1,float16,fp8,63,0.12436800201733907
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,16,16,128,0,1,float16,float16,127,0.15450132886568704
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,16,16,128,0,1,float16,fp8,511,0.3970773220062256
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,16,16,128,0,1,float16,fp8,127,0.1530080040295919
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,16,16,128,0,1,float16,float16,255,0.239738663037618
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,16,16,128,0,1,float16,fp8,255,0.23477333784103394
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,16,16,128,0,1,float16,float16,1,0.010117333382368088
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,16,16,128,0,1,float16,float16,511,0.4952426751454671
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,16,16,128,0,1,float16,fp8,1,0.0102613332370917
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,16,16,128,0,1,float16,float16,3,0.009946666657924652
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,16,16,128,0,1,float16,fp8,3,0.010117333382368088
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,16,16,128,0,1,float16,float16,7,0.009994666402538618
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,16,16,128,0,1,float16,float16,63,0.012554666648308435
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,16,16,128,0,1,float16,fp8,7,0.009882666791478792
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,16,16,128,0,1,float16,float16,15,0.009962666779756546
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,16,16,128,0,1,float16,fp8,15,0.010389333590865135
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,16,16,128,0,1,float16,float16,31,0.010773333410422007
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,16,16,128,0,1,float16,fp8,31,0.01118933285276095
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,16,16,128,0,1,float16,fp8,63,0.012304000556468964
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,16,16,128,0,1,float16,float16,127,0.012709333250919977
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,16,16,128,0,1,float16,fp8,127,0.012554666648308435
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,16,16,128,0,1,float16,float16,255,0.012608000387748083
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,16,16,128,0,1,float16,fp8,255,0.01249066616098086
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,16,16,128,0,1,float16,float16,511,0.027493332823117573
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,16,16,128,0,1,float16,fp8,511,0.026693334182103474
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,16,16,128,0,1,float16,float16,1023,0.03160533308982849
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,16,16,128,0,1,float16,fp8,1023,0.031258667508761086
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,16,16,128,0,1,float16,float16,2047,0.047456001242001854
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,16,16,128,0,1,float16,fp8,2047,0.042026668787002563
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,16,16,128,0,1,float16,float16,4095,0.08389332890510559
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,16,16,128,0,1,float16,fp8,4095,0.06794666747252147
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,16,16,128,0,1,float16,float16,8191,0.1443893313407898
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,16,16,128,0,1,float16,fp8,8191,0.12057066957155864
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,16,16,128,0,1,float16,float16,16383,0.2645333409309387
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,16,16,128,0,1,float16,fp8,16383,0.21563732624053955
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,16,16,128,0,1,float16,float16,32767,0.5046826601028442
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,16,16,128,0,1,float16,fp8,32767,0.4068106810251872
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,16,16,128,0,1,float16,float16,1,0.15715733170509338
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,16,16,128,0,1,float16,fp8,1,0.1613759994506836
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,16,16,128,0,1,float16,float16,3,0.16640533010164896
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,16,16,128,0,1,float16,float16,65535,0.994208017985026
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,16,16,128,0,1,float16,fp8,65535,0.8001600106557211
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,16,16,128,0,1,float16,fp8,3,0.16899732748667398
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,16,16,128,0,1,float16,float16,7,0.1856693426767985
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,16,16,128,0,1,float16,fp8,7,0.17942933241526285
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,16,16,128,0,1,float16,float16,15,0.23044800758361816
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,16,16,128,0,1,float16,float16,31,0.2360746661822001
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,16,16,128,0,1,float16,fp8,15,0.23106666405995688
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,16,16,128,0,1,float16,fp8,31,0.23614400625228882
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,16,16,128,0,1,float16,float16,63,0.2397493322690328
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,16,16,128,0,1,float16,fp8,63,0.23891200621922812
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,16,16,128,0,1,float16,float16,255,0.4668320020039876
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,16,16,128,0,1,float16,float16,127,0.29891733328501385
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,16,16,128,0,1,float16,fp8,127,0.29687466224034625
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,16,16,128,0,1,float16,fp8,255,0.4596319993336995
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,16,16,128,0,1,float16,fp8,3,0.3581013282140096
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,16,16,128,0,1,float16,float16,1,0.33823466300964355
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,16,16,128,0,1,float16,fp8,1,0.3372746706008911
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,16,16,128,0,1,float16,float16,3,0.3511679967244466
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,16,16,128,0,1,float16,float16,7,0.36897067228953045
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,16,16,128,0,1,float16,fp8,7,0.3784693479537964
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,16,16,128,0,1,float16,float16,15,0.45370666186014813
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,16,16,128,0,1,float16,fp8,15,0.4604746500651042
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,16,16,128,0,1,float16,float16,1,0.008698666468262672
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,16,16,128,0,1,float16,fp8,1,0.00897066667675972
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,16,16,128,0,1,float16,float16,31,0.4643893241882324
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,16,16,128,0,1,float16,float16,3,0.008922666932145754
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,16,16,128,0,1,float16,fp8,31,0.4647573232650757
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,16,16,128,0,1,float16,float16,63,0.47046399116516113
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,16,16,128,0,1,float16,fp8,3,0.008986666798591614
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,16,16,128,0,1,float16,float16,7,0.009114666531483332
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,16,16,128,0,1,float16,fp8,7,0.009408000235756239
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,16,16,128,0,1,float16,fp8,63,0.46986667315165204
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,16,16,128,0,1,float16,float16,15,0.009941333283980688
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,16,16,128,0,1,float16,fp8,15,0.00961599995692571
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,16,16,128,0,1,float16,float16,127,0.5878880023956299
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,16,16,128,0,1,float16,float16,31,0.011445333560307821
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,16,16,128,0,1,float16,fp8,31,0.011648000528415045
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,16,16,128,0,1,float16,float16,63,0.011642667154471079
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,16,16,128,0,1,float16,fp8,127,0.5854293505350748
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,16,16,128,0,1,float16,fp8,63,0.012053333222866058
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,16,16,128,0,1,float16,fp8,511,0.01877333347996076
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,16,16,128,0,1,float16,float16,127,0.012053333222866058
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,16,16,128,0,1,float16,fp8,127,0.011477333803971609
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,16,16,128,0,1,float16,float16,255,0.013722666849692663
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,16,16,128,0,1,float16,fp8,255,0.013482666263977686
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,16,16,128,0,1,float16,float16,4095,0.13169067104657492
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,16,16,128,0,1,float16,float16,511,0.0189280000825723
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,16,16,128,0,1,float16,float16,1023,0.039647998909155525
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,16,16,128,0,1,float16,float16,8191,0.25380265712738037
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,16,16,128,0,1,float16,fp8,1023,0.030117332935333252
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,16,16,128,0,1,float16,float16,2047,0.07096533477306366
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,16,16,128,0,1,float16,fp8,2047,0.05613866448402405
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,16,16,128,0,1,float16,fp8,4095,0.10301867127418518
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,16,16,128,0,1,float16,fp8,8191,0.192848006884257
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,16,16,128,0,1,float16,float16,16383,0.5038506587346395
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,12,12,64,0,1,float16,float16,3,0.01441066712141037
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,16,16,128,0,1,float16,fp8,16383,0.38042132059733075
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,12,12,64,0,1,float16,float16,1,0.014474666366974512
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,12,12,64,0,1,float16,fp8,1,0.014533333480358124
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,12,12,64,0,1,float16,fp8,3,0.01403733342885971
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,12,12,64,0,1,float16,float16,7,0.01458666721979777
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,12,12,64,0,1,float16,fp8,7,0.014661333213249842
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,16,16,128,0,1,float16,float16,32767,1.2517653306325276
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,12,12,64,0,1,float16,float16,15,0.01462399959564209
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,16,16,128,0,1,float16,fp8,32767,1.1462773482004802
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,12,12,64,0,1,float16,fp8,15,0.014549333602190018
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,12,12,64,0,1,float16,float16,31,0.014208000153303146
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,12,12,64,0,1,float16,fp8,31,0.013951999445756277
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,12,12,64,0,1,float16,float16,63,0.015935999651749928
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,12,12,64,0,1,float16,fp8,63,0.016074666132529575
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,12,12,64,0,1,float16,float16,127,0.01628799984852473
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,12,12,64,0,1,float16,float16,1023,0.03136000037193298
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,12,12,64,0,1,float16,fp8,127,0.016410666207472484
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,12,12,64,0,1,float16,float16,255,0.016927999754746754
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,12,12,64,0,1,float16,fp8,255,0.01624533285697301
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,12,12,64,0,1,float16,float16,511,0.019237333287795384
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,12,12,64,0,1,float16,fp8,4095,0.08787733316421509
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,12,12,64,0,1,float16,fp8,511,0.018485333770513535
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,12,12,64,0,1,float16,fp8,1023,0.026565333207448322
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,12,12,64,0,1,float16,float16,2047,0.06565333406130473
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,12,12,64,0,1,float16,fp8,2047,0.04785599807898203
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,12,12,64,0,1,float16,float16,4095,0.1144480009873708
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,12,12,64,0,1,float16,float16,8191,0.2145493427912394
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,12,12,64,0,1,float16,fp8,8191,0.16866666078567505
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,12,12,64,0,1,float16,float16,1,0.00890666681031386
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,12,12,64,0,1,float16,fp8,7,0.009328000247478485
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,12,12,64,0,1,float16,fp8,16383,0.3220906654993693
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,12,12,64,0,1,float16,fp8,1,0.009408000235756239
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,12,12,64,0,1,float16,float16,16383,0.44143998622894287
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,12,12,64,0,1,float16,float16,3,0.008298666526873907
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,12,12,64,0,1,float16,fp8,3,0.00943999985853831
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,12,12,64,0,1,float16,float16,7,0.008229333286484083
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,12,12,64,0,1,float16,float16,15,0.008240000034372011
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,12,12,64,0,1,float16,fp8,15,0.009359999870260557
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,12,12,64,0,1,float16,float16,31,0.008005333443482717
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,12,12,64,0,1,float16,fp8,31,0.009717333440979322
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,12,12,64,0,1,float16,float16,63,0.009173333023985228
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,12,12,64,0,1,float16,fp8,63,0.01027199998497963
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,12,12,64,0,1,float16,float16,127,0.009322666873534521
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,12,12,64,0,1,float16,fp8,127,0.01192533348997434
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,12,12,64,0,1,float16,float16,255,0.009519999846816063
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,12,12,64,0,1,float16,fp8,255,0.011424000064531961
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,12,12,64,0,1,float16,float16,511,0.01032533310353756
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,12,12,64,0,1,float16,fp8,511,0.011973333855470022
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,12,12,64,0,1,float16,float16,1023,0.016421332955360413
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,12,12,64,0,1,float16,fp8,1023,0.015978666643301647
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,12,12,64,0,1,float16,float16,2047,0.017375999440749485
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,12,12,64,0,1,float16,fp8,2047,0.017162666966517765
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,12,12,64,0,1,float16,float16,4095,0.016986666868130367
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,12,12,64,0,1,float16,fp8,4095,0.017184000462293625
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,12,12,64,0,1,float16,float16,8191,0.017690667261679966
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,12,12,64,0,1,float16,fp8,8191,0.01770666614174843
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,12,12,64,0,1,float16,float16,16383,0.02170666555563609
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,12,12,64,0,1,float16,fp8,16383,0.021194666624069214
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,12,12,64,0,1,float16,float16,32767,0.2004800041516622
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,12,12,64,0,1,float16,fp8,1,0.009493333597977957
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,12,12,64,0,1,float16,float16,3,0.008197333042820295
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,12,12,64,0,1,float16,fp8,32767,0.02736533433198929
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,12,12,64,0,1,float16,float16,1,0.008394666636983553
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,12,12,64,0,1,float16,float16,65535,0.060880000392595925
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,12,12,64,0,1,float16,fp8,3,0.009535999968647957
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,12,12,64,0,1,float16,fp8,65535,0.04902400076389313
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,12,12,64,0,1,float16,float16,7,0.008303999900817871
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,12,12,64,0,1,float16,fp8,7,0.009375999992092451
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,12,12,64,0,1,float16,float16,15,0.007807999849319458
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,12,12,64,0,1,float16,fp8,15,0.00960533320903778
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,12,12,64,0,1,float16,float16,31,0.008447999755541483
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,12,12,64,0,1,float16,fp8,31,0.009679999823371569
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,12,12,64,0,1,float16,float16,63,0.009279999881982803
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,12,12,64,0,1,float16,fp8,63,0.010362666721145311
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,12,12,64,0,1,float16,fp8,127,0.011450666934251785
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,12,12,64,0,1,float16,float16,127,0.009423999736706415
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,12,12,64,0,1,float16,float16,255,0.009535999968647957
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,12,12,64,0,1,float16,fp8,255,0.01181866725285848
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,12,12,64,0,1,float16,float16,511,0.010442666709423065
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,12,12,64,0,1,float16,fp8,511,0.012026666353146235
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,12,12,64,0,1,float16,float16,1023,0.016522667060295742
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,12,12,64,0,1,float16,fp8,1023,0.015861333658297855
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,12,12,64,0,1,float16,float16,2047,0.01738133281469345
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,12,12,64,0,1,float16,fp8,2047,0.01717866708834966
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,12,12,64,0,1,float16,float16,4095,0.017418666432301205
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,12,12,64,0,1,float16,fp8,4095,0.01759999990463257
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,12,12,64,0,1,float16,float16,8191,0.03555733213822047
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,12,12,64,0,1,float16,fp8,8191,0.035002666215101876
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,12,12,64,0,1,float16,float16,16383,0.051370665431022644
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,12,12,64,0,1,float16,fp8,16383,0.047509332497914634
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,12,12,64,0,1,float16,float16,32767,0.09129066268603007
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,12,12,64,0,1,float16,fp8,32767,0.07838933169841766
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,12,12,64,0,1,float16,float16,1,0.013712000101804733
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,12,12,64,0,1,float16,fp8,1,0.014042666802803675
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,12,12,64,0,1,float16,float16,3,0.013962666193644205
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,12,12,64,0,1,float16,fp8,3,0.013434667140245438
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,12,12,64,0,1,float16,float16,7,0.014602666099866232
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,12,12,64,0,1,float16,float16,65535,0.1590933303038279
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,12,12,64,0,1,float16,fp8,65535,0.13768532872200012
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,12,12,64,0,1,float16,fp8,7,0.014149333039919535
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,12,12,64,0,1,float16,float16,15,0.013946666071812311
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,12,12,64,0,1,float16,fp8,15,0.01431999976436297
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,12,12,64,0,1,float16,fp8,127,0.01628799984852473
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,12,12,64,0,1,float16,float16,31,0.01579733317097028
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,12,12,64,0,1,float16,fp8,31,0.015520000209410986
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,12,12,64,0,1,float16,float16,63,0.015813333292802174
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,12,12,64,0,1,float16,fp8,63,0.015957333147525787
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,12,12,64,0,1,float16,float16,127,0.016149333367745083
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,12,12,64,0,1,float16,float16,255,0.01859733338157336
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,12,12,64,0,1,float16,fp8,255,0.018506667266289394
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,12,12,64,0,1,float16,float16,511,0.031104000906149547
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,12,12,64,0,1,float16,fp8,511,0.026426665484905243
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,12,12,64,0,1,float16,float16,1023,0.06158933540185293
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,12,12,64,0,1,float16,fp8,1023,0.04645866652329763
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,12,12,64,0,1,float16,float16,2047,0.11526933312416077
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,12,12,64,0,1,float16,fp8,2047,0.08795733253161113
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,12,12,64,0,1,float16,float16,4095,0.21832533677419028
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,12,12,64,0,1,float16,fp8,4095,0.17128000656763712
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,12,12,64,0,1,float16,float16,1,0.008336000144481659
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,12,12,64,0,1,float16,float16,8191,0.4278080066045125
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,12,12,64,0,1,float16,float16,15,0.008367999767263731
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,12,12,64,0,1,float16,fp8,8191,0.33689598242441815
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,12,12,64,0,1,float16,fp8,1,0.009797333429257074
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,12,12,64,0,1,float16,float16,3,0.009328000247478485
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,12,12,64,0,1,float16,fp8,3,0.010154666379094124
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,12,12,64,0,1,float16,float16,7,0.009568000212311745
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,12,12,64,0,1,float16,fp8,7,0.009829333052039146
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,12,12,64,0,1,float16,fp8,15,0.00996800015370051
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,12,12,64,0,1,float16,float16,31,0.008469333251317343
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,12,12,64,0,1,float16,fp8,31,0.010015999898314476
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,12,12,64,0,1,float16,float16,63,0.009392000113924345
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,12,12,64,0,1,float16,fp8,63,0.010666667173306147
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,12,12,64,0,1,float16,float16,127,0.010245333115259806
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,12,12,64,0,1,float16,fp8,127,0.012671999633312225
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,12,12,64,0,1,float16,float16,255,0.009743999689817429
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,12,12,64,0,1,float16,fp8,255,0.012282667060693106
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,12,12,64,0,1,float16,float16,511,0.010591999938090643
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,12,12,64,0,1,float16,fp8,511,0.012410666793584824
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,12,12,64,0,1,float16,float16,1023,0.01685333376129468
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,12,12,64,0,1,float16,fp8,1023,0.016714667280515034
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,12,12,64,0,1,float16,float16,2047,0.028064000109831493
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,12,12,64,0,1,float16,fp8,2047,0.027994667490323383
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,12,12,64,0,1,float16,float16,4095,0.03561066587766012
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,12,12,64,0,1,float16,fp8,4095,0.035242666800816856
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,12,12,64,0,1,float16,float16,8191,0.051551997661590576
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,12,12,64,0,1,float16,fp8,8191,0.048122664292653404
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,12,12,64,0,1,float16,float16,16383,0.0918880005677541
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,12,12,64,0,1,float16,fp8,16383,0.07833066582679749
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,12,12,64,0,1,float16,float16,32767,0.15914133191108704
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,12,12,64,0,1,float16,fp8,32767,0.13709333539009094
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,12,12,64,0,1,float16,float16,1,0.01987733319401741
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,12,12,64,0,1,float16,float16,3,0.019632000476121902
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,12,12,64,0,1,float16,fp8,1,0.019978666057189304
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,12,12,64,0,1,float16,fp8,3,0.02011200040578842
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,12,12,64,0,1,float16,float16,7,0.02035733312368393
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,12,12,64,0,1,float16,fp8,7,0.02059200033545494
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,12,12,64,0,1,float16,float16,15,0.02089066555102666
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,12,12,64,0,1,float16,fp8,65535,0.249834676583608
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,12,12,64,0,1,float16,float16,65535,0.29636265834172565
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,12,12,64,0,1,float16,fp8,15,0.021194666624069214
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,12,12,64,0,1,float16,float16,31,0.02587733417749405
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,12,12,64,0,1,float16,fp8,31,0.02608533451954524
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,12,12,64,0,1,float16,float16,63,0.025829332570234936
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,12,12,64,0,1,float16,fp8,63,0.02625600000222524
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,12,12,64,0,1,float16,float16,127,0.02588266630967458
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,12,12,64,0,1,float16,fp8,127,0.02646933247645696
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,12,12,64,0,1,float16,float16,255,0.03516799956560135
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,12,12,64,0,1,float16,fp8,255,0.03128000100453695
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,12,12,64,0,1,float16,float16,511,0.05388799806435903
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,12,12,64,0,1,float16,fp8,511,0.050581331054369606
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,12,12,64,0,1,float16,float16,1023,0.105295995871226
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,12,12,64,0,1,float16,fp8,1023,0.08334933718045552
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,12,12,64,0,1,float16,float16,2047,0.19778666893641153
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,12,12,64,0,1,float16,fp8,2047,0.15869866808255514
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,12,12,64,0,1,float16,float16,1,0.03461333364248276
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,12,12,64,0,1,float16,float16,4095,0.3787253300348918
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,12,12,64,0,1,float16,fp8,3,0.035162667433420815
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,12,12,64,0,1,float16,fp8,1,0.03522133330504099
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,12,12,64,0,1,float16,float16,3,0.03426666557788849
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,12,12,64,0,1,float16,fp8,4095,0.3007040023803711
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,12,12,64,0,1,float16,float16,7,0.035631999373435974
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,12,12,64,0,1,float16,fp8,7,0.03667200108369192
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,12,12,64,0,1,float16,float16,15,0.037231999138991036
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,12,12,64,0,1,float16,fp8,15,0.037690666814645134
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,12,12,64,0,1,float16,fp8,31,0.04621866842110952
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,12,12,64,0,1,float16,float16,31,0.04572266836961111
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,12,12,64,0,1,float16,float16,63,0.046256000796953835
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,12,12,64,0,1,float16,fp8,63,0.04684266448020935
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,12,12,64,0,1,float16,float16,127,0.052154665191968284
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,12,12,64,0,1,float16,fp8,127,0.04738666613896688
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,12,12,64,0,1,float16,float16,255,0.06402133405208588
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,12,12,64,0,1,float16,fp8,255,0.06201066573460897
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,12,12,64,0,1,float16,float16,511,0.09799466530481975
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,12,12,64,0,1,float16,fp8,511,0.09462400277455647
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,12,12,64,0,1,float16,float16,1023,0.19883199532826742
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,12,12,64,0,1,float16,fp8,3,0.06422933439413707
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,12,12,64,0,1,float16,fp8,1023,0.15705066919326782
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,12,12,64,0,1,float16,float16,1,0.06347733239332835
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,12,12,64,0,1,float16,fp8,1,0.06485333542029063
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,12,12,64,0,1,float16,float16,3,0.06344533463319142
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,12,12,64,0,1,float16,float16,7,0.0653599997361501
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,12,12,64,0,1,float16,fp8,7,0.06715733309586842
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,12,12,64,0,1,float16,fp8,15,0.0687253326177597
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,12,12,64,0,1,float16,float16,15,0.06782400111357371
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,12,12,64,0,1,float16,float16,31,0.08498666683832805
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,12,12,64,0,1,float16,float16,127,0.09492266178131104
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,12,12,64,0,1,float16,fp8,31,0.08647466699282329
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,12,12,64,0,1,float16,float16,63,0.09268266956011455
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,12,12,64,0,1,float16,fp8,63,0.0869653324286143
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,12,12,64,0,1,float16,fp8,127,0.09383466839790344
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,12,12,64,0,1,float16,float16,255,0.11739200353622437
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,12,12,64,0,1,float16,fp8,511,0.17800533771514893
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,12,12,64,0,1,float16,float16,3,0.009125333279371262
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,12,12,64,0,1,float16,fp8,255,0.1162453293800354
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,12,12,64,0,1,float16,float16,1,0.008661333471536636
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,12,12,64,0,1,float16,float16,511,0.1828213334083557
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,12,12,64,0,1,float16,fp8,1,0.009962666779756546
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,12,12,64,0,1,float16,fp8,3,0.009413333609700203
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,12,12,64,0,1,float16,float16,7,0.008165333420038223
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,12,12,64,0,1,float16,fp8,7,0.009893333539366722
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,12,12,64,0,1,float16,float16,15,0.010415999839703241
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,12,12,64,0,1,float16,fp8,15,0.010048000141978264
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,12,12,64,0,1,float16,float16,31,0.008943999807039896
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,12,12,64,0,1,float16,fp8,31,0.009925333162148794
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,12,12,64,0,1,float16,float16,63,0.0099093330403169
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,12,12,64,0,1,float16,float16,511,0.010863999525705973
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,12,12,64,0,1,float16,fp8,63,0.010304000228643417
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,12,12,64,0,1,float16,float16,127,0.009408000235756239
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,12,12,64,0,1,float16,fp8,127,0.012330666184425354
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,12,12,64,0,1,float16,float16,255,0.00984533317387104
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,12,12,64,0,1,float16,fp8,255,0.012346666306257248
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,12,12,64,0,1,float16,fp8,511,0.012383999923865
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,12,12,64,0,1,float16,float16,1023,0.02701866626739502
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,12,12,64,0,1,float16,fp8,1023,0.026752000053723652
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,12,12,64,0,1,float16,float16,2047,0.031066666046778362
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,12,12,64,0,1,float16,fp8,2047,0.030645333230495453
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,12,12,64,0,1,float16,float16,4095,0.043749332427978516
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,12,12,64,0,1,float16,fp8,4095,0.041034666200478874
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,12,12,64,0,1,float16,float16,8191,0.07514666517575581
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,12,12,64,0,1,float16,fp8,8191,0.0640533318122228
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,12,12,64,0,1,float16,float16,16383,0.13029866417249045
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,12,12,64,0,1,float16,fp8,16383,0.10911466677983601
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,12,12,64,0,1,float16,float16,32767,0.24821333090464273
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,12,12,64,0,1,float16,fp8,32767,0.19750932852427164
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,12,12,64,0,1,float16,float16,65535,0.44284268220265705
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,12,12,64,0,1,float16,fp8,65535,0.37223466237386066
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,12,12,64,0,1,float16,float16,1,0.11962667107582092
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,12,12,64,0,1,float16,fp8,1,0.12273599704106648
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,12,12,64,0,1,float16,float16,3,0.11986666917800903
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,12,12,64,0,1,float16,fp8,3,0.12222400307655334
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,12,12,64,0,1,float16,float16,7,0.12398933370908101
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,12,12,64,0,1,float16,fp8,7,0.12782933314641318
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,12,12,64,0,1,float16,float16,15,0.13144532839457193
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,12,12,64,0,1,float16,fp8,15,0.1318986713886261
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,12,12,64,0,1,float16,float16,31,0.17196800311406454
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,12,12,64,0,1,float16,float16,63,0.17618133624394736
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,12,12,64,0,1,float16,fp8,31,0.16738667090733847
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,12,12,64,0,1,float16,fp8,63,0.17814399798711142
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,12,12,64,0,1,float16,float16,127,0.1809813380241394
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,12,12,64,0,1,float16,fp8,127,0.18103466431299844
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,12,12,64,0,1,float16,float16,255,0.2248213291168213
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,12,12,64,0,1,float16,fp8,255,0.2228426734606425
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,12,12,64,0,1,float16,float16,1,0.23214399814605713
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,12,12,64,0,1,float16,float16,3,0.233189324537913
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,12,12,64,0,1,float16,fp8,1,0.23866132895151773
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,12,12,64,0,1,float16,fp8,3,0.23896533250808716
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,12,12,64,0,1,float16,float16,7,0.2505600055058797
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,12,12,64,0,1,float16,fp8,15,0.26588799556096393
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,12,12,64,0,1,float16,fp8,7,0.24869867165883383
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,12,12,64,0,1,float16,float16,15,0.27643734216690063
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,12,12,64,0,1,float16,float16,31,0.3351626793543498
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,12,12,64,0,1,float16,float16,1,0.009066666786869368
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,12,12,64,0,1,float16,fp8,1,0.009301333377758661
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,12,12,64,0,1,float16,float16,3,0.008805333326260248
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,12,12,64,0,1,float16,fp8,3,0.00916800027092298
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,12,12,64,0,1,float16,float16,7,0.009370666618148485
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,12,12,64,0,1,float16,float16,127,0.35229333241780597
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,12,12,64,0,1,float16,fp8,15,0.009408000235756239
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,12,12,64,0,1,float16,fp8,31,0.34036266803741455
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,12,12,64,0,1,float16,float16,63,0.345578670501709
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,12,12,64,0,1,float16,fp8,7,0.008992000172535578
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,12,12,64,0,1,float16,float16,15,0.009359999870260557
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,12,12,64,0,1,float16,fp8,63,0.3486773173014323
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,12,12,64,0,1,float16,fp8,127,0.35388267040252686
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,12,12,64,0,1,float16,float16,31,0.009648000200589498
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,12,12,64,0,1,float16,fp8,31,0.009999999776482582
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,12,12,64,0,1,float16,float16,63,0.011498666057984034
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,12,12,64,0,1,float16,fp8,63,0.012639999389648438
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,12,12,64,0,1,float16,float16,127,0.011429333438475927
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,12,12,64,0,1,float16,fp8,127,0.011823999385039011
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,12,12,64,0,1,float16,float16,255,0.011733333269755045
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,12,12,64,0,1,float16,fp8,255,0.011839999506870905
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,12,12,64,0,1,float16,float16,511,0.013253333667914072
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,12,12,64,0,1,float16,fp8,511,0.013487999637921652
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,12,12,64,0,1,float16,float16,1023,0.018650667121013004
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,12,12,64,0,1,float16,fp8,1023,0.01859733338157336
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,12,12,64,0,1,float16,float16,2047,0.029135999580224354
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,12,12,64,0,1,float16,fp8,2047,0.029493334392706554
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,12,12,64,0,1,float16,float16,4095,0.06131733457247416
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,12,12,64,0,1,float16,fp8,4095,0.05225066840648651
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,12,12,64,0,1,float16,float16,8191,0.11168533563613892
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,12,12,64,0,1,float16,float16,32767,0.5870453516642252
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,12,12,64,0,1,float16,fp8,8191,0.10248532891273499
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,12,12,64,0,1,float16,float16,16383,0.21411732832590738
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,12,12,64,0,1,float16,fp8,16383,0.19342933098475137
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,12,12,128,0,1,float16,float16,1,0.013440000514189402
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,12,12,128,0,1,float16,fp8,1,0.013807999591032663
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,12,12,128,0,1,float16,float16,3,0.013045333325862885
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,12,12,64,0,1,float16,fp8,32767,0.5618133147557577
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,12,12,128,0,1,float16,fp8,3,0.013408000270525614
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,12,12,128,0,1,float16,float16,7,0.01360000049074491
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,12,12,128,0,1,float16,fp8,7,0.013829333086808523
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,12,12,128,0,1,float16,float16,15,0.014053333550691605
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,12,12,128,0,1,float16,fp8,15,0.014261333892742792
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,12,12,128,0,1,float16,float16,31,0.01599466676513354
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,12,12,128,0,1,float16,fp8,31,0.01563199982047081
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,12,12,128,0,1,float16,fp8,63,0.015930666277805965
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,12,12,128,0,1,float16,float16,63,0.01640533283352852
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,12,12,128,0,1,float16,float16,127,0.016714667280515034
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,12,12,128,0,1,float16,fp8,127,0.01647466669480006
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,12,12,128,0,1,float16,float16,255,0.019146667172511418
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,12,12,128,0,1,float16,fp8,255,0.019146667172511418
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,12,12,128,0,1,float16,float16,511,0.031471999982992806
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,12,12,128,0,1,float16,fp8,511,0.026848000784715016
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,12,12,128,0,1,float16,float16,1023,0.06310933331648509
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,12,12,128,0,1,float16,fp8,1023,0.04850133260091146
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,12,12,128,0,1,float16,float16,2047,0.11322666207949321
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,12,12,128,0,1,float16,fp8,2047,0.08698667089144389
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,12,12,128,0,1,float16,float16,4095,0.2143626610438029
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,12,12,128,0,1,float16,fp8,4095,0.16912533839543661
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,12,12,128,0,1,float16,float16,16383,0.8702452977498373
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,12,12,128,0,1,float16,fp8,16383,0.6320586601893107
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,12,12,128,0,1,float16,float16,3,0.00891733355820179
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,12,12,128,0,1,float16,float16,7,0.008602666358153025
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,12,12,128,0,1,float16,float16,8191,0.4086933135986328
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,12,12,128,0,1,float16,fp8,8191,0.3226240078608195
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,12,12,128,0,1,float16,float16,1,0.009056000038981438
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,12,12,128,0,1,float16,float16,31,0.009893333539366722
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,12,12,128,0,1,float16,fp8,1,0.009125333279371262
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,12,12,128,0,1,float16,fp8,3,0.009237333511312803
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,12,12,128,0,1,float16,fp8,7,0.009392000113924345
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,12,12,128,0,1,float16,float16,15,0.009178666397929192
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,12,12,128,0,1,float16,fp8,15,0.00897066667675972
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,12,12,128,0,1,float16,fp8,31,0.0100853331387043
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,12,12,128,0,1,float16,float16,63,0.011658667276302973
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,12,12,128,0,1,float16,fp8,63,0.011584000041087469
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,12,12,128,0,1,float16,float16,511,0.015647999942302704
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,12,12,128,0,1,float16,float16,127,0.011658667276302973
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,12,12,128,0,1,float16,fp8,127,0.011727999895811081
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,12,12,128,0,1,float16,float16,255,0.011754666765530905
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,12,12,128,0,1,float16,fp8,255,0.01128000020980835
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,12,12,128,0,1,float16,fp8,511,0.015925332903862
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,12,12,128,0,1,float16,float16,4095,0.01748266691962878
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,12,12,128,0,1,float16,float16,1023,0.017103999853134155
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,12,12,128,0,1,float16,fp8,1023,0.016864000509182613
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,12,12,128,0,1,float16,float16,2047,0.01720533271630605
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,12,12,128,0,1,float16,fp8,2047,0.016986666868130367
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,12,12,128,0,1,float16,fp8,4095,0.016864000509182613
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,12,12,128,0,1,float16,float16,8191,0.021354667842388153
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,12,12,128,0,1,float16,fp8,8191,0.02094399929046631
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,12,12,128,0,1,float16,float16,16383,0.02863999952872594
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,12,12,128,0,1,float16,fp8,16383,0.027642667293548584
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,12,12,128,0,1,float16,float16,32767,0.0599839985370636
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,12,12,128,0,1,float16,fp8,32767,0.045082668463389076
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,12,12,128,0,1,float16,float16,1,0.009119999905427298
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,12,12,128,0,1,float16,fp8,1,0.009301333377758661
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,12,12,128,0,1,float16,float16,3,0.009093333035707474
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,12,12,128,0,1,float16,float16,65535,0.1055413285891215
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,12,12,128,0,1,float16,fp8,3,0.009461333354314169
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,12,12,128,0,1,float16,fp8,65535,0.08489599823951721
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,12,12,128,0,1,float16,float16,7,0.009162666896979014
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,12,12,128,0,1,float16,fp8,7,0.009488000224033991
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,12,12,128,0,1,float16,float16,15,0.00933333362142245
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,12,12,128,0,1,float16,fp8,15,0.009466666728258133
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,12,12,128,0,1,float16,float16,31,0.00956266683836778
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,12,12,128,0,1,float16,fp8,31,0.010384000216921171
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,12,12,128,0,1,float16,float16,63,0.011706666400035223
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,12,12,128,0,1,float16,fp8,63,0.011839999506870905
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,12,12,128,0,1,float16,float16,127,0.011770666887362799
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,12,12,128,0,1,float16,fp8,127,0.011674666156371435
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,12,12,128,0,1,float16,float16,255,0.011695999652147293
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,12,12,128,0,1,float16,fp8,255,0.011429333438475927
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,12,12,128,0,1,float16,float16,511,0.016309333344300587
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,12,12,128,0,1,float16,fp8,511,0.016117333124081295
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,12,12,128,0,1,float16,float16,1023,0.017360000560681026
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,12,12,128,0,1,float16,fp8,1023,0.017029333859682083
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,12,12,128,0,1,float16,float16,2047,0.01746133342385292
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,12,12,128,0,1,float16,fp8,2047,0.017231999586025875
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,12,12,128,0,1,float16,float16,4095,0.03502399971087774
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,12,12,128,0,1,float16,fp8,4095,0.03480000048875809
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,12,12,128,0,1,float16,float16,8191,0.051498666405677795
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,12,12,128,0,1,float16,fp8,8191,0.048138668139775596
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,12,12,128,0,1,float16,float16,16383,0.09225599964459737
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,12,12,128,0,1,float16,fp8,16383,0.07961066563924153
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,12,12,128,0,1,float16,float16,32767,0.1605226695537567
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,12,12,128,0,1,float16,fp8,32767,0.14243200421333313
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,12,12,128,0,1,float16,float16,1,0.013605333864688873
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,12,12,128,0,1,float16,fp8,1,0.01402666668097178
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,12,12,128,0,1,float16,fp8,65535,0.26267733176549274
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,12,12,128,0,1,float16,float16,3,0.013829333086808523
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,12,12,128,0,1,float16,float16,65535,0.2965280016263326
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,12,12,128,0,1,float16,fp8,3,0.013925333817799887
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,12,12,128,0,1,float16,float16,7,0.01351999988158544
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,12,12,128,0,1,float16,fp8,7,0.013962666193644205
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,12,12,128,0,1,float16,float16,15,0.015626666446526844
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,12,12,128,0,1,float16,fp8,15,0.015978666643301647
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,12,12,128,0,1,float16,float16,31,0.015578666081031164
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,12,12,128,0,1,float16,fp8,31,0.01579733317097028
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,12,12,128,0,1,float16,float16,63,0.015397333850463232
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,12,12,128,0,1,float16,fp8,63,0.016010666886965435
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,12,12,128,0,1,float16,float16,127,0.018330667167901993
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,12,12,128,0,1,float16,fp8,127,0.01844800015290578
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,12,12,128,0,1,float16,float16,255,0.03107200066248576
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,12,12,128,0,1,float16,fp8,255,0.026416001220544178
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,12,12,128,0,1,float16,float16,511,0.06168533364931742
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,12,12,128,0,1,float16,fp8,511,0.047295997540156044
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,12,12,128,0,1,float16,float16,1023,0.11415466666221619
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,12,12,128,0,1,float16,fp8,1023,0.0890826682249705
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,12,12,128,0,1,float16,float16,2047,0.21624000867207846
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,12,12,128,0,1,float16,fp8,2047,0.1745599905649821
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,12,12,128,0,1,float16,float16,4095,0.4171466827392578
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,12,12,128,0,1,float16,fp8,4095,0.33371734619140625
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,12,12,128,0,1,float16,float16,1,0.009248000259200731
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,12,12,128,0,1,float16,fp8,1,0.010384000216921171
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,12,12,128,0,1,float16,float16,3,0.00961599995692571
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,12,12,128,0,1,float16,float16,8191,0.8309226830800375
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,12,12,128,0,1,float16,fp8,8191,0.6652106841405233
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,12,12,128,0,1,float16,fp8,3,0.009663999701539675
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,12,12,128,0,1,float16,float16,7,0.009733333562811216
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,12,12,128,0,1,float16,fp8,7,0.010058666889866194
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,12,12,128,0,1,float16,float16,15,0.009365333244204521
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,12,12,128,0,1,float16,fp8,15,0.009893333539366722
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,12,12,128,0,1,float16,float16,31,0.010464000205198923
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,12,12,128,0,1,float16,fp8,31,0.01055466632048289
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,12,12,128,0,1,float16,float16,63,0.01219733307758967
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,12,12,128,0,1,float16,fp8,63,0.012133333832025528
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,12,12,128,0,1,float16,float16,127,0.012154666086037954
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,12,12,128,0,1,float16,fp8,127,0.012400000045696894
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,12,12,128,0,1,float16,float16,255,0.011893333246310553
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,12,12,128,0,1,float16,fp8,255,0.01232533281048139
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,12,12,128,0,1,float16,float16,511,0.016879999389251072
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,12,12,128,0,1,float16,fp8,511,0.01646399994691213
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,12,12,128,0,1,float16,float16,1023,0.028373333315054577
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,12,12,128,0,1,float16,fp8,1023,0.027962667246659596
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,12,12,128,0,1,float16,float16,2047,0.03555200000603994
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,12,12,128,0,1,float16,fp8,2047,0.03461866577466329
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,12,12,128,0,1,float16,float16,4095,0.0517439991235733
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,12,12,128,0,1,float16,fp8,4095,0.04820266862710317
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,12,12,128,0,1,float16,float16,8191,0.09173333644866943
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,12,12,128,0,1,float16,fp8,8191,0.08004266520341237
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,12,12,128,0,1,float16,float16,16383,0.1587999959786733
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,12,12,128,0,1,float16,fp8,16383,0.14282666643460593
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,12,12,128,0,1,float16,float16,32767,0.29755733410517377
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,12,12,128,0,1,float16,fp8,32767,0.2608693242073059
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,12,12,128,0,1,float16,float16,1,0.019626667102177937
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,12,12,128,0,1,float16,float16,65535,0.5697600046793619
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,12,12,128,0,1,float16,fp8,1,0.020197333147128422
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,12,12,128,0,1,float16,float16,3,0.02027200038234393
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,12,12,128,0,1,float16,fp8,65535,0.49641601244608563
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,12,12,128,0,1,float16,fp8,3,0.02027733375628789
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,12,12,128,0,1,float16,float16,7,0.020762667059898376
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,12,12,128,0,1,float16,fp8,7,0.021125334004561108
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,12,12,128,0,1,float16,float16,15,0.025909334421157837
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,12,12,128,0,1,float16,fp8,15,0.026122666895389557
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,12,12,128,0,1,float16,float16,31,0.026021334032217663
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,12,12,128,0,1,float16,fp8,31,0.025957333544890087
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,12,12,128,0,1,float16,float16,63,0.025749333202838898
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,12,12,128,0,1,float16,float16,255,0.05339199801286062
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,12,12,128,0,1,float16,fp8,63,0.026149332523345947
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,12,12,128,0,1,float16,float16,127,0.03530666728814443
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,12,12,128,0,1,float16,fp8,127,0.03138133386770884
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,12,12,128,0,1,float16,fp8,255,0.05125333368778229
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,12,12,128,0,1,float16,float16,511,0.10560533404350281
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,12,12,128,0,1,float16,fp8,511,0.08362666765848796
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,12,12,128,0,1,float16,float16,1023,0.19854400555292764
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,12,12,128,0,1,float16,fp8,1023,0.15948800245920816
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,12,12,128,0,1,float16,float16,2047,0.3786826531092326
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,12,12,128,0,1,float16,fp8,2047,0.3016480008761088
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,12,12,128,0,1,float16,float16,1,0.034143999218940735
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,12,12,128,0,1,float16,fp8,1,0.03542399903138479
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,12,12,128,0,1,float16,float16,4095,0.7422613302866617
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,12,12,128,0,1,float16,fp8,4095,0.5824693441390991
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,12,12,128,0,1,float16,float16,3,0.03555200000603994
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,12,12,128,0,1,float16,fp8,3,0.036730666955312095
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,12,12,128,0,1,float16,float16,7,0.03681600093841553
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,12,12,128,0,1,float16,fp8,7,0.03754666695992152
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,12,12,128,0,1,float16,float16,15,0.04631466666857401
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,12,12,128,0,1,float16,fp8,15,0.046853333711624146
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,12,12,128,0,1,float16,float16,31,0.04683733483155569
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,12,12,128,0,1,float16,fp8,31,0.04666133224964142
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,12,12,128,0,1,float16,float16,63,0.051856001218159996
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,12,12,128,0,1,float16,fp8,63,0.047050664822260536
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,12,12,128,0,1,float16,fp8,127,0.06228800117969513
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,12,12,128,0,1,float16,float16,127,0.06405866642793019
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,12,12,128,0,1,float16,float16,255,0.09781333804130554
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,12,12,128,0,1,float16,fp8,255,0.09483733773231506
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,12,12,128,0,1,float16,float16,511,0.19834667444229126
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,12,12,128,0,1,float16,fp8,511,0.15878400206565857
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,12,12,128,0,1,float16,float16,1023,0.38307734330495197
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,12,12,128,0,1,float16,float16,1,0.06276266773541768
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,12,12,128,0,1,float16,fp8,1023,0.30405332644780475
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,12,12,128,0,1,float16,fp8,1,0.06514133512973785
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,12,12,128,0,1,float16,float16,3,0.06513600051403046
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,12,12,128,0,1,float16,fp8,3,0.0672106643517812
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,12,12,128,0,1,float16,float16,7,0.0681386689345042
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,12,12,128,0,1,float16,fp8,7,0.06974933544794719
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,12,12,128,0,1,float16,fp8,15,0.08609066406885783
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,12,12,128,0,1,float16,float16,15,0.08669333656628926
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,12,12,128,0,1,float16,float16,31,0.09338133533795674
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,12,12,128,0,1,float16,fp8,31,0.08698667089144389
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,12,12,128,0,1,float16,float16,63,0.0953439970811208
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,12,12,128,0,1,float16,fp8,63,0.09485866626103719
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,12,12,128,0,1,float16,float16,127,0.11928000052769978
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,12,12,128,0,1,float16,fp8,127,0.1167093316713969
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,12,12,128,0,1,float16,float16,255,0.18315200010935465
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,12,12,128,0,1,float16,fp8,255,0.17849600315093994
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,12,12,128,0,1,float16,float16,1,0.009519999846816063
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,12,12,128,0,1,float16,fp8,1,0.0100426667680343
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,12,12,128,0,1,float16,float16,511,0.3765653371810913
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,12,12,128,0,1,float16,float16,3,0.009839999799927076
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,12,12,128,0,1,float16,fp8,511,0.3013226588567098
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,12,12,128,0,1,float16,float16,31,0.010677333921194077
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,12,12,128,0,1,float16,fp8,3,0.009861333295702934
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,12,12,128,0,1,float16,float16,63,0.012351999680201212
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,12,12,128,0,1,float16,float16,7,0.009530666594703993
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,12,12,128,0,1,float16,fp8,7,0.009797333429257074
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,12,12,128,0,1,float16,float16,15,0.009919999788204828
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,12,12,128,0,1,float16,fp8,15,0.0102186668664217
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,12,12,128,0,1,float16,fp8,31,0.010879999647537867
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,12,12,128,0,1,float16,fp8,63,0.012608000387748083
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,12,12,128,0,1,float16,float16,127,0.012378666549921036
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,12,12,128,0,1,float16,fp8,127,0.012357333054145178
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,12,12,128,0,1,float16,fp8,1023,0.030981334547201794
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,12,12,128,0,1,float16,float16,255,0.012400000045696894
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,12,12,128,0,1,float16,fp8,255,0.012383999923865
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,12,12,128,0,1,float16,float16,511,0.026922665536403656
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,12,12,128,0,1,float16,fp8,511,0.026560001075267792
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,12,12,128,0,1,float16,float16,1023,0.030991998811562855
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,12,12,128,0,1,float16,float16,2047,0.04393066465854645
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,12,12,128,0,1,float16,fp8,2047,0.041296000281969704
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,12,12,128,0,1,float16,float16,4095,0.07496533294518788
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,12,12,128,0,1,float16,fp8,4095,0.06509333352247874
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,12,12,128,0,1,float16,float16,8191,0.1280586620171865
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,12,12,128,0,1,float16,fp8,8191,0.11430933078130086
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,12,12,128,0,1,float16,float16,16383,0.23097066084543863
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,12,12,128,0,1,float16,fp8,16383,0.20892266432444254
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,12,12,128,0,1,float16,float16,32767,0.4397173325220744
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,12,12,128,0,1,float16,fp8,32767,0.3914560079574585
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,12,12,128,0,1,float16,float16,1,0.11904000242551167
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,12,12,128,0,1,float16,fp8,1,0.1227946678797404
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,12,12,128,0,1,float16,float16,3,0.12386666735013326
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,12,12,128,0,1,float16,float16,65535,0.8562133312225342
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,12,12,128,0,1,float16,fp8,3,0.1281760036945343
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,12,12,128,0,1,float16,float16,7,0.13524799545605978
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,12,12,128,0,1,float16,fp8,65535,0.7644960085550944
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,12,12,128,0,1,float16,fp8,7,0.13358400265375772
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,12,12,128,0,1,float16,float16,15,0.17710934082667032
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,12,12,128,0,1,float16,float16,31,0.17893866697947183
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,12,12,128,0,1,float16,fp8,15,0.16927999258041382
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,12,12,128,0,1,float16,fp8,31,0.17892799774805704
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,12,12,128,0,1,float16,float16,63,0.18179200092951456
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,12,12,128,0,1,float16,fp8,63,0.1816213329633077
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,12,12,128,0,1,float16,float16,127,0.22671467065811157
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,12,12,128,0,1,float16,float16,255,0.3532426754633586
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,12,12,128,0,1,float16,fp8,127,0.22468799352645874
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,12,12,128,0,1,float16,fp8,255,0.3471306562423706
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,12,12,128,0,1,float16,float16,7,0.27857067187627155
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,12,12,128,0,1,float16,float16,1,0.2409813404083252
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,12,12,128,0,1,float16,fp8,1,0.2435200015703837
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,12,12,128,0,1,float16,float16,3,0.26190932591756183
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,12,12,128,0,1,float16,fp8,3,0.25780266523361206
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,12,12,128,0,1,float16,fp8,7,0.28252265850702923
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,12,12,128,0,1,float16,fp8,15,0.3466879924138387
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,12,12,128,0,1,float16,float16,15,0.34167468547821045
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,12,12,128,0,1,float16,float16,1,0.009141333401203156
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,12,12,128,0,1,float16,fp8,1,0.009050666665037474
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,12,12,128,0,1,float16,float16,3,0.009306666751702627
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,12,12,128,0,1,float16,float16,31,0.3503679831822713
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,12,12,128,0,1,float16,fp8,3,0.009226666763424873
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,12,12,128,0,1,float16,float16,7,0.009354666496316591
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,12,12,128,0,1,float16,fp8,31,0.35050666332244873
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,12,12,128,0,1,float16,float16,63,0.35525866349538165
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,12,12,128,0,1,float16,fp8,7,0.009455999980370203
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,12,12,128,0,1,float16,fp8,127,0.440282662709554
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,12,12,128,0,1,float16,fp8,63,0.3544693390528361
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,12,12,128,0,1,float16,float16,15,0.00979200005531311
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,12,12,128,0,1,float16,fp8,15,0.0100426667680343
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,12,12,128,0,1,float16,float16,127,0.4436853329340617
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,12,12,128,0,1,float16,float16,31,0.0116799995303154
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,12,12,128,0,1,float16,fp8,31,0.011610666910807291
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,12,12,128,0,1,float16,float16,63,0.011744000017642975
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,12,12,128,0,1,float16,fp8,63,0.011221333096424738
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,12,12,128,0,1,float16,float16,127,0.011978667229413986
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,12,12,128,0,1,float16,fp8,127,0.011589333415031433
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,12,12,128,0,1,float16,fp8,1023,0.028581333657105763
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,12,12,128,0,1,float16,float16,255,0.013493333011865616
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,12,12,128,0,1,float16,fp8,255,0.013679999858140945
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,12,12,128,0,1,float16,float16,511,0.018687999496857326
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,12,12,128,0,1,float16,fp8,511,0.018719999740521114
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,12,12,128,0,1,float16,float16,1023,0.030117332935333252
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,12,12,128,0,1,float16,float16,2047,0.06111466884613037
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,12,12,128,0,1,float16,fp8,2047,0.05179733534653982
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,12,12,128,0,1,float16,float16,16383,0.41913068294525146
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,12,12,128,0,1,float16,float16,4095,0.11124799648920695
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,12,12,128,0,1,float16,fp8,4095,0.10078932841618855
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,12,12,128,0,1,float16,float16,8191,0.21265600124994913
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,12,12,128,0,1,float16,fp8,8191,0.18963199853897095
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,12,12,128,0,1,float16,fp8,16383,0.368341326713562
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,8,8,64,0,1,float16,float16,1,0.009061333412925402
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,8,8,64,0,1,float16,fp8,1,0.008874666566650072
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,8,8,64,0,1,float16,float16,3,0.009312000125646591
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,8,8,64,0,1,float16,fp8,3,0.009381333366036415
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,8,8,64,0,1,float16,float16,7,0.009509333098928133
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,12,12,128,0,1,float16,float16,32767,1.1653760274251301
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,8,8,64,0,1,float16,fp8,7,0.009434666484594345
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,8,8,64,0,1,float16,float16,15,0.00927466650803884
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,12,12,128,0,1,float16,fp8,32767,1.1117440064748128
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,8,8,64,0,1,float16,fp8,15,0.009002666920423508
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,8,8,64,0,1,float16,float16,31,0.010202666744589806
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,8,8,64,0,1,float16,fp8,31,0.010154666379094124
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,8,8,64,0,1,float16,float16,63,0.011717333147923151
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,8,8,64,0,1,float16,float16,127,0.011882666498422623
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,8,8,64,0,1,float16,fp8,63,0.011871999750534693
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,8,8,64,0,1,float16,fp8,127,0.011744000017642975
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,8,8,64,0,1,float16,float16,255,0.01198400060335795
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,8,8,64,0,1,float16,fp8,255,0.011461333682139715
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,8,8,64,0,1,float16,float16,511,0.013898666948080063
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,8,8,64,0,1,float16,fp8,511,0.013728000223636627
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,8,8,64,0,1,float16,float16,1023,0.018874666343132656
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,8,8,64,0,1,float16,fp8,1023,0.01905599981546402
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,8,8,64,0,1,float16,float16,2047,0.040864000717798867
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,8,8,64,0,1,float16,fp8,2047,0.029525332152843475
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,8,8,64,0,1,float16,float16,4095,0.07166400055090587
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,8,8,64,0,1,float16,fp8,4095,0.05729599793752035
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,8,8,64,0,1,float16,float16,8191,0.13268799583117166
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,8,8,64,0,1,float16,fp8,8191,0.10405332843462627
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,8,8,64,0,1,float16,fp8,1,0.00884799969693025
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,8,8,64,0,1,float16,float16,16383,0.25703465938568115
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,8,8,64,0,1,float16,fp8,16383,0.1988640030225118
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,8,8,64,0,1,float16,float16,1,0.008197333042820295
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,8,8,64,0,1,float16,float16,3,0.008229333286484083
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,8,8,64,0,1,float16,fp8,3,0.009530666594703993
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,8,8,64,0,1,float16,float16,7,0.007647999872763951
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,8,8,64,0,1,float16,fp8,7,0.009535999968647957
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,8,8,64,0,1,float16,fp8,32767,0.5784266789754232
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,8,8,64,0,1,float16,float16,32767,0.6350400050481161
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,8,8,64,0,1,float16,float16,15,0.008303999900817871
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,8,8,64,0,1,float16,fp8,15,0.009397333487868309
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,8,8,64,0,1,float16,float16,31,0.008576000109314919
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,8,8,64,0,1,float16,fp8,31,0.008933333059151968
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,8,8,64,0,1,float16,float16,63,0.009232000137368837
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,8,8,64,0,1,float16,fp8,63,0.010304000228643417
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,8,8,64,0,1,float16,float16,127,0.009183999771873156
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,8,8,64,0,1,float16,fp8,127,0.011994666109482447
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,8,8,64,0,1,float16,float16,255,0.009423999736706415
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,8,8,64,0,1,float16,fp8,255,0.012138667205969492
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,8,8,64,0,1,float16,float16,511,0.00978133330742518
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,8,8,64,0,1,float16,fp8,511,0.011882666498422623
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,8,8,64,0,1,float16,float16,1023,0.016399999459584553
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,8,8,64,0,1,float16,fp8,1023,0.01595199977358182
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,8,8,64,0,1,float16,float16,2047,0.01746133342385292
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,8,8,64,0,1,float16,fp8,2047,0.01720000058412552
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,8,8,64,0,1,float16,fp8,8191,0.01730666682124138
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,8,8,64,0,1,float16,float16,4095,0.017621333400408428
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,8,8,64,0,1,float16,fp8,4095,0.01658133293191592
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,8,8,64,0,1,float16,float16,8191,0.01759999990463257
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,8,8,64,0,1,float16,float16,16383,0.02829866607983907
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,8,8,64,0,1,float16,fp8,16383,0.028175999720891316
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,8,8,64,0,1,float16,float16,32767,0.03882666677236557
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,8,8,64,0,1,float16,fp8,32767,0.03839466720819473
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,8,8,64,0,1,float16,float16,1,0.008367999767263731
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,8,8,64,0,1,float16,fp8,1,0.009530666594703993
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,8,8,64,0,1,float16,float16,3,0.008714666590094566
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,8,8,64,0,1,float16,float16,65535,0.0699786643187205
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,8,8,64,0,1,float16,fp8,3,0.009408000235756239
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,8,8,64,0,1,float16,float16,7,0.00850133349498113
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,8,8,64,0,1,float16,fp8,65535,0.057999998331069946
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,8,8,64,0,1,float16,fp8,7,0.009429333110650381
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,8,8,64,0,1,float16,float16,15,0.009066666786869368
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,8,8,64,0,1,float16,fp8,15,0.009557333464423815
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,8,8,64,0,1,float16,float16,31,0.008442666381597519
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,8,8,64,0,1,float16,fp8,31,0.009114666531483332
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,8,8,64,0,1,float16,float16,63,0.009114666531483332
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,8,8,64,0,1,float16,fp8,63,0.010437333335479101
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,8,8,64,0,1,float16,float16,127,0.009226666763424873
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,8,8,64,0,1,float16,fp8,127,0.01173866664369901
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,8,8,64,0,1,float16,float16,131071,0.1195146640141805
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,8,8,64,0,1,float16,float16,255,0.008693333094318708
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,8,8,64,0,1,float16,fp8,131071,0.10414399703343709
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,8,8,64,0,1,float16,fp8,255,0.011829332758982977
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,8,8,64,0,1,float16,float16,511,0.010309333602587381
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,8,8,64,0,1,float16,fp8,511,0.011855999628702799
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,8,8,64,0,1,float16,fp8,4095,0.017050666113694508
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,8,8,64,0,1,float16,float16,1023,0.01629866659641266
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,8,8,64,0,1,float16,fp8,1023,0.016143999993801117
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,8,8,64,0,1,float16,float16,2047,0.01674666628241539
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,8,8,64,0,1,float16,fp8,2047,0.01728533332546552
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,8,8,64,0,1,float16,float16,4095,0.01716800034046173
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,8,8,64,0,1,float16,float16,8191,0.028421332438786823
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,8,8,64,0,1,float16,fp8,8191,0.028138667345046997
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,8,8,64,0,1,float16,float16,16383,0.03849600007136663
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,8,8,64,0,1,float16,fp8,16383,0.03805333375930786
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,8,8,64,0,1,float16,float16,32767,0.06833600004514058
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,8,8,64,0,1,float16,fp8,32767,0.05700799822807312
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,8,8,64,0,1,float16,fp8,1,0.014853333433469137
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,8,8,64,0,1,float16,float16,1,0.014495999862750372
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,8,8,64,0,1,float16,float16,3,0.014602666099866232
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,8,8,64,0,1,float16,fp8,3,0.014378666877746582
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,8,8,64,0,1,float16,float16,65535,0.11639466881752014
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,8,8,64,0,1,float16,float16,7,0.014869333555301031
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,8,8,64,0,1,float16,fp8,65535,0.10199466347694397
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,8,8,64,0,1,float16,fp8,7,0.014933332800865173
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,8,8,64,0,1,float16,float16,15,0.014975999792416891
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,8,8,64,0,1,float16,fp8,15,0.014618666221698126
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,8,8,64,0,1,float16,float16,31,0.01616000011563301
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,8,8,64,0,1,float16,fp8,31,0.01621333385507266
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,8,8,64,0,1,float16,float16,131071,0.20587732394536337
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,8,8,64,0,1,float16,float16,63,0.019178666174411774
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,8,8,64,0,1,float16,fp8,63,0.01937066639463107
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,8,8,64,0,1,float16,fp8,131071,0.17575999101003012
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,8,8,64,0,1,float16,float16,127,0.019402666638294857
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,8,8,64,0,1,float16,fp8,127,0.01911466692884763
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,8,8,64,0,1,float16,float16,255,0.022005334496498108
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,8,8,64,0,1,float16,fp8,1023,0.033946665624777474
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,8,8,64,0,1,float16,fp8,255,0.019589333484570186
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,8,8,64,0,1,float16,fp8,2047,0.05862933397293091
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,8,8,64,0,1,float16,float16,2047,0.07491733133792877
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,8,8,64,0,1,float16,float16,511,0.023728000621000927
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,8,8,64,0,1,float16,fp8,511,0.02288000037272771
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,8,8,64,0,1,float16,float16,1023,0.03909866760174433
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,8,8,64,0,1,float16,fp8,8191,0.199455996354421
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,8,8,64,0,1,float16,float16,8191,0.2561386624972026
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,8,8,64,0,1,float16,float16,4095,0.1358453333377838
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,8,8,64,0,1,float16,fp8,4095,0.1092693308989207
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,8,8,64,0,1,float16,float16,1,0.008234666660428047
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,8,8,64,0,1,float16,float16,16383,0.5028106768925985
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,8,8,64,0,1,float16,fp8,1,0.009381333366036415
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,8,8,64,0,1,float16,float16,3,0.00786666696270307
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,8,8,64,0,1,float16,fp8,3,0.009797333429257074
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,8,8,64,0,1,float16,fp8,16383,0.3853813409805298
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,8,8,64,0,1,float16,float16,7,0.008298666526873907
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,8,8,64,0,1,float16,fp8,7,0.00943999985853831
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,8,8,64,0,1,float16,float16,15,0.008266666904091835
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,8,8,64,0,1,float16,fp8,15,0.009530666594703993
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,8,8,64,0,1,float16,float16,31,0.008645333349704742
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,8,8,64,0,1,float16,fp8,31,0.009941333283980688
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,8,8,64,0,1,float16,float16,63,0.009317333499590555
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,8,8,64,0,1,float16,fp8,63,0.010309333602587381
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,8,8,64,0,1,float16,float16,127,0.009375999992092451
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,8,8,64,0,1,float16,fp8,127,0.011722666521867117
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,8,8,64,0,1,float16,float16,255,0.009317333499590555
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,8,8,64,0,1,float16,fp8,255,0.01190399999419848
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,8,8,64,0,1,float16,float16,511,0.010581333190202713
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,8,8,64,0,1,float16,fp8,511,0.011887999872366587
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,8,8,64,0,1,float16,float16,1023,0.016271999726692837
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,8,8,64,0,1,float16,fp8,1023,0.01605333387851715
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,8,8,64,0,1,float16,float16,2047,0.017418666432301205
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,8,8,64,0,1,float16,fp8,2047,0.017263999829689663
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,8,8,64,0,1,float16,float16,4095,0.028192001084486645
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,8,8,64,0,1,float16,fp8,4095,0.02812800059715907
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,8,8,64,0,1,float16,float16,8191,0.03847466657559077
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,8,8,64,0,1,float16,fp8,8191,0.03762666632731756
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,8,8,64,0,1,float16,float16,16383,0.06507200002670288
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,8,8,64,0,1,float16,fp8,16383,0.055248002211252846
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,8,8,64,0,1,float16,float16,32767,0.1056106686592102
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,8,8,64,0,1,float16,fp8,32767,0.09152533610661824
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,8,8,64,0,1,float16,float16,1,0.0144213338692983
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,8,8,64,0,1,float16,fp8,1,0.014682666709025701
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,8,8,64,0,1,float16,float16,65535,0.18714133898417154
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,8,8,64,0,1,float16,float16,3,0.014442666123310724
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,8,8,64,0,1,float16,fp8,65535,0.15832533439000449
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,8,8,64,0,1,float16,float16,15,0.01498666654030482
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,8,8,64,0,1,float16,fp8,3,0.01463466634353002
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,8,8,64,0,1,float16,float16,7,0.014917333920796713
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,8,8,64,0,1,float16,fp8,7,0.017045332739750545
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,8,8,64,0,1,float16,fp8,15,0.01565333331624667
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,8,8,64,0,1,float16,float16,131071,0.35361067454020184
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,8,8,64,0,1,float16,float16,31,0.018677332748969395
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,8,8,64,0,1,float16,fp8,31,0.018437333405017853
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,8,8,64,0,1,float16,float16,63,0.0186666672428449
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,8,8,64,0,1,float16,float16,255,0.02313599983851115
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,8,8,64,0,1,float16,fp8,131071,0.30027733246485394
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,8,8,64,0,1,float16,fp8,63,0.019023999571800232
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,8,8,64,0,1,float16,float16,127,0.01912533367673556
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,8,8,64,0,1,float16,fp8,127,0.019248000035683315
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,8,8,64,0,1,float16,fp8,255,0.02252800017595291
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,8,8,64,0,1,float16,float16,511,0.037845333417256675
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,8,8,64,0,1,float16,fp8,511,0.033589333295822144
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,8,8,64,0,1,float16,float16,1023,0.07396266857783
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,8,8,64,0,1,float16,fp8,1023,0.05832533538341522
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,8,8,64,0,1,float16,float16,2047,0.13565867145856222
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,8,8,64,0,1,float16,fp8,2047,0.11009066303571065
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,8,8,64,0,1,float16,float16,4095,0.2564586599667867
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,8,8,64,0,1,float16,fp8,4095,0.20562134186426798
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,8,8,64,0,1,float16,float16,1,0.024501333634058636
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,8,8,64,0,1,float16,float16,8191,0.5264960130055746
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,8,8,64,0,1,float16,fp8,1,0.025061334172884624
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,8,8,64,0,1,float16,float16,3,0.024458666642506916
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,8,8,64,0,1,float16,fp8,8191,0.4044693311055501
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,8,8,64,0,1,float16,fp8,3,0.025061334172884624
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,8,8,64,0,1,float16,float16,31,0.0322080006202062
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,8,8,64,0,1,float16,float16,7,0.025333332518736523
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,8,8,64,0,1,float16,fp8,7,0.025914666553338368
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,8,8,64,0,1,float16,float16,15,0.02613866577545802
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,8,8,64,0,1,float16,fp8,15,0.026144000391165417
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,8,8,64,0,1,float16,fp8,31,0.03275733441114426
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,8,8,64,0,1,float16,float16,63,0.03246400008598963
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,8,8,64,0,1,float16,fp8,63,0.03315199911594391
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,8,8,64,0,1,float16,float16,127,0.033386667569478355
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,8,8,64,0,1,float16,fp8,127,0.03339199970165888
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,8,8,64,0,1,float16,float16,255,0.04535466432571411
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,8,8,64,0,1,float16,fp8,255,0.03997866561015447
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,8,8,64,0,1,float16,float16,511,0.06834666430950165
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,8,8,64,0,1,float16,fp8,511,0.06572266419728597
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,8,8,64,0,1,float16,float16,1023,0.13691199819246927
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,8,8,64,0,1,float16,fp8,1023,0.10854400197664897
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,8,8,64,0,1,float16,float16,2047,0.26103466749191284
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,8,8,64,0,1,float16,fp8,2047,0.2082186738650004
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,8,8,64,0,1,float16,float16,1,0.043951998154322304
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,8,8,64,0,1,float16,float16,3,0.04349866509437561
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,8,8,64,0,1,float16,fp8,1,0.044778664906819664
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,8,8,64,0,1,float16,fp8,3,0.04480533301830292
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,8,8,64,0,1,float16,float16,7,0.045594667394955955
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,8,8,64,0,1,float16,fp8,7,0.04688533147176107
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,8,8,64,0,1,float16,float16,15,0.04722133278846741
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,8,8,64,0,1,float16,fp8,15,0.04814399778842926
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,8,8,64,0,1,float16,fp8,63,0.06025599936644236
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,8,8,64,0,1,float16,float16,31,0.05891199906667074
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,8,8,64,0,1,float16,fp8,31,0.059605335195859276
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,8,8,64,0,1,float16,float16,63,0.059487998485565186
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,8,8,64,0,1,float16,float16,127,0.06618666648864746
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,8,8,64,0,1,float16,fp8,127,0.06125866870085398
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,8,8,64,0,1,float16,float16,255,0.08116800089677174
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,8,8,64,0,1,float16,fp8,255,0.080485333998998
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,8,8,64,0,1,float16,float16,511,0.12524267037709555
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,8,8,64,0,1,float16,fp8,511,0.12272000312805176
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,8,8,64,0,1,float16,float16,1,0.008207999790708223
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,8,8,64,0,1,float16,float16,7,0.008485333373149237
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,8,8,64,0,1,float16,float16,1023,0.25805334250132245
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,8,8,64,0,1,float16,fp8,1,0.009706666693091393
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,8,8,64,0,1,float16,fp8,1023,0.20547199249267578
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,8,8,64,0,1,float16,float16,3,0.008357333640257517
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,8,8,64,0,1,float16,fp8,3,0.009866666669646898
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,8,8,64,0,1,float16,fp8,7,0.009770666559537252
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,8,8,64,0,1,float16,float16,15,0.008367999767263731
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,8,8,64,0,1,float16,fp8,63,0.010559999694426855
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,8,8,64,0,1,float16,fp8,15,0.009045333291093508
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,8,8,64,0,1,float16,float16,31,0.008656000097592672
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,8,8,64,0,1,float16,fp8,31,0.009813333551088968
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,8,8,64,0,1,float16,float16,63,0.009328000247478485
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,8,8,64,0,1,float16,float16,127,0.009503999724984169
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,8,8,64,0,1,float16,fp8,127,0.011898666620254517
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,8,8,64,0,1,float16,float16,255,0.00972800018886725
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,8,8,64,0,1,float16,fp8,255,0.011434666812419891
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,8,8,64,0,1,float16,float16,511,0.010634666929642359
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,8,8,64,0,1,float16,fp8,511,0.012282667060693106
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,8,8,64,0,1,float16,float16,1023,0.017157333592573803
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,8,8,64,0,1,float16,fp8,4095,0.035402665535608925
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,8,8,64,0,1,float16,fp8,1023,0.016672000288963318
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,8,8,64,0,1,float16,float16,2047,0.028277332584063213
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,8,8,64,0,1,float16,fp8,2047,0.028463999430338543
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,8,8,64,0,1,float16,float16,4095,0.03579733272393545
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,8,8,64,0,1,float16,float16,8191,0.055786664287249245
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,8,8,64,0,1,float16,fp8,8191,0.048901334404945374
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,8,8,64,0,1,float16,float16,16383,0.09514133135477702
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,8,8,64,0,1,float16,fp8,16383,0.07981866598129272
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,8,8,64,0,1,float16,float16,32767,0.16422933340072632
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,8,8,64,0,1,float16,fp8,32767,0.14029866456985474
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,8,8,64,0,1,float16,float16,65535,0.3031839927037557
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,8,8,64,0,1,float16,fp8,65535,0.2561066746711731
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,8,8,64,0,1,float16,float16,1,0.08230933547019958
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,8,8,64,0,1,float16,fp8,1,0.08370133241017659
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,8,8,64,0,1,float16,fp8,3,0.08335999647776286
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,8,8,64,0,1,float16,float16,3,0.08240533371766408
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,8,8,64,0,1,float16,float16,7,0.08514666557312012
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,8,8,64,0,1,float16,fp8,15,0.08942400415738423
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,8,8,64,0,1,float16,fp8,7,0.08730666836102803
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,8,8,64,0,1,float16,float16,15,0.08794666330019633
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,8,8,64,0,1,float16,float16,131071,0.5144373178482056
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,8,8,64,0,1,float16,fp8,131071,0.3928693135579427
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,8,8,64,0,1,float16,float16,31,0.11341866850852966
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,8,8,64,0,1,float16,fp8,31,0.1133066713809967
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,8,8,64,0,1,float16,float16,63,0.12024000287055969
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,8,8,64,0,1,float16,fp8,63,0.11492799719174702
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,8,8,64,0,1,float16,float16,127,0.12372799714406331
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,8,8,64,0,1,float16,fp8,127,0.12314666310946147
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,8,8,64,0,1,float16,fp8,255,0.15161599715550741
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,8,8,64,0,1,float16,float16,255,0.15375999609629312
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,8,8,64,0,1,float16,float16,511,0.23779733975728354
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,8,8,64,0,1,float16,float16,1,0.15682133038838705
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,8,8,64,0,1,float16,fp8,511,0.23430399099985758
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,8,8,64,0,1,float16,fp8,1,0.16133333245913187
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,8,8,64,0,1,float16,float16,3,0.15752533078193665
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,8,8,64,0,1,float16,fp8,3,0.16146666804949442
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,8,8,64,0,1,float16,fp8,15,0.1735573410987854
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,8,8,64,0,1,float16,float16,7,0.16389866669972739
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,8,8,64,0,1,float16,fp8,7,0.1683893402417501
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,8,8,64,0,1,float16,float16,15,0.18076799313227335
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,8,8,64,0,1,float16,float16,31,0.226474662621816
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,8,8,64,0,1,float16,float16,127,0.23777600129445395
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,8,8,64,0,1,float16,fp8,31,0.22557334105173746
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,8,8,64,0,1,float16,float16,1,0.00854399986565113
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,8,8,64,0,1,float16,float16,63,0.23285333315531412
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,8,8,64,0,1,float16,float16,3,0.008586666857202848
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,8,8,64,0,1,float16,fp8,63,0.23505600293477377
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,8,8,64,0,1,float16,fp8,1,0.009119999905427298
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,8,8,64,0,1,float16,fp8,3,0.009749333063761393
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,8,8,64,0,1,float16,float16,255,0.29685332377751666
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,8,8,64,0,1,float16,float16,7,0.008287999778985977
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,8,8,64,0,1,float16,fp8,7,0.009930666536092758
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,8,8,64,0,1,float16,fp8,127,0.23891200621922812
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,8,8,64,0,1,float16,fp8,255,0.29469867547353107
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,8,8,64,0,1,float16,float16,15,0.008581333483258883
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,8,8,64,0,1,float16,fp8,15,0.00915733352303505
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,8,8,64,0,1,float16,float16,31,0.008901333436369896
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,8,8,64,0,1,float16,fp8,31,0.009850666547815004
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,8,8,64,0,1,float16,float16,63,0.009279999881982803
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,8,8,64,0,1,float16,fp8,63,0.010709332923094431
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,8,8,64,0,1,float16,float16,127,0.009765333185593287
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,8,8,64,0,1,float16,fp8,127,0.012245333443085352
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,8,8,64,0,1,float16,float16,255,0.009952000031868616
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,8,8,64,0,1,float16,fp8,255,0.011594666788975397
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,8,8,64,0,1,float16,float16,511,0.010944000134865442
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,8,8,64,0,1,float16,fp8,511,0.012256000190973282
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,8,8,64,0,1,float16,float16,1023,0.026928000152111053
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,8,8,64,0,1,float16,fp8,1023,0.02661866694688797
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,8,8,64,0,1,float16,float16,2047,0.03169599920511246
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,8,8,64,0,1,float16,fp8,2047,0.031109333038330078
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,8,8,64,0,1,float16,float16,4095,0.04819199939568838
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,8,8,64,0,1,float16,fp8,4095,0.04091733445723852
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,8,8,64,0,1,float16,float16,8191,0.08528000116348267
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,8,8,64,0,1,float16,fp8,8191,0.06629333396752675
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,8,8,64,0,1,float16,float16,16383,0.14565866192181906
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,8,8,64,0,1,float16,fp8,16383,0.11488533020019531
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,8,8,64,0,1,float16,float16,32767,0.26588799556096393
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,8,8,128,0,1,float16,float16,1,0.009242666885256767
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,8,8,64,0,1,float16,fp8,32767,0.2039146622021993
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,8,8,128,0,1,float16,fp8,1,0.009434666484594345
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,8,8,128,0,1,float16,float16,3,0.009136000027259191
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,8,8,128,0,1,float16,fp8,3,0.009535999968647957
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,8,8,128,0,1,float16,float16,7,0.009317333499590555
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,8,8,128,0,1,float16,fp8,7,0.009535999968647957
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,8,8,64,0,1,float16,fp8,65535,0.38813332716623944
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,8,8,64,0,1,float16,float16,65535,0.5157493352890015
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,8,8,128,0,1,float16,float16,15,0.010026666646202406
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,8,8,128,0,1,float16,fp8,15,0.010005333150426546
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,8,8,128,0,1,float16,float16,31,0.01163200040658315
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,8,8,128,0,1,float16,fp8,31,0.011776000261306763
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,8,8,128,0,1,float16,float16,63,0.011744000017642975
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,8,8,128,0,1,float16,fp8,255,0.013733333597580591
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,8,8,128,0,1,float16,float16,255,0.013823999712864557
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,8,8,128,0,1,float16,fp8,63,0.01119999960064888
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,8,8,128,0,1,float16,float16,127,0.011936000237862269
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,8,8,128,0,1,float16,fp8,127,0.01180800050497055
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,8,8,128,0,1,float16,float16,511,0.018922666708628338
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,8,8,128,0,1,float16,fp8,511,0.018885333091020584
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,8,8,128,0,1,float16,float16,1023,0.04043733328580856
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,8,8,128,0,1,float16,fp8,1023,0.029114666084448498
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,8,8,128,0,1,float16,float16,2047,0.07101333141326904
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,8,8,128,0,1,float16,fp8,2047,0.05614933371543884
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,8,8,128,0,1,float16,float16,8191,0.25492799282073975
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,8,8,128,0,1,float16,float16,4095,0.13216533263524374
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,8,8,128,0,1,float16,fp8,4095,0.10284266869227092
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,8,8,128,0,1,float16,fp8,8191,0.19312532742818198
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,8,8,128,0,1,float16,fp8,16383,0.3760533332824707
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,8,8,128,0,1,float16,fp8,1,0.009418666362762451
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,8,8,128,0,1,float16,float16,16383,0.5058186848958334
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,8,8,128,0,1,float16,float16,1,0.009045333291093508
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,8,8,128,0,1,float16,float16,3,0.00921066664159298
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,8,8,128,0,1,float16,fp8,7,0.009317333499590555
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,8,8,128,0,1,float16,float16,15,0.009445333232482275
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,8,8,128,0,1,float16,fp8,3,0.00877333308259646
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,8,8,128,0,1,float16,float16,7,0.008997333546479544
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,8,8,128,0,1,float16,float16,32767,1.2602880001068115
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,8,8,128,0,1,float16,fp8,15,0.009503999724984169
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,8,8,128,0,1,float16,float16,31,0.009914666414260864
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,8,8,128,0,1,float16,fp8,32767,1.1344906489054363
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,8,8,128,0,1,float16,fp8,31,0.010117333382368088
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,8,8,128,0,1,float16,float16,63,0.01109333336353302
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,8,8,128,0,1,float16,fp8,63,0.011584000041087469
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,8,8,128,0,1,float16,fp8,127,0.011674666156371435
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,8,8,128,0,1,float16,float16,127,0.011722666521867117
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,8,8,128,0,1,float16,float16,255,0.011711999773979187
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,8,8,128,0,1,float16,fp8,255,0.011706666400035223
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,8,8,128,0,1,float16,float16,511,0.015791999797026317
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,8,8,128,0,1,float16,fp8,511,0.015439999600251516
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,8,8,128,0,1,float16,float16,1023,0.016773333152135212
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,8,8,128,0,1,float16,fp8,1023,0.016821333517630894
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,8,8,128,0,1,float16,float16,2047,0.017103999853134155
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,8,8,128,0,1,float16,fp8,2047,0.016800000021855038
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,8,8,128,0,1,float16,float16,4095,0.017114666601022083
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,8,8,128,0,1,float16,fp8,4095,0.017103999853134155
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,8,8,128,0,1,float16,float16,8191,0.027903998891512554
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,8,8,128,0,1,float16,fp8,8191,0.027632000545660656
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,8,8,128,0,1,float16,float16,16383,0.03844266633192698
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,8,8,128,0,1,float16,fp8,32767,0.057722667853037514
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,8,8,128,0,1,float16,fp8,16383,0.038133333126703896
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,8,8,128,0,1,float16,float16,32767,0.06968000034491222
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,8,8,128,0,1,float16,float16,1,0.009152000149091085
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,8,8,128,0,1,float16,fp8,1,0.009093333035707474
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,8,8,128,0,1,float16,float16,65535,0.11913599570592244
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,8,8,128,0,1,float16,float16,3,0.01146666705608368
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,8,8,128,0,1,float16,fp8,65535,0.10669333736101787
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,8,8,128,0,1,float16,fp8,3,0.009301333377758661
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,8,8,128,0,1,float16,float16,7,0.009114666531483332
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,8,8,128,0,1,float16,fp8,7,0.00902399979531765
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,8,8,128,0,1,float16,fp8,31,0.010117333382368088
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,8,8,128,0,1,float16,float16,15,0.009450666606426239
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,8,8,128,0,1,float16,fp8,15,0.01126933346192042
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,8,8,128,0,1,float16,float16,31,0.010885333021481832
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,8,8,128,0,1,float16,float16,131071,0.21623466412226358
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,8,8,128,0,1,float16,float16,63,0.011642667154471079
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,8,8,128,0,1,float16,fp8,131071,0.19192532698313394
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,8,8,128,0,1,float16,fp8,63,0.01137599969903628
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,8,8,128,0,1,float16,float16,127,0.011642667154471079
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,8,8,128,0,1,float16,fp8,127,0.011648000528415045
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,8,8,128,0,1,float16,float16,255,0.011823999385039011
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,8,8,128,0,1,float16,fp8,255,0.011616000284751257
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,8,8,128,0,1,float16,float16,511,0.015957333147525787
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,8,8,128,0,1,float16,fp8,511,0.016095999628305435
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,8,8,128,0,1,float16,float16,1023,0.016602666427691776
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,8,8,128,0,1,float16,fp8,1023,0.01648533344268799
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,8,8,128,0,1,float16,float16,2047,0.01710933322707812
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,8,8,128,0,1,float16,fp8,2047,0.016965333372354507
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,8,8,128,0,1,float16,float16,4095,0.029093332588672638
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,8,8,128,0,1,float16,fp8,4095,0.027722666660944622
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,8,8,128,0,1,float16,float16,8191,0.03858133405447006
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,8,8,128,0,1,float16,fp8,8191,0.038218667109807335
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,8,8,128,0,1,float16,float16,16383,0.06797866523265839
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,8,8,128,0,1,float16,fp8,16383,0.05676266551017761
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,8,8,128,0,1,float16,float16,32767,0.11645866433779399
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,8,8,128,0,1,float16,fp8,32767,0.10505066315333049
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,8,8,128,0,1,float16,float16,1,0.013850666582584381
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,8,8,128,0,1,float16,fp8,1,0.014698666830857595
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,8,8,128,0,1,float16,float16,3,0.014736000448465347
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,8,8,128,0,1,float16,fp8,3,0.01516266663869222
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,8,8,128,0,1,float16,float16,65535,0.2095573345820109
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,8,8,128,0,1,float16,float16,7,0.014287999520699183
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,8,8,128,0,1,float16,fp8,7,0.014853333433469137
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,8,8,128,0,1,float16,fp8,65535,0.18248534202575684
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,8,8,128,0,1,float16,float16,15,0.015872000406185787
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,8,8,128,0,1,float16,fp8,15,0.016117333124081295
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,8,8,128,0,1,float16,float16,31,0.019498666127522785
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,8,8,128,0,1,float16,float16,131071,0.38838398456573486
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,8,8,128,0,1,float16,fp8,31,0.019285333653291065
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,8,8,128,0,1,float16,fp8,131071,0.3394613265991211
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,8,8,128,0,1,float16,float16,63,0.019199999670187633
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,8,8,128,0,1,float16,fp8,255,0.022826666633288067
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,8,8,128,0,1,float16,fp8,63,0.019589333484570186
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,8,8,128,0,1,float16,float16,127,0.01971199984351794
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,8,8,128,0,1,float16,fp8,127,0.019632000476121902
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,8,8,128,0,1,float16,float16,255,0.023573334018389385
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,8,8,128,0,1,float16,float16,511,0.03902400036652883
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,8,8,128,0,1,float16,fp8,511,0.03389333436886469
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,8,8,128,0,1,float16,float16,1023,0.07462933162848155
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,8,8,128,0,1,float16,fp8,1023,0.059248000383377075
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,8,8,128,0,1,float16,float16,2047,0.1354986627896627
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,8,8,128,0,1,float16,fp8,2047,0.10729066530863444
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,8,8,128,0,1,float16,float16,4095,0.25676800807317096
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,8,8,128,0,1,float16,fp8,4095,0.19795199235280356
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,8,8,128,0,1,float16,float16,8191,0.49879999955495197
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,8,8,128,0,1,float16,fp8,8191,0.3770986795425415
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,8,8,128,0,1,float16,float16,1,0.009759999811649323
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,8,8,128,0,1,float16,fp8,1,0.009450666606426239
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,8,8,128,0,1,float16,float16,3,0.009402666861812273
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,8,8,128,0,1,float16,fp8,3,0.009397333487868309
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,8,8,128,0,1,float16,float16,7,0.009072000160813332
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,8,8,128,0,1,float16,fp8,16383,0.7401333649953207
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,8,8,128,0,1,float16,float16,16383,0.9928533236185709
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,8,8,128,0,1,float16,fp8,7,0.009589333087205887
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,8,8,128,0,1,float16,float16,15,0.009509333098928133
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,8,8,128,0,1,float16,fp8,15,0.009136000027259191
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,8,8,128,0,1,float16,float16,31,0.01022933361430963
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,8,8,128,0,1,float16,fp8,31,0.010213333492477735
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,8,8,128,0,1,float16,float16,63,0.011551999797423681
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,8,8,128,0,1,float16,fp8,63,0.011658667276302973
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,8,8,128,0,1,float16,float16,127,0.011962667107582092
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,8,8,128,0,1,float16,fp8,127,0.011861333002646765
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,8,8,128,0,1,float16,float16,255,0.012063999970753988
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,8,8,128,0,1,float16,fp8,255,0.011610666910807291
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,8,8,128,0,1,float16,float16,511,0.0161920003592968
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,8,8,128,0,1,float16,fp8,511,0.016149333367745083
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,8,8,128,0,1,float16,float16,1023,0.017263999829689663
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,8,8,128,0,1,float16,fp8,1023,0.016927999754746754
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,8,8,128,0,1,float16,float16,2047,0.02824000020821889
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,8,8,128,0,1,float16,fp8,2047,0.028234665592511494
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,8,8,128,0,1,float16,float16,4095,0.03827733298142751
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,8,8,128,0,1,float16,fp8,4095,0.03721066564321518
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,8,8,128,0,1,float16,float16,8191,0.0651146670182546
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,8,8,128,0,1,float16,fp8,8191,0.055071999629338585
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,8,8,128,0,1,float16,float16,16383,0.10417067011197408
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,8,8,128,0,1,float16,fp8,16383,0.09426132837931316
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,8,8,128,0,1,float16,fp8,32767,0.16516266266504923
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,8,8,128,0,1,float16,float16,32767,0.18796799580256143
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,8,8,128,0,1,float16,float16,1,0.01431999976436297
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,8,8,128,0,1,float16,float16,3,0.014896000425020853
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,8,8,128,0,1,float16,fp8,65535,0.3102239966392517
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,8,8,128,0,1,float16,fp8,1,0.014485333114862442
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,8,8,128,0,1,float16,float16,65535,0.3535840113957723
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,8,8,128,0,1,float16,float16,7,0.01515199989080429
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,8,8,128,0,1,float16,fp8,3,0.016565332810084026
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,8,8,128,0,1,float16,fp8,7,0.015594666202863058
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,8,8,128,0,1,float16,float16,15,0.018079999834299088
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,8,8,128,0,1,float16,fp8,15,0.018629333625237148
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,8,8,128,0,1,float16,float16,31,0.018650667121013004
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,8,8,128,0,1,float16,fp8,31,0.018613333503405254
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,8,8,128,0,1,float16,fp8,63,0.018992000569899876
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,8,8,128,0,1,float16,float16,127,0.02223466585079829
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,8,8,128,0,1,float16,float16,63,0.019808000574509304
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,8,8,128,0,1,float16,fp8,127,0.022106667359670002
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,8,8,128,0,1,float16,float16,131071,0.685914675394694
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,8,8,128,0,1,float16,fp8,131071,0.6049760182698568
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,8,8,128,0,1,float16,float16,255,0.03738133360942205
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,8,8,128,0,1,float16,fp8,255,0.033488000432650246
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,8,8,128,0,1,float16,float16,511,0.07445333401362102
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,8,8,128,0,1,float16,fp8,511,0.05776533484458923
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,8,8,128,0,1,float16,float16,1023,0.13582932949066162
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,8,8,128,0,1,float16,fp8,1023,0.10982400178909302
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,8,8,128,0,1,float16,float16,2047,0.2574719985326131
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,8,8,128,0,1,float16,fp8,2047,0.2069279948870341
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,8,8,128,0,1,float16,float16,4095,0.4994773467381795
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,8,8,128,0,1,float16,fp8,4095,0.39444267749786377
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,8,8,128,0,1,float16,float16,1,0.024490666886170704
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,8,8,128,0,1,float16,fp8,1,0.024800000091393787
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,8,8,128,0,1,float16,float16,3,0.02516266703605652
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,8,8,128,0,1,float16,fp8,8191,0.7894933223724365
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,8,8,128,0,1,float16,fp8,3,0.02603733291228612
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,8,8,128,0,1,float16,float16,8191,1.0392533143361409
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,8,8,128,0,1,float16,float16,7,0.025914666553338368
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,8,8,128,0,1,float16,fp8,7,0.026687999566396076
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,8,8,128,0,1,float16,float16,15,0.032431999842325844
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,8,8,128,0,1,float16,fp8,15,0.032629333436489105
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,8,8,128,0,1,float16,float16,31,0.03270400067170461
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,8,8,128,0,1,float16,fp8,31,0.03199466566244761
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,8,8,128,0,1,float16,float16,63,0.033471999069054924
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,8,8,128,0,1,float16,fp8,63,0.03289066751797994
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,8,8,128,0,1,float16,float16,127,0.045365333557128906
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,8,8,128,0,1,float16,fp8,127,0.03998400022586187
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,8,8,128,0,1,float16,float16,255,0.06859733164310455
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,8,8,128,0,1,float16,fp8,255,0.0656160016854604
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,8,8,128,0,1,float16,fp8,511,0.1085653305053711
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,8,8,128,0,1,float16,float16,511,0.13776000340779623
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,8,8,128,0,1,float16,float16,1023,0.25964266061782837
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,8,8,128,0,1,float16,fp8,1023,0.20791999499003092
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,8,8,128,0,1,float16,float16,2047,0.501530647277832
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,8,8,128,0,1,float16,fp8,1,0.04459733267625173
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,8,8,128,0,1,float16,float16,1,0.04345066845417023
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,8,8,128,0,1,float16,fp8,2047,0.39717332522074383
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,8,8,128,0,1,float16,float16,3,0.045567999283472695
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,8,8,128,0,1,float16,fp8,3,0.04677866895993551
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,8,8,128,0,1,float16,float16,7,0.04708800216515859
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,8,8,128,0,1,float16,float16,15,0.058874666690826416
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,8,8,128,0,1,float16,fp8,7,0.049072002371152244
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,8,8,128,0,1,float16,fp8,15,0.05949333310127258
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,8,8,128,0,1,float16,fp8,31,0.06006933252016703
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,8,8,128,0,1,float16,float16,31,0.06204266846179962
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,8,8,128,0,1,float16,fp8,63,0.06117333471775055
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,8,8,128,0,1,float16,float16,127,0.08196266492207845
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,8,8,128,0,1,float16,float16,63,0.06625600159168243
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,8,8,128,0,1,float16,fp8,127,0.08097599943478902
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,8,8,128,0,1,float16,float16,255,0.12582932909329733
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,8,8,128,0,1,float16,fp8,255,0.12270399928092957
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,8,8,128,0,1,float16,float16,1,0.00949866697192192
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,8,8,128,0,1,float16,float16,511,0.26050132513046265
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,8,8,128,0,1,float16,fp8,511,0.20642666021982828
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,8,8,128,0,1,float16,fp8,1,0.009429333110650381
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,8,8,128,0,1,float16,float16,1023,0.5052533149719238
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,8,8,128,0,1,float16,float16,3,0.009279999881982803
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,8,8,128,0,1,float16,fp8,3,0.009535999968647957
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,8,8,128,0,1,float16,fp8,1023,0.402021328608195
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,8,8,128,0,1,float16,float16,7,0.009408000235756239
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,8,8,128,0,1,float16,fp8,7,0.009535999968647957
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,8,8,128,0,1,float16,float16,15,0.009397333487868309
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,8,8,128,0,1,float16,fp8,15,0.009301333377758661
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,8,8,128,0,1,float16,float16,31,0.010191999996701876
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,8,8,128,0,1,float16,fp8,31,0.010304000228643417
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,8,8,128,0,1,float16,float16,63,0.012442667037248611
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,8,8,128,0,1,float16,fp8,63,0.011887999872366587
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,8,8,128,0,1,float16,float16,127,0.011871999750534693
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,8,8,128,0,1,float16,fp8,127,0.01190399999419848
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,8,8,128,0,1,float16,float16,255,0.011754666765530905
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,8,8,128,0,1,float16,fp8,255,0.011962667107582092
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,8,8,128,0,1,float16,float16,511,0.016650666793187458
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,8,8,128,0,1,float16,fp8,511,0.016623999923467636
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,8,8,128,0,1,float16,float16,1023,0.028170667588710785
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,8,8,128,0,1,float16,fp8,1023,0.027808000644048054
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,8,8,128,0,1,float16,float16,2047,0.0358240008354187
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,8,8,128,0,1,float16,fp8,2047,0.034976000587145485
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,8,8,128,0,1,float16,float16,4095,0.05425066749254862
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,8,8,128,0,1,float16,fp8,4095,0.048613334695498146
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,8,8,128,0,1,float16,float16,8191,0.09425600369771321
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,8,8,128,0,1,float16,fp8,8191,0.0841439962387085
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,8,8,128,0,1,float16,float16,16383,0.16566399733225504
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,8,8,128,0,1,float16,fp8,16383,0.14459733168284097
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,8,8,128,0,1,float16,float16,32767,0.30239466826121014
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,8,8,128,0,1,float16,fp8,32767,0.26791999737421673
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,8,8,128,0,1,float16,float16,65535,0.57696000734965
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,8,8,128,0,1,float16,fp8,65535,0.506986657778422
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,8,8,128,0,1,float16,fp8,1,0.08401067058245341
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,8,8,128,0,1,float16,float16,3,0.08441600203514099
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,8,8,128,0,1,float16,float16,7,0.08785600463549297
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,8,8,128,0,1,float16,fp8,3,0.08738666772842407
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,8,8,128,0,1,float16,float16,1,0.0832586685816447
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,8,8,128,0,1,float16,fp8,7,0.09092799822489421
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,8,8,128,0,1,float16,float16,31,0.121888001759847
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,8,8,128,0,1,float16,fp8,15,0.11318932970364888
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,8,8,128,0,1,float16,float16,15,0.11534399787584941
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,8,8,128,0,1,float16,float16,131071,0.9948639869689941
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,8,8,128,0,1,float16,fp8,31,0.11732799808184306
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,8,8,128,0,1,float16,fp8,131071,0.8018080393473307
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,8,8,128,0,1,float16,float16,127,0.1540426711241404
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,8,8,128,0,1,float16,fp8,63,0.12405866384506226
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,8,8,128,0,1,float16,float16,63,0.12434132893880208
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,8,8,128,0,1,float16,fp8,127,0.15269333124160767
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,8,8,128,0,1,float16,float16,255,0.23937066396077475
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,8,8,128,0,1,float16,fp8,255,0.23526400327682495
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,8,8,128,0,1,float16,float16,511,0.4964053233464559
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,8,8,128,0,1,float16,float16,1,0.15636799732844034
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,8,8,128,0,1,float16,fp8,511,0.3978826602300008
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,8,8,128,0,1,float16,fp8,1,0.16155200203259787
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,8,8,128,0,1,float16,fp8,15,0.23034133513768515
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,8,8,128,0,1,float16,fp8,3,0.16872000694274902
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,8,8,128,0,1,float16,float16,7,0.18582934141159058
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,8,8,128,0,1,float16,fp8,7,0.17929067214330038
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,8,8,128,0,1,float16,float16,15,0.23038933674494425
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,8,8,128,0,1,float16,float16,3,0.16637866695721945
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,8,8,128,0,1,float16,float16,63,0.2392586668332418
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,8,8,128,0,1,float16,fp8,31,0.23632532358169556
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,8,8,128,0,1,float16,float16,1,0.008890666688481966
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,8,8,128,0,1,float16,float16,31,0.2358986735343933
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,8,8,128,0,1,float16,fp8,1,0.009685333197315535
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,8,8,128,0,1,float16,float16,3,0.008949333180983862
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,8,8,128,0,1,float16,fp8,63,0.23903467257817587
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,8,8,128,0,1,float16,fp8,3,0.009658666948477427
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,8,8,128,0,1,float16,float16,7,0.009509333098928133
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,8,8,128,0,1,float16,float16,127,0.2990986704826355
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,8,8,128,0,1,float16,fp8,7,0.009141333401203156
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,8,8,128,0,1,float16,fp8,127,0.29665066798528034
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,8,8,128,0,1,float16,float16,15,0.009818666925032934
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,8,8,128,0,1,float16,fp8,15,0.009893333539366722
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,8,8,128,0,1,float16,float16,255,0.4672853151957194
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,8,8,128,0,1,float16,float16,31,0.010431999961535135
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,8,8,128,0,1,float16,fp8,31,0.010442666709423065
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,8,8,128,0,1,float16,float16,63,0.011477333803971609
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,8,8,128,0,1,float16,fp8,63,0.012304000556468964
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,8,8,128,0,1,float16,float16,127,0.012191999703645706
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,8,8,128,0,1,float16,fp8,127,0.012085333466529846
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,8,8,128,0,1,float16,float16,255,0.012613333761692047
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,8,8,128,0,1,float16,fp8,255,0.012175999581813812
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,8,8,128,0,1,float16,float16,511,0.026261332134405773
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,8,8,128,0,1,float16,fp8,511,0.026517334083716076
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,8,8,128,0,1,float16,float16,1023,0.031136001149813335
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,8,8,128,0,1,float16,fp8,1023,0.030784000953038532
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,8,8,128,0,1,float16,float16,2047,0.04809066653251648
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,8,8,128,0,1,float16,fp8,2047,0.04163199911514918
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,8,8,128,0,1,float16,float16,4095,0.08355200290679932
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,8,8,128,0,1,float16,fp8,4095,0.06884266436100006
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,8,8,128,0,1,float16,float16,8191,0.1439733306566874
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,8,8,128,0,1,float16,fp8,8191,0.12038399775822957
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,8,8,128,0,1,float16,float16,16383,0.2643839915593465
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,8,8,128,0,1,float16,fp8,16383,0.2147093415260315
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,8,8,128,0,1,float16,float16,32767,0.5031840006510416
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,4,4,64,0,1,float16,float16,1,0.008421333506703377
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,4,4,64,0,1,float16,fp8,1,0.009589333087205887
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,8,8,128,0,1,float16,fp8,32767,0.4049333333969116
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,4,4,64,0,1,float16,float16,3,0.007936000203092894
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,4,4,64,0,1,float16,fp8,3,0.009663999701539675
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,4,4,64,0,1,float16,float16,7,0.008325333396593729
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,4,4,64,0,1,float16,fp8,7,0.009701333319147428
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,4,4,64,0,1,float16,float16,15,0.007941333577036858
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,4,4,64,0,1,float16,fp8,15,0.009658666948477427
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,4,4,64,0,1,float16,float16,31,0.00855466661353906
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,8,8,128,0,1,float16,fp8,65535,0.7929226557413737
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,8,8,128,0,1,float16,float16,65535,0.9960906505584717
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,4,4,64,0,1,float16,fp8,31,0.009797333429257074
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,4,4,64,0,1,float16,float16,63,0.008746666833758354
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,4,4,64,0,1,float16,fp8,63,0.01051733394463857
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,4,4,64,0,1,float16,float16,127,0.009434666484594345
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,4,4,64,0,1,float16,fp8,127,0.012186666329701742
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,4,4,64,0,1,float16,float16,255,0.009877333417534828
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,4,4,64,0,1,float16,fp8,255,0.012042666474978128
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,4,4,64,0,1,float16,float16,511,0.010133333504199982
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,4,4,64,0,1,float16,fp8,511,0.012175999581813812
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,4,4,64,0,1,float16,float16,1023,0.026928000152111053
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,8,8,128,0,1,float16,fp8,255,0.4594080050786336
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,4,4,64,0,1,float16,fp8,1023,0.026608000199000042
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,4,4,64,0,1,float16,float16,2047,0.03149333347876867
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,4,4,64,0,1,float16,fp8,2047,0.031061333914597828
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,4,4,64,0,1,float16,fp8,4095,0.04065066576004028
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,4,4,64,0,1,float16,float16,4095,0.047600001096725464
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,4,4,64,0,1,float16,float16,8191,0.08097066481908162
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,4,4,64,0,1,float16,fp8,8191,0.06549866497516632
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,4,4,64,0,1,float16,fp8,16383,0.11326932907104492
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,4,4,64,0,1,float16,float16,16383,0.1454026699066162
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,4,4,64,0,1,float16,float16,1,0.007871999715765318
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,4,4,64,0,1,float16,fp8,1,0.009088000282645226
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,4,4,64,0,1,float16,float16,32767,0.26603732506434125
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,4,4,64,0,1,float16,float16,3,0.007706666365265846
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,4,4,64,0,1,float16,fp8,32767,0.20529067516326904
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,4,4,64,0,1,float16,fp8,3,0.00943999985853831
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,4,4,64,0,1,float16,float16,7,0.007823999971151352
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,4,4,64,0,1,float16,fp8,7,0.009343999748428663
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,4,4,64,0,1,float16,float16,15,0.008005333443482717
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,4,4,64,0,1,float16,fp8,15,0.00898133342464765
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,4,4,64,0,1,float16,float16,65535,0.5123039881388346
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,4,4,64,0,1,float16,float16,31,0.009589333087205887
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,4,4,64,0,1,float16,fp8,31,0.009514666472872099
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,4,4,64,0,1,float16,float16,63,0.008885333314538002
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,4,4,64,0,1,float16,fp8,63,0.010117333382368088
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,4,4,64,0,1,float16,fp8,65535,0.38788799444834393
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,4,4,64,0,1,float16,float16,127,0.00916800027092298
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,4,4,64,0,1,float16,fp8,127,0.01126933346192042
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,4,4,64,0,1,float16,fp8,255,0.011136000355084738
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,4,4,64,0,1,float16,float16,255,0.009178666397929192
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,4,4,64,0,1,float16,float16,511,0.010197333370645842
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,4,4,64,0,1,float16,float16,4095,0.016613333175579708
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,4,4,64,0,1,float16,fp8,511,0.011551999797423681
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,4,4,64,0,1,float16,float16,1023,0.01613866661985715
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,4,4,64,0,1,float16,fp8,1023,0.015781333049138386
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,4,4,64,0,1,float16,float16,2047,0.016895999511082966
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,4,4,64,0,1,float16,fp8,2047,0.016442666451136272
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,4,4,64,0,1,float16,fp8,4095,0.017152000218629837
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,4,4,64,0,1,float16,float16,8191,0.01741333305835724
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,4,4,64,0,1,float16,fp8,8191,0.01642666632930438
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,4,4,64,0,1,float16,float16,16383,0.01704000060757001
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,4,4,64,0,1,float16,fp8,16383,0.017071999609470367
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,4,4,64,0,1,float16,float16,32767,0.01801066721479098
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,4,4,64,0,1,float16,fp8,32767,0.017349333812793095
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,4,4,64,0,1,float16,float16,65535,0.023189333577950794
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,4,4,64,0,1,float16,fp8,65535,0.022511998812357586
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,4,4,64,0,1,float16,float16,1,0.007925333455204964
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,4,4,64,0,1,float16,fp8,1,0.009269333134094873
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,4,4,64,0,1,float16,float16,3,0.008165333420038223
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,4,4,64,0,1,float16,fp8,3,0.009312000125646591
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,4,4,64,0,1,float16,float16,7,0.007743999982873599
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,4,4,64,0,1,float16,float16,31,0.008336000144481659
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,4,4,64,0,1,float16,fp8,7,0.009173333023985228
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,4,4,64,0,1,float16,float16,15,0.007914666707317034
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,4,4,64,0,1,float16,fp8,15,0.00919999989370505
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,4,4,64,0,1,float16,fp8,131071,0.032127998769283295
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,4,4,64,0,1,float16,fp8,31,0.009482666850090027
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,4,4,64,0,1,float16,float16,63,0.008634666601816813
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,4,4,64,0,1,float16,fp8,63,0.0102613332370917
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,4,4,64,0,1,float16,float16,131071,0.045194665590922035
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,4,4,64,0,1,float16,float16,127,0.009093333035707474
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,4,4,64,0,1,float16,fp8,127,0.011658667276302973
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,4,4,64,0,1,float16,float16,255,0.008736000085870424
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,4,4,64,0,1,float16,fp8,255,0.011727999895811081
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,4,4,64,0,1,float16,float16,511,0.009904000287254652
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,4,4,64,0,1,float16,fp8,511,0.01184533288081487
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,4,4,64,0,1,float16,float16,1023,0.016010666886965435
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,4,4,64,0,1,float16,fp8,1023,0.015829333414634068
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,4,4,64,0,1,float16,float16,2047,0.016837333639462788
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,4,4,64,0,1,float16,fp8,2047,0.016469333320856094
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,4,4,64,0,1,float16,float16,4095,0.01716800034046173
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,4,4,64,0,1,float16,fp8,4095,0.016586666305859882
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,4,4,64,0,1,float16,float16,8191,0.017690667261679966
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,4,4,64,0,1,float16,fp8,8191,0.017344000438849132
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,4,4,64,0,1,float16,float16,16383,0.02802666773398717
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,4,4,64,0,1,float16,fp8,16383,0.027717334528764088
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,4,4,64,0,1,float16,float16,32767,0.038032000263532005
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,4,4,64,0,1,float16,fp8,32767,0.03791466603676478
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,4,4,64,0,1,float16,float16,65535,0.06861866513888042
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,4,4,64,0,1,float16,float16,1,0.009066666786869368
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,4,4,64,0,1,float16,fp8,1,0.00871999996403853
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,4,4,64,0,1,float16,fp8,65535,0.058415999015172325
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,4,4,64,0,1,float16,float16,3,0.009237333511312803
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,4,4,64,0,1,float16,fp8,3,0.008810666700204214
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,4,4,64,0,1,float16,float16,7,0.00919999989370505
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,4,4,64,0,1,float16,fp8,7,0.008767999708652496
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,4,4,64,0,1,float16,float16,15,0.009455999980370203
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,4,4,64,0,1,float16,fp8,15,0.009413333609700203
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,4,4,64,0,1,float16,float16,31,0.009455999980370203
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,4,4,64,0,1,float16,fp8,31,0.010053333515922228
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,4,4,64,0,1,float16,fp8,131071,0.10441066821416219
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,4,4,64,0,1,float16,float16,63,0.01110400011142095
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,4,4,64,0,1,float16,float16,131071,0.11957866946856181
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,4,4,64,0,1,float16,fp8,63,0.011823999385039011
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,4,4,64,0,1,float16,float16,127,0.011770666887362799
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,4,4,64,0,1,float16,fp8,127,0.011237333218256632
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,4,4,64,0,1,float16,float16,255,0.011962667107582092
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,4,4,64,0,1,float16,fp8,255,0.011871999750534693
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,4,4,64,0,1,float16,float16,511,0.013199999928474426
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,4,4,64,0,1,float16,fp8,511,0.013568000247081121
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,4,4,64,0,1,float16,float16,1023,0.019306667149066925
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,4,4,64,0,1,float16,float16,4095,0.07166400055090587
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,4,4,64,0,1,float16,fp8,1023,0.01871466636657715
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,4,4,64,0,1,float16,float16,2047,0.03979199876387914
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,4,4,64,0,1,float16,fp8,2047,0.029333333174387615
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,4,4,64,0,1,float16,fp8,4095,0.057674666245778404
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,4,4,64,0,1,float16,float16,8191,0.1321386694908142
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,4,4,64,0,1,float16,fp8,8191,0.10449066758155823
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,4,4,64,0,1,float16,float16,1,0.008047999814152718
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,4,4,64,0,1,float16,float16,16383,0.2574666738510132
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,4,4,64,0,1,float16,fp8,1,0.009258666386206945
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,4,4,64,0,1,float16,float16,3,0.007658666620651881
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,4,4,64,0,1,float16,fp8,16383,0.1969333291053772
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,4,4,64,0,1,float16,fp8,3,0.009375999992092451
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,4,4,64,0,1,float16,fp8,32767,0.5732213258743286
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,4,4,64,0,1,float16,float16,7,0.007967999825874964
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,4,4,64,0,1,float16,fp8,7,0.009488000224033991
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,4,4,64,0,1,float16,float16,15,0.008021333565314611
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,4,4,64,0,1,float16,fp8,15,0.008912000184257826
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,4,4,64,0,1,float16,float16,31,0.008432000254591307
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,4,4,64,0,1,float16,fp8,31,0.009589333087205887
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,4,4,64,0,1,float16,float16,63,0.008565333361426989
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,4,4,64,0,1,float16,float16,32767,0.6338186661402384
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,4,4,64,0,1,float16,fp8,63,0.00996800015370051
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,4,4,64,0,1,float16,float16,127,0.009429333110650381
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,4,4,64,0,1,float16,fp8,127,0.011674666156371435
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,4,4,64,0,1,float16,float16,255,0.008858666444818178
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,4,4,64,0,1,float16,fp8,255,0.01166933278242747
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,4,4,64,0,1,float16,float16,511,0.010250666489203772
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,4,4,64,0,1,float16,fp8,511,0.011258666714032492
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,4,4,64,0,1,float16,float16,1023,0.016271999726692837
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,4,4,64,0,1,float16,fp8,1023,0.015829333414634068
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,4,4,64,0,1,float16,float16,2047,0.017173333714405697
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,4,4,64,0,1,float16,fp8,2047,0.016943999876578648
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,4,4,64,0,1,float16,float16,4095,0.016597333053747814
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,4,4,64,0,1,float16,fp8,4095,0.017114666601022083
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,4,4,64,0,1,float16,float16,8191,0.028175999720891316
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,4,4,64,0,1,float16,fp8,8191,0.02739733209212621
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,4,4,64,0,1,float16,float16,16383,0.03867733230193456
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,4,4,64,0,1,float16,fp8,16383,0.03790933390458425
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,4,4,64,0,1,float16,float16,32767,0.06856533388296764
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,4,4,64,0,1,float16,fp8,32767,0.05719466507434845
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,4,4,64,0,1,float16,float16,65535,0.11563199758529663
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,4,4,64,0,1,float16,float16,1,0.01461333284775416
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,4,4,64,0,1,float16,fp8,1,0.01458666721979777
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,4,4,64,0,1,float16,float16,3,0.01469333345691363
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,4,4,64,0,1,float16,fp8,65535,0.10205333431561787
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,4,4,64,0,1,float16,fp8,3,0.014671999961137772
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,4,4,64,0,1,float16,float16,7,0.014282666146755219
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,4,4,64,0,1,float16,fp8,7,0.015050667027632395
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,4,4,64,0,1,float16,float16,15,0.01462399959564209
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,4,4,64,0,1,float16,fp8,15,0.015034666905800501
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,4,4,64,0,1,float16,fp8,131071,0.17603200674057007
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,4,4,64,0,1,float16,float16,31,0.01598400001724561
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,4,4,64,0,1,float16,fp8,31,0.016058667252461117
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,4,4,64,0,1,float16,float16,63,0.018511999398469925
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,4,4,64,0,1,float16,float16,131071,0.2049973408381144
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,4,4,64,0,1,float16,fp8,63,0.019343999524911244
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,4,4,64,0,1,float16,float16,127,0.019178666174411774
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,4,4,64,0,1,float16,fp8,127,0.01961600035429001
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,4,4,64,0,1,float16,float16,255,0.019215999792019527
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,4,4,64,0,1,float16,fp8,255,0.01940800001223882
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,4,4,64,0,1,float16,float16,511,0.022917332748572033
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,4,4,64,0,1,float16,fp8,511,0.022687998910744984
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,4,4,64,0,1,float16,float16,1023,0.03919466584920883
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,4,4,64,0,1,float16,fp8,1023,0.033701332906881966
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,4,4,64,0,1,float16,float16,2047,0.07464533547560374
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,4,4,64,0,1,float16,fp8,2047,0.05864533285299937
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,4,4,64,0,1,float16,fp8,4095,0.10850666960080464
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,4,4,64,0,1,float16,float16,4095,0.13616533080736795
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,4,4,64,0,1,float16,float16,8191,0.25569599866867065
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,4,4,64,0,1,float16,float16,1,0.013962666193644205
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,4,4,64,0,1,float16,fp8,8191,0.20066134134928384
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,4,4,64,0,1,float16,fp8,1,0.0141546664138635
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,4,4,64,0,1,float16,float16,3,0.01434133326013883
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,4,4,64,0,1,float16,float16,7,0.014384000251690546
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,4,4,64,0,1,float16,fp8,3,0.014629332969586054
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,4,4,64,0,1,float16,float16,15,0.014842666685581207
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,4,4,64,0,1,float16,float16,16383,0.5028533140818278
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,4,4,64,0,1,float16,fp8,7,0.015285332997639975
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,4,4,64,0,1,float16,fp8,16383,0.38385601838429767
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,4,4,64,0,1,float16,fp8,15,0.015493333339691162
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,4,4,64,0,1,float16,float16,31,0.01823466643691063
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,4,4,64,0,1,float16,float16,63,0.01810666670401891
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,4,4,64,0,1,float16,fp8,31,0.019909333437681198
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,4,4,64,0,1,float16,fp8,63,0.019071999937295914
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,4,4,64,0,1,float16,float16,127,0.019023999571800232
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,4,4,64,0,1,float16,fp8,255,0.02203733225663503
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,4,4,64,0,1,float16,fp8,127,0.01911466692884763
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,4,4,64,0,1,float16,float16,255,0.02309333284695943
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,4,4,64,0,1,float16,float16,511,0.0374293327331543
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,4,4,64,0,1,float16,fp8,511,0.03401066611210505
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,4,4,64,0,1,float16,float16,1023,0.07401066521803538
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,4,4,64,0,1,float16,fp8,1023,0.05791999896367391
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,4,4,64,0,1,float16,float16,2047,0.13482667009035745
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,4,4,64,0,1,float16,fp8,2047,0.10951466361681621
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,4,4,64,0,1,float16,fp8,4095,0.20434133211771646
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,4,4,64,0,1,float16,float16,4095,0.2561759948730469
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,4,4,64,0,1,float16,float16,1,0.02455466737349828
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,4,4,64,0,1,float16,fp8,1,0.024853333830833435
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,4,4,64,0,1,float16,float16,3,0.024480000138282776
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,4,4,64,0,1,float16,float16,7,0.02502399931351344
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,4,4,64,0,1,float16,fp8,3,0.024858665963013966
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,4,4,64,0,1,float16,float16,15,0.025546667476495106
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,4,4,64,0,1,float16,fp8,7,0.02566933383544286
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,4,4,64,0,1,float16,fp8,15,0.026613332331180573
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,4,4,64,0,1,float16,float16,31,0.032170665760835014
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,4,4,64,0,1,float16,fp8,31,0.03263466556866964
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,4,4,64,0,1,float16,fp8,63,0.032602667808532715
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,4,4,64,0,1,float16,float16,63,0.0324799989660581
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,4,4,64,0,1,float16,float16,127,0.033285332222779594
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,4,4,64,0,1,float16,fp8,127,0.032602667808532715
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,4,4,64,0,1,float16,float16,255,0.04506133496761322
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,4,4,64,0,1,float16,fp8,255,0.040234667559464775
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,4,4,64,0,1,float16,float16,511,0.0685280015071233
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,4,4,64,0,1,float16,fp8,511,0.06526400148868561
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,4,4,64,0,1,float16,fp8,1023,0.10776533683141072
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,4,4,64,0,1,float16,float16,1023,0.1367093324661255
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,4,4,64,0,1,float16,float16,1,0.008042666440208754
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,4,4,64,0,1,float16,float16,7,0.008192000289758047
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,4,4,64,0,1,float16,fp8,1,0.009408000235756239
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,4,4,64,0,1,float16,float16,3,0.008432000254591307
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,4,4,64,0,1,float16,fp8,2047,0.20619199673334757
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,4,4,64,0,1,float16,fp8,3,0.009450666606426239
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,4,4,64,0,1,float16,float16,63,0.008597333605090777
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,4,4,64,0,1,float16,float16,2047,0.2602720061937968
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,4,4,64,0,1,float16,fp8,7,0.009082666908701261
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,4,4,64,0,1,float16,float16,15,0.008170666793982187
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,4,4,64,0,1,float16,fp8,15,0.009621333330869675
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,4,4,64,0,1,float16,float16,31,0.0085333331177632
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,4,4,64,0,1,float16,fp8,31,0.009674666449427605
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,4,4,64,0,1,float16,fp8,63,0.010191999996701876
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,4,4,64,0,1,float16,fp8,511,0.011946666985750198
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,4,4,64,0,1,float16,float16,127,0.008837333569924036
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,4,4,64,0,1,float16,fp8,127,0.01139733319481214
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,4,4,64,0,1,float16,float16,255,0.009317333499590555
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,4,4,64,0,1,float16,fp8,2047,0.016751999656359356
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,4,4,64,0,1,float16,fp8,255,0.011722666521867117
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,4,4,64,0,1,float16,float16,511,0.010586666564146677
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,4,4,64,0,1,float16,float16,1023,0.01628799984852473
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,4,4,64,0,1,float16,fp8,1023,0.01605333387851715
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,4,4,64,0,1,float16,float16,2047,0.01758933315674464
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,4,4,64,0,1,float16,float16,4095,0.027829334139823914
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,4,4,64,0,1,float16,fp8,4095,0.027797333896160126
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,4,4,64,0,1,float16,float16,8191,0.03860799968242645
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,4,4,64,0,1,float16,fp8,8191,0.037503999968369804
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,4,4,64,0,1,float16,float16,16383,0.06390933195749919
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,4,4,64,0,1,float16,fp8,16383,0.05547733108202616
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,4,4,64,0,1,float16,float16,32767,0.10505599776903789
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,4,4,64,0,1,float16,fp8,32767,0.09195733070373535
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,4,4,64,0,1,float16,float16,65535,0.18648000558217367
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,4,4,64,0,1,float16,fp8,65535,0.15942399700482687
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,4,4,64,0,1,float16,float16,1,0.044112001856168113
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,4,4,64,0,1,float16,fp8,1,0.04466133316357931
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,4,4,64,0,1,float16,float16,7,0.045109331607818604
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,4,4,64,0,1,float16,float16,3,0.043951998154322304
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,4,4,64,0,1,float16,fp8,3,0.04489066700140635
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,4,4,64,0,1,float16,fp8,15,0.047653332352638245
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,4,4,64,0,1,float16,fp8,7,0.046394666035970054
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,4,4,64,0,1,float16,float16,15,0.04738666613896688
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,4,4,64,0,1,float16,float16,31,0.05886933207511902
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,4,4,64,0,1,float16,float16,131071,0.35491732756296795
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,4,4,64,0,1,float16,fp8,31,0.06011733412742615
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,4,4,64,0,1,float16,fp8,131071,0.302074670791626
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,4,4,64,0,1,float16,float16,63,0.059279998143514
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,4,4,64,0,1,float16,fp8,63,0.059893334905306496
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,4,4,64,0,1,float16,float16,127,0.06642666459083557
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,4,4,64,0,1,float16,fp8,127,0.06092800199985504
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,4,4,64,0,1,float16,fp8,511,0.12166933218638103
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,4,4,64,0,1,float16,fp8,255,0.0800853321949641
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,4,4,64,0,1,float16,float16,255,0.08130133152008057
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,4,4,64,0,1,float16,float16,511,0.1253493328889211
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,4,4,64,0,1,float16,float16,1023,0.2577173312505086
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,4,4,64,0,1,float16,fp8,1023,0.2049600084622701
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,4,4,64,0,1,float16,fp8,1,0.08341866731643677
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,4,4,64,0,1,float16,float16,1,0.08212266862392426
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,4,4,64,0,1,float16,float16,3,0.08229866623878479
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,4,4,64,0,1,float16,fp8,3,0.08418666323026021
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,4,4,64,0,1,float16,fp8,15,0.08949866890907288
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,4,4,64,0,1,float16,float16,7,0.08481599887212117
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,4,4,64,0,1,float16,fp8,7,0.08700799942016602
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,4,4,64,0,1,float16,float16,15,0.0883733332157135
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,4,4,64,0,1,float16,fp8,63,0.11552533507347107
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,4,4,64,0,1,float16,float16,31,0.11345066626866658
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,4,4,64,0,1,float16,fp8,31,0.11340799927711487
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,4,4,64,0,1,float16,float16,63,0.12106666962305705
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,4,4,64,0,1,float16,float16,1,0.008346666892369589
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,4,4,64,0,1,float16,fp8,1,0.009642666826645533
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,4,4,64,0,1,float16,float16,127,0.12390933434168498
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,4,4,64,0,1,float16,float16,3,0.008400000010927519
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,4,4,64,0,1,float16,fp8,255,0.15103466312090555
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,4,4,64,0,1,float16,fp8,127,0.12310399611790974
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,4,4,64,0,1,float16,fp8,3,0.009674666449427605
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,4,4,64,0,1,float16,float16,255,0.15333867073059082
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,4,4,64,0,1,float16,float16,7,0.008192000289758047
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,4,4,64,0,1,float16,fp8,7,0.00949866697192192
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,4,4,64,0,1,float16,float16,15,0.007786666974425316
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,4,4,64,0,1,float16,fp8,15,0.009301333377758661
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,4,4,64,0,1,float16,float16,31,0.008581333483258883
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,4,4,64,0,1,float16,fp8,31,0.009690666571259499
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,4,4,64,0,1,float16,float16,63,0.00916800027092298
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,4,4,64,0,1,float16,float16,511,0.23850667476654053
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,4,4,64,0,1,float16,fp8,63,0.010415999839703241
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,4,4,64,0,1,float16,fp8,511,0.23349332809448242
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,4,4,64,0,1,float16,float16,127,0.008874666566650072
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,4,4,64,0,1,float16,fp8,127,0.011648000528415045
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,4,4,64,0,1,float16,float16,255,0.009455999980370203
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,4,4,64,0,1,float16,fp8,255,0.011893333246310553
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,4,4,64,0,1,float16,float16,511,0.010469333579142889
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,4,4,64,0,1,float16,float16,1023,0.016688000410795212
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,4,4,64,0,1,float16,fp8,511,0.011989332735538483
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,4,4,64,0,1,float16,fp8,1023,0.01588800052801768
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,4,4,64,0,1,float16,float16,2047,0.028394666810830433
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,4,4,64,0,1,float16,fp8,2047,0.027749332288901012
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,4,4,64,0,1,float16,float16,4095,0.036144000788529716
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,4,4,64,0,1,float16,fp8,4095,0.03522133330504099
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,4,4,64,0,1,float16,float16,8191,0.05502399802207947
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,4,4,64,0,1,float16,fp8,8191,0.048325334986050926
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,4,4,64,0,1,float16,float16,16383,0.09443733096122742
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,4,4,64,0,1,float16,fp8,16383,0.07943999767303467
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,4,4,64,0,1,float16,float16,32767,0.16359466314315796
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,4,4,64,0,1,float16,fp8,32767,0.14072533448537192
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,4,4,128,0,1,float16,float16,1,0.009642666826645533
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,4,4,128,0,1,float16,fp8,1,0.00927466650803884
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,4,4,128,0,1,float16,float16,3,0.009509333098928133
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,4,4,128,0,1,float16,fp8,3,0.009541333342591921
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,4,4,128,0,1,float16,float16,7,0.00961599995692571
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,4,4,64,0,1,float16,fp8,65535,0.2582239905993144
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,4,4,128,0,1,float16,fp8,15,0.00972800018886725
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,4,4,128,0,1,float16,fp8,7,0.00921066664159298
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,4,4,64,0,1,float16,float16,65535,0.30293865998586017
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,4,4,128,0,1,float16,float16,15,0.009637333452701569
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,4,4,128,0,1,float16,float16,31,0.0103946669648091
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,4,4,128,0,1,float16,fp8,31,0.010512000570694605
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,4,4,128,0,1,float16,float16,63,0.01190399999419848
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,4,4,128,0,1,float16,fp8,63,0.011578666667143503
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,4,4,128,0,1,float16,float16,127,0.012085333466529846
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,4,4,128,0,1,float16,fp8,127,0.012175999581813812
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,4,4,128,0,1,float16,float16,255,0.012634667257467905
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,4,4,128,0,1,float16,fp8,255,0.012351999680201212
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,4,4,128,0,1,float16,float16,511,0.026917333404223125
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,4,4,128,0,1,float16,fp8,511,0.026047999660174053
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,4,4,64,0,1,float16,fp8,131071,0.39238401254018146
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,4,4,128,0,1,float16,float16,1023,0.031199999153614044
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,4,4,128,0,1,float16,fp8,1023,0.03089066594839096
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,4,4,64,0,1,float16,float16,131071,0.5118666489919027
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,4,4,128,0,1,float16,float16,2047,0.047237331668535866
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,4,4,128,0,1,float16,fp8,2047,0.041759997606277466
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,4,4,128,0,1,float16,fp8,4095,0.06795733173688252
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,4,4,128,0,1,float16,float16,4095,0.07978133360544841
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,4,4,128,0,1,float16,float16,8191,0.14410133163134256
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,4,4,128,0,1,float16,fp8,8191,0.11854933698972066
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,4,4,128,0,1,float16,float16,16383,0.26402666171391803
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,4,4,128,0,1,float16,fp8,16383,0.21566933393478394
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,4,4,128,0,1,float16,float16,1,0.00897066667675972
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,4,4,128,0,1,float16,fp8,1,0.009082666908701261
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,4,4,128,0,1,float16,float16,3,0.009226666763424873
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,4,4,128,0,1,float16,float16,32767,0.5023146470387777
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,4,4,128,0,1,float16,fp8,32767,0.40426135063171387
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,4,4,128,0,1,float16,fp8,3,0.009125333279371262
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,4,4,128,0,1,float16,float16,7,0.009103999783595404
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,4,4,128,0,1,float16,fp8,7,0.009786666681369146
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,4,4,128,0,1,float16,fp8,15,0.009402666861812273
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,4,4,128,0,1,float16,float16,15,0.009162666896979014
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,4,4,128,0,1,float16,float16,65535,0.9925440152486166
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,4,4,128,0,1,float16,float16,31,0.009839999799927076
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,4,4,128,0,1,float16,fp8,31,0.010005333150426546
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,4,4,128,0,1,float16,float16,63,0.01157333329319954
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,4,4,128,0,1,float16,fp8,63,0.011567999919255575
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,4,4,128,0,1,float16,fp8,65535,0.7915999889373779
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,4,4,128,0,1,float16,float16,127,0.011429333438475927
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,4,4,128,0,1,float16,float16,255,0.011247999966144562
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,4,4,128,0,1,float16,fp8,127,0.011461333682139715
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,4,4,128,0,1,float16,fp8,255,0.01173866664369901
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,4,4,128,0,1,float16,float16,511,0.015770666301250458
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,4,4,128,0,1,float16,fp8,511,0.015647999942302704
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,4,4,128,0,1,float16,float16,1023,0.016607999801635742
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,4,4,128,0,1,float16,fp8,1023,0.0164533331990242
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,4,4,128,0,1,float16,float16,2047,0.016341333587964375
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,4,4,128,0,1,float16,fp8,2047,0.01687466725707054
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,4,4,128,0,1,float16,float16,4095,0.016938666502634685
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,4,4,128,0,1,float16,fp8,4095,0.01710933322707812
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,4,4,128,0,1,float16,float16,8191,0.017157333592573803
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,4,4,128,0,1,float16,fp8,8191,0.016906666258970898
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,4,4,128,0,1,float16,float16,16383,0.017583999782800674
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,4,4,128,0,1,float16,fp8,16383,0.01681600014368693
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,4,4,128,0,1,float16,float16,32767,0.02293866624434789
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,4,4,128,0,1,float16,fp8,32767,0.022661333282788593
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,4,4,128,0,1,float16,float16,1,0.008522666369875273
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,4,4,128,0,1,float16,fp8,1,0.009205333267649015
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,4,4,128,0,1,float16,float16,65535,0.0447573314110438
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,4,4,128,0,1,float16,float16,3,0.009082666908701261
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,4,4,128,0,1,float16,fp8,3,0.008890666688481966
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,4,4,128,0,1,float16,fp8,65535,0.03251733382542928
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,4,4,128,0,1,float16,float16,15,0.009392000113924345
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,4,4,128,0,1,float16,float16,7,0.009061333412925402
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,4,4,128,0,1,float16,fp8,31,0.010933333386977514
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,4,4,128,0,1,float16,fp8,7,0.009392000113924345
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,4,4,128,0,1,float16,fp8,63,0.011610666910807291
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,4,4,128,0,1,float16,fp8,15,0.009482666850090027
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,4,4,128,0,1,float16,float16,31,0.009610666582981745
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,4,4,128,0,1,float16,float16,63,0.01156266654531161
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,4,4,128,0,1,float16,float16,127,0.011642667154471079
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,4,4,128,0,1,float16,float16,131071,0.07513600091139476
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,4,4,128,0,1,float16,fp8,131071,0.06155733267466227
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,4,4,128,0,1,float16,fp8,127,0.011882666498422623
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,4,4,128,0,1,float16,float16,255,0.011770666887362799
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,4,4,128,0,1,float16,fp8,255,0.011578666667143503
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,4,4,128,0,1,float16,float16,511,0.015386667102575302
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,4,4,128,0,1,float16,fp8,511,0.015770666301250458
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,4,4,128,0,1,float16,float16,1023,0.016890666137139004
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,4,4,128,0,1,float16,fp8,1023,0.016645333419243496
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,4,4,128,0,1,float16,float16,2047,0.016906666258970898
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,4,4,128,0,1,float16,fp8,2047,0.016976000120242436
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,4,4,128,0,1,float16,float16,4095,0.017162666966517765
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,4,4,128,0,1,float16,fp8,16383,0.03788266579310099
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,4,4,128,0,1,float16,fp8,4095,0.017136000096797943
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,4,4,128,0,1,float16,float16,8191,0.027552001178264618
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,4,4,128,0,1,float16,fp8,8191,0.027845333019892376
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,4,4,128,0,1,float16,float16,16383,0.03859733293453852
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,4,4,128,0,1,float16,float16,32767,0.06761066615581512
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,4,4,128,0,1,float16,fp8,32767,0.058101331194241844
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,4,4,128,0,1,float16,float16,1,0.009082666908701261
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,4,4,128,0,1,float16,fp8,1,0.009402666861812273
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,4,4,128,0,1,float16,float16,3,0.009088000282645226
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,4,4,128,0,1,float16,float16,65535,0.11940800150235494
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,4,4,128,0,1,float16,fp8,3,0.00956266683836778
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,4,4,128,0,1,float16,fp8,7,0.009434666484594345
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,4,4,128,0,1,float16,float16,7,0.009375999992092451
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,4,4,128,0,1,float16,fp8,65535,0.10682666301727295
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,4,4,128,0,1,float16,float16,15,0.00985599992175897
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,4,4,128,0,1,float16,fp8,15,0.010015999898314476
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,4,4,128,0,1,float16,float16,31,0.011690666278203329
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,4,4,128,0,1,float16,fp8,31,0.011754666765530905
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,4,4,128,0,1,float16,fp8,131071,0.19150400161743164
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,4,4,128,0,1,float16,float16,63,0.011850666254758835
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,4,4,128,0,1,float16,float16,131071,0.21558932463328043
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,4,4,128,0,1,float16,fp8,63,0.011626667032639185
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,4,4,128,0,1,float16,float16,127,0.011882666498422623
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,4,4,128,0,1,float16,fp8,127,0.011989332735538483
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,4,4,128,0,1,float16,float16,255,0.013861333330472311
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,4,4,128,0,1,float16,fp8,255,0.0138026662170887
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,4,4,128,0,1,float16,float16,511,0.01846933364868164
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,4,4,128,0,1,float16,fp8,511,0.01886933296918869
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,4,4,128,0,1,float16,float16,1023,0.040661332507928215
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,4,4,128,0,1,float16,fp8,1023,0.029135999580224354
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,4,4,128,0,1,float16,float16,2047,0.07236800094445546
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,4,4,128,0,1,float16,fp8,2047,0.056464001536369324
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,4,4,128,0,1,float16,fp8,4095,0.10363200306892395
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,4,4,128,0,1,float16,float16,4095,0.13225066661834717
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,4,4,128,0,1,float16,float16,8191,0.2536853353182475
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,4,4,128,0,1,float16,fp8,8191,0.1935946742693583
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,4,4,128,0,1,float16,float16,1,0.00916800027092298
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,4,4,128,0,1,float16,float16,16383,0.504202683766683
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,4,4,128,0,1,float16,fp8,1,0.016602666427691776
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,4,4,128,0,1,float16,float16,3,0.008736000085870424
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,4,4,128,0,1,float16,fp8,16383,0.3760106563568115
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,4,4,128,0,1,float16,fp8,7,0.010981333752473196
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,4,4,128,0,1,float16,fp8,3,0.009392000113924345
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,4,4,128,0,1,float16,float16,7,0.009061333412925402
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,4,4,128,0,1,float16,float16,15,0.008965333302815756
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,4,4,128,0,1,float16,fp8,31,0.010090666512648264
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,4,4,128,0,1,float16,float16,31,0.00985599992175897
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,4,4,128,0,1,float16,fp8,15,0.009712000067035357
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,4,4,128,0,1,float16,float16,63,0.011717333147923151
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,4,4,128,0,1,float16,fp8,63,0.011258666714032492
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,4,4,128,0,1,float16,float16,127,0.011695999652147293
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,4,4,128,0,1,float16,fp8,32767,1.1371839841206868
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,4,4,128,0,1,float16,fp8,127,0.011616000284751257
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,4,4,128,0,1,float16,float16,32767,1.2647039890289307
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,4,4,128,0,1,float16,float16,255,0.012778667112191519
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,4,4,128,0,1,float16,fp8,255,0.011909333368142446
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,4,4,128,0,1,float16,float16,511,0.015493333339691162
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,4,4,128,0,1,float16,fp8,511,0.015930666277805965
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,4,4,128,0,1,float16,float16,1023,0.01685333376129468
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,4,4,128,0,1,float16,fp8,1023,0.016842667013406754
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,4,4,128,0,1,float16,float16,2047,0.01748266691962878
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,4,4,128,0,1,float16,fp8,2047,0.017237332959969837
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,4,4,128,0,1,float16,float16,4095,0.02826133370399475
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,4,4,128,0,1,float16,fp8,4095,0.027471999327341717
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,4,4,128,0,1,float16,float16,16383,0.06830400228500366
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,4,4,128,0,1,float16,float16,8191,0.03862400104602178
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,4,4,128,0,1,float16,fp8,8191,0.0378560001651446
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,4,4,128,0,1,float16,fp8,16383,0.057130664587020874
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,4,4,128,0,1,float16,float16,32767,0.11499733726183574
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,4,4,128,0,1,float16,fp8,32767,0.10548266768455505
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,4,4,128,0,1,float16,float16,1,0.014746667196353277
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,4,4,128,0,1,float16,fp8,1,0.014720000326633453
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,4,4,128,0,1,float16,float16,65535,0.20690133174260458
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,4,4,128,0,1,float16,float16,3,0.014815999815861383
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,4,4,128,0,1,float16,fp8,3,0.015024000157912573
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,4,4,128,0,1,float16,fp8,65535,0.18314667542775473
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,4,4,128,0,1,float16,float16,7,0.014576000471909841
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,4,4,128,0,1,float16,fp8,7,0.014511999984582266
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,4,4,128,0,1,float16,float16,15,0.015882667154073715
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,4,4,128,0,1,float16,fp8,15,0.016010666886965435
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,4,4,128,0,1,float16,float16,63,0.01883200059334437
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,4,4,128,0,1,float16,float16,31,0.019333332777023315
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,4,4,128,0,1,float16,fp8,31,0.019253333409627277
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,4,4,128,0,1,float16,float16,131071,0.38788799444834393
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,4,4,128,0,1,float16,fp8,63,0.01953599974513054
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,4,4,128,0,1,float16,float16,255,0.023728000621000927
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,4,4,128,0,1,float16,float16,127,0.019776000330845516
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,4,4,128,0,1,float16,fp8,127,0.018965333700180054
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,4,4,128,0,1,float16,fp8,255,0.022976001103719074
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,4,4,128,0,1,float16,fp8,1023,0.059338668982187905
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,4,4,128,0,1,float16,fp8,131071,0.33798400561014813
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,4,4,128,0,1,float16,float16,511,0.0390079990029335
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,4,4,128,0,1,float16,fp8,511,0.0341333324710528
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,4,4,128,0,1,float16,float16,1023,0.07437333464622498
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,4,4,128,0,1,float16,float16,2047,0.13563199838002524
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,4,4,128,0,1,float16,fp8,2047,0.10666132966677348
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,4,4,128,0,1,float16,float16,4095,0.25614933172861737
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,4,4,128,0,1,float16,fp8,4095,0.19887999693552652
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,4,4,128,0,1,float16,float16,1,0.014352000008026758
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,4,4,128,0,1,float16,float16,8191,0.49798401196797687
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,4,4,128,0,1,float16,fp8,1,0.014357333381970724
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,4,4,128,0,1,float16,fp8,8191,0.3777066469192505
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,4,4,128,0,1,float16,float16,3,0.014709333578745524
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,4,4,128,0,1,float16,fp8,3,0.015333333363135656
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,4,4,128,0,1,float16,float16,7,0.015360000232855478
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,4,4,128,0,1,float16,float16,15,0.01810666670401891
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,4,4,128,0,1,float16,fp8,7,0.01533866673707962
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,4,4,128,0,1,float16,float16,16383,0.9938773314158121
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,4,4,128,0,1,float16,fp8,15,0.018618666877349217
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,4,4,128,0,1,float16,float16,31,0.01877333347996076
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,4,4,128,0,1,float16,fp8,16383,0.7390773296356201
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,4,4,128,0,1,float16,fp8,31,0.018789333601792652
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,4,4,128,0,1,float16,fp8,63,0.01836799954374631
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,4,4,128,0,1,float16,float16,63,0.018778666853904724
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,4,4,128,0,1,float16,float16,127,0.02276266614596049
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,4,4,128,0,1,float16,fp8,127,0.022175999979178112
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,4,4,128,0,1,float16,float16,255,0.03772799919048945
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,4,4,128,0,1,float16,fp8,255,0.03338133295377096
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,4,4,128,0,1,float16,float16,511,0.07436266541481018
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,4,4,128,0,1,float16,float16,1023,0.13497066497802734
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,4,4,128,0,1,float16,fp8,511,0.05862399935722351
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,4,4,128,0,1,float16,fp8,1023,0.11020267009735107
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,4,4,128,0,1,float16,float16,2047,0.256933331489563
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,4,4,128,0,1,float16,fp8,2047,0.2063466707865397
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,4,4,128,0,1,float16,fp8,1,0.02462399999300639
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,4,4,128,0,1,float16,float16,1,0.024304000039895374
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,4,4,128,0,1,float16,fp8,4095,0.3956799904505412
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,4,4,128,0,1,float16,float16,4095,0.5000640153884888
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,4,4,128,0,1,float16,float16,7,0.025807999074459076
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,4,4,128,0,1,float16,fp8,7,0.026522666215896606
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,4,4,128,0,1,float16,float16,3,0.025098666548728943
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,4,4,128,0,1,float16,fp8,3,0.026026666164398193
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,4,4,128,0,1,float16,float16,15,0.03195200115442276
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,4,4,128,0,1,float16,fp8,15,0.03278933217128118
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,4,4,128,0,1,float16,float16,31,0.032746667663256325
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,4,4,128,0,1,float16,fp8,31,0.034917332231998444
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,4,4,128,0,1,float16,float16,63,0.0334346666932106
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,4,4,128,0,1,float16,fp8,63,0.032746667663256325
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,4,4,128,0,1,float16,float16,127,0.04514666895071665
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,4,4,128,0,1,float16,float16,255,0.06819733480612437
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,4,4,128,0,1,float16,fp8,127,0.04051200052102407
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,4,4,128,0,1,float16,fp8,255,0.0660693347454071
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,4,4,128,0,1,float16,float16,511,0.13724799950917563
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,4,4,128,0,1,float16,fp8,511,0.10897599657376607
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,4,4,128,0,1,float16,float16,1023,0.2597813407580058
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,4,4,128,0,1,float16,float16,1,0.00949866697192192
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,4,4,128,0,1,float16,fp8,1,0.009349333122372627
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,4,4,128,0,1,float16,fp8,1023,0.20781334241231283
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,4,4,128,0,1,float16,float16,3,0.009237333511312803
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,4,4,128,0,1,float16,float16,2047,0.5002719958623251
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,4,4,128,0,1,float16,fp8,3,0.00927466650803884
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,4,4,128,0,1,float16,float16,7,0.009445333232482275
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,4,4,128,0,1,float16,fp8,7,0.009423999736706415
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,4,4,128,0,1,float16,float16,15,0.009381333366036415
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,4,4,128,0,1,float16,fp8,15,0.009626666704813639
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,4,4,128,0,1,float16,fp8,2047,0.39609599113464355
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,4,4,128,0,1,float16,float16,31,0.009733333562811216
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,4,4,128,0,1,float16,fp8,31,0.010149333626031876
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,4,4,128,0,1,float16,float16,63,0.011893333246310553
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,4,4,128,0,1,float16,fp8,63,0.011648000528415045
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,4,4,128,0,1,float16,float16,127,0.011941333611806234
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,4,4,128,0,1,float16,fp8,127,0.011792000383138657
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,4,4,128,0,1,float16,float16,255,0.01202133297920227
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,4,4,128,0,1,float16,fp8,1023,0.01703466723362605
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,4,4,128,0,1,float16,fp8,255,0.011525332927703857
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,4,4,128,0,1,float16,float16,511,0.016010666886965435
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,4,4,128,0,1,float16,fp8,511,0.01626666635274887
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,4,4,128,0,1,float16,float16,1023,0.01720533271630605
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,4,4,128,0,1,float16,float16,2047,0.027989332874615986
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,4,4,128,0,1,float16,fp8,2047,0.028117333849271137
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,4,4,128,0,1,float16,float16,4095,0.038245332737763725
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,4,4,128,0,1,float16,fp8,4095,0.037231999138991036
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,4,4,128,0,1,float16,float16,8191,0.06451733410358429
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,4,4,128,0,1,float16,fp8,8191,0.05565333366394043
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,4,4,128,0,1,float16,float16,16383,0.10593600074450175
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,4,4,128,0,1,float16,fp8,16383,0.09477333227793376
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,4,4,128,0,1,float16,float16,32767,0.18750399351119995
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,4,4,128,0,1,float16,fp8,32767,0.16576533516248068
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,4,4,128,0,1,float16,float16,65535,0.35315199693044025
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,4,4,128,0,1,float16,float16,1,0.043605332573254905
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,4,4,128,0,1,float16,fp8,65535,0.3115466634432475
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,4,4,128,0,1,float16,fp8,1,0.04524266719818115
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,4,4,128,0,1,float16,float16,3,0.045237332582473755
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,4,4,128,0,1,float16,float16,7,0.04659733176231384
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,4,4,128,0,1,float16,fp8,3,0.04650666813055674
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,4,4,128,0,1,float16,fp8,7,0.04844266672929128
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,4,4,128,0,1,float16,float16,131071,0.6859306494394938
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,4,4,128,0,1,float16,fp8,31,0.059402664502461754
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,4,4,128,0,1,float16,fp8,131071,0.6070880095163981
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,4,4,128,0,1,float16,float16,15,0.05927466849486033
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,4,4,128,0,1,float16,fp8,15,0.059936001896858215
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,4,4,128,0,1,float16,float16,31,0.061535999178886414
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,4,4,128,0,1,float16,float16,63,0.06576000154018402
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,4,4,128,0,1,float16,fp8,127,0.08012266457080841
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,4,4,128,0,1,float16,fp8,63,0.06121600170930227
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,4,4,128,0,1,float16,float16,127,0.08148266871770223
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,4,4,128,0,1,float16,float16,255,0.12669333815574646
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,4,4,128,0,1,float16,fp8,255,0.12237333257993062
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,4,4,128,0,1,float16,float16,511,0.25917865832646686
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,4,4,128,0,1,float16,fp8,511,0.20625599225362143
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,4,4,128,0,1,float16,fp8,1023,0.39827199776967365
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,4,4,128,0,1,float16,float16,1023,0.5054293473561605
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,4,4,128,0,1,float16,float16,1,0.08188266555468242
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,4,4,128,0,1,float16,fp8,1,0.08430400490760803
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,4,4,128,0,1,float16,fp8,7,0.09052266677220662
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,4,4,128,0,1,float16,float16,3,0.08455466230710347
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,4,4,128,0,1,float16,float16,15,0.11483200391133626
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,4,4,128,0,1,float16,fp8,3,0.08742400010426839
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,4,4,128,0,1,float16,float16,7,0.0886400043964386
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,4,4,128,0,1,float16,float16,63,0.12405866384506226
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,4,4,128,0,1,float16,fp8,15,0.11283733447392781
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,4,4,128,0,1,float16,float16,127,0.15405866503715515
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,4,4,128,0,1,float16,float16,31,0.12236266334851582
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,4,4,128,0,1,float16,fp8,31,0.11746133367220561
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,4,4,128,0,1,float16,fp8,63,0.12353066603342693
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,4,4,128,0,1,float16,float16,1,0.009365333244204521
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,4,4,128,0,1,float16,fp8,1,0.00956266683836778
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,4,4,128,0,1,float16,fp8,127,0.1530080040295919
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,4,4,128,0,1,float16,float16,3,0.00926399976015091
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,4,4,128,0,1,float16,fp8,3,0.009557333464423815
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,4,4,128,0,1,float16,fp8,255,0.23468265930811563
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,4,4,128,0,1,float16,float16,7,0.009296000003814697
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,4,4,128,0,1,float16,float16,255,0.2405386765797933
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,4,4,128,0,1,float16,fp8,7,0.00943999985853831
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,4,4,128,0,1,float16,float16,15,0.009904000287254652
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,4,4,128,0,1,float16,fp8,511,0.3971039851506551
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,4,4,128,0,1,float16,fp8,15,0.00979200005531311
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,4,4,128,0,1,float16,float16,31,0.009717333440979322
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,4,4,128,0,1,float16,fp8,31,0.010319999729593595
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,4,4,128,0,1,float16,float16,63,0.011871999750534693
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,4,4,128,0,1,float16,fp8,63,0.011754666765530905
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,4,4,128,0,1,float16,float16,511,0.016480000068744022
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,4,4,128,0,1,float16,float16,511,0.4933919906616211
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,4,4,128,0,1,float16,float16,127,0.011813333878914515
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,4,4,128,0,1,float16,fp8,127,0.011317333827416102
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,4,4,128,0,1,float16,float16,255,0.012181332955757776
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,4,4,128,0,1,float16,fp8,255,0.011509332805871964
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,4,4,128,0,1,float16,fp8,511,0.016501333564519882
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,4,4,128,0,1,float16,float16,1023,0.027930667002995808
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,4,4,128,0,1,float16,fp8,1023,0.02792000025510788
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,4,4,128,0,1,float16,float16,2047,0.03562133262554804
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,4,4,128,0,1,float16,fp8,2047,0.03489600121974945
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,4,4,128,0,1,float16,float16,4095,0.05403733253479004
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,4,4,128,0,1,float16,fp8,4095,0.04859733581542969
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,4,4,128,0,1,float16,float16,8191,0.09326933821042378
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,4,4,128,0,1,float16,fp8,8191,0.08236800134181976
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,4,4,128,0,1,float16,float16,16383,0.1639893352985382
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,4,4,128,0,1,float16,fp8,16383,0.14551466703414917
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,4,4,128,0,1,float16,float16,32767,0.3022293249766032
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,4,4,128,0,1,float16,fp8,32767,0.2677226662635803
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,2,2,64,0,1,float16,float16,1,0.008229333286484083
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,2,2,64,0,1,float16,fp8,1,0.009365333244204521
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,2,2,64,0,1,float16,float16,3,0.007765333478649457
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,2,2,64,0,1,float16,fp8,3,0.009434666484594345
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,2,2,64,0,1,float16,float16,7,0.008240000034372011
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,2,2,64,0,1,float16,fp8,7,0.009050666665037474
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,4,4,128,0,1,float16,fp8,65535,0.5080639918645223
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,4,4,128,0,1,float16,float16,65535,0.5767680009206136
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,2,2,64,0,1,float16,float16,15,0.008112000301480293
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,2,2,64,0,1,float16,fp8,15,0.009482666850090027
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,2,2,64,0,1,float16,float16,31,0.008047999814152718
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,2,2,64,0,1,float16,fp8,31,0.009338666374484697
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,2,2,64,0,1,float16,fp8,63,0.010405333091815313
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,2,2,64,0,1,float16,float16,255,0.009114666531483332
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,2,2,64,0,1,float16,float16,63,0.009152000149091085
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,2,2,64,0,1,float16,float16,127,0.00956266683836778
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,2,2,64,0,1,float16,fp8,127,0.011445333560307821
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,2,2,64,0,1,float16,fp8,255,0.011946666985750198
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,2,2,64,0,1,float16,float16,511,0.009999999776482582
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,2,2,64,0,1,float16,fp8,511,0.01201066623131434
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,4,4,128,0,1,float16,fp8,131071,0.7984639803568522
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,2,2,64,0,1,float16,float16,1023,0.01701333373785019
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,2,2,64,0,1,float16,fp8,1023,0.015957333147525787
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,4,4,128,0,1,float16,float16,131071,0.9945120016733805
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,2,2,64,0,1,float16,float16,2047,0.028197333216667175
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,2,2,64,0,1,float16,fp8,2047,0.02733866622050603
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,2,2,64,0,1,float16,float16,4095,0.03566399961709976
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,2,2,64,0,1,float16,fp8,4095,0.03534399966398875
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,2,2,64,0,1,float16,float16,8191,0.05514666438102722
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,2,2,64,0,1,float16,fp8,8191,0.04784533381462097
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,2,2,64,0,1,float16,float16,16383,0.09490133325258891
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,2,2,64,0,1,float16,fp8,16383,0.08004799981911977
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,2,2,64,0,1,float16,float16,32767,0.16358400384585062
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,2,2,64,0,1,float16,fp8,32767,0.14218133687973022
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,2,2,64,0,1,float16,float16,1,0.00786666696270307
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,2,2,64,0,1,float16,fp8,1,0.008879999940594038
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,2,2,64,0,1,float16,float16,3,0.007978666573762894
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,2,2,64,0,1,float16,float16,7,0.007637333124876022
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,2,2,64,0,1,float16,fp8,3,0.009136000027259191
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,2,2,64,0,1,float16,fp8,7,0.009173333023985228
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,2,2,64,0,1,float16,fp8,65535,0.2578879992167155
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,2,2,64,0,1,float16,fp8,15,0.009130666653315226
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,2,2,64,0,1,float16,float16,15,0.007642666498819987
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,2,2,64,0,1,float16,float16,65535,0.3025546669960022
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,2,2,64,0,1,float16,float16,31,0.008223999912540117
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,2,2,64,0,1,float16,fp8,31,0.009232000137368837
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,2,2,64,0,1,float16,float16,63,0.009008000294367472
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,2,2,64,0,1,float16,fp8,63,0.009743999689817429
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,2,2,64,0,1,float16,float16,127,0.008965333302815756
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,2,2,64,0,1,float16,fp8,127,0.011813333878914515
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,2,2,64,0,1,float16,float16,255,0.009082666908701261
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,2,2,64,0,1,float16,fp8,255,0.011616000284751257
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,2,2,64,0,1,float16,float16,131071,0.5107786655426025
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,2,2,64,0,1,float16,float16,511,0.009712000067035357
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,2,2,64,0,1,float16,fp8,511,0.011589333415031433
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,2,2,64,0,1,float16,float16,1023,0.015978666643301647
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,2,2,64,0,1,float16,fp8,1023,0.015568000574906668
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,2,2,64,0,1,float16,fp8,131071,0.3919946750005086
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,2,2,64,0,1,float16,float16,2047,0.016783999900023144
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,2,2,64,0,1,float16,fp8,8191,0.016943999876578648
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,2,2,64,0,1,float16,fp8,2047,0.01616000011563301
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,2,2,64,0,1,float16,float16,4095,0.01653333380818367
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,2,2,64,0,1,float16,fp8,4095,0.016842667013406754
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,2,2,64,0,1,float16,float16,8191,0.017045332739750545
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,2,2,64,0,1,float16,float16,16383,0.017423999806245167
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,2,2,64,0,1,float16,fp8,16383,0.017055999487638474
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,2,2,64,0,1,float16,float16,32767,0.017375999440749485
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,2,2,64,0,1,float16,fp8,32767,0.01709866647919019
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,2,2,64,0,1,float16,float16,65535,0.017423999806245167
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,2,2,64,0,1,float16,fp8,65535,0.016789333273967106
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,2,2,64,0,1,float16,float16,1,0.008629333227872849
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,2,2,64,0,1,float16,fp8,1,0.009413333609700203
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,2,2,64,0,1,float16,float16,3,0.008122666428486506
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,2,2,64,0,1,float16,fp8,3,0.009152000149091085
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,2,2,64,0,1,float16,float16,7,0.007690666864315669
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,2,2,64,0,1,float16,fp8,7,0.008799999952316284
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,2,2,64,0,1,float16,float16,63,0.008549333239595095
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,2,2,64,0,1,float16,float16,15,0.008256000156203905
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,2,2,64,0,1,float16,float16,131071,0.022730665902296703
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,2,2,64,0,1,float16,fp8,131071,0.022341333329677582
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,2,2,64,0,1,float16,fp8,15,0.009312000125646591
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,2,2,64,0,1,float16,float16,31,0.008181333541870117
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,2,2,64,0,1,float16,fp8,31,0.009418666362762451
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,2,2,64,0,1,float16,fp8,63,0.009941333283980688
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,2,2,64,0,1,float16,float16,127,0.009183999771873156
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,2,2,64,0,1,float16,fp8,127,0.011776000261306763
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,2,2,64,0,1,float16,float16,255,0.009119999905427298
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,2,2,64,0,1,float16,fp8,255,0.011690666278203329
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,2,2,64,0,1,float16,float16,511,0.010128000130256018
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,2,2,64,0,1,float16,fp8,511,0.011690666278203329
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,2,2,64,0,1,float16,float16,1023,0.015781333049138386
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,2,2,64,0,1,float16,fp8,1023,0.015290666371583939
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,2,2,64,0,1,float16,float16,2047,0.01664000004529953
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,2,2,64,0,1,float16,fp8,2047,0.01684800038735072
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,2,2,64,0,1,float16,float16,4095,0.016927999754746754
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,2,2,64,0,1,float16,fp8,4095,0.01709866647919019
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,2,2,64,0,1,float16,float16,8191,0.01700266698996226
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,2,2,64,0,1,float16,fp8,8191,0.017279999951521557
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,2,2,64,0,1,float16,float16,16383,0.017808000246683758
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,2,2,64,0,1,float16,fp8,16383,0.016522667060295742
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,2,2,64,0,1,float16,float16,32767,0.01806933308641116
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,2,2,64,0,1,float16,fp8,32767,0.01717866708834966
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,2,2,64,0,1,float16,float16,1,0.008058666562040647
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,2,2,64,0,1,float16,float16,65535,0.02327999969323476
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,2,2,64,0,1,float16,fp8,1,0.009712000067035357
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,2,2,64,0,1,float16,fp8,65535,0.02279466638962428
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,2,2,64,0,1,float16,float16,3,0.008362666393319765
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,2,2,64,0,1,float16,fp8,3,0.009253333633144697
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,2,2,64,0,1,float16,float16,7,0.008559999987483025
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,2,2,64,0,1,float16,fp8,7,0.009850666547815004
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,2,2,64,0,1,float16,float16,15,0.008400000010927519
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,2,2,64,0,1,float16,fp8,15,0.009573333586255709
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,2,2,64,0,1,float16,fp8,131071,0.032373333970705666
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,2,2,64,0,1,float16,float16,31,0.008245333408315977
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,2,2,64,0,1,float16,float16,131071,0.04590400060017904
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,2,2,64,0,1,float16,fp8,31,0.009957333405812582
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,2,2,64,0,1,float16,float16,63,0.009450666606426239
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,2,2,64,0,1,float16,fp8,63,0.010575999816258749
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,2,2,64,0,1,float16,float16,127,0.00949866697192192
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,2,2,64,0,1,float16,fp8,127,0.01166933278242747
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,2,2,64,0,1,float16,float16,255,0.009290666629870733
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,2,2,64,0,1,float16,fp8,255,0.012063999970753988
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,2,2,64,0,1,float16,float16,511,0.010933333386977514
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,2,2,64,0,1,float16,fp8,511,0.01232533281048139
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,2,2,64,0,1,float16,float16,1023,0.027162666122118633
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,2,2,64,0,1,float16,fp8,1023,0.026506667335828144
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,2,2,64,0,1,float16,float16,2047,0.03108799954255422
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,2,2,64,0,1,float16,fp8,2047,0.03070399910211563
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,2,2,64,0,1,float16,float16,4095,0.04735999802748362
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,2,2,64,0,1,float16,fp8,4095,0.04131733377774557
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,2,2,64,0,1,float16,float16,8191,0.08434133728345235
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,2,2,64,0,1,float16,fp8,8191,0.06649599969387054
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,2,2,64,0,1,float16,float16,16383,0.14621333281199136
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,2,2,64,0,1,float16,fp8,16383,0.11541333794593811
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,2,2,64,0,1,float16,float16,1,0.008000000069538752
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,2,2,64,0,1,float16,fp8,1,0.009301333377758661
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,2,2,64,0,1,float16,fp8,32767,0.2055786649386088
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,2,2,64,0,1,float16,float16,7,0.008058666562040647
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,2,2,64,0,1,float16,float16,32767,0.2661386728286743
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,2,2,64,0,1,float16,float16,3,0.008218666538596153
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,2,2,64,0,1,float16,fp8,3,0.009248000259200731
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,2,2,64,0,1,float16,fp8,7,0.00933333362142245
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,2,2,64,0,1,float16,float16,15,0.007674666742483775
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,2,2,64,0,1,float16,fp8,15,0.00921066664159298
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,2,2,64,0,1,float16,fp8,31,0.009136000027259191
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,2,2,64,0,1,float16,float16,31,0.008421333506703377
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,2,2,64,0,1,float16,float16,63,0.008965333302815756
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,2,2,64,0,1,float16,fp8,63,0.010751999914646149
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,2,2,64,0,1,float16,float16,127,0.008789333204428354
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,2,2,64,0,1,float16,float16,511,0.010128000130256018
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,2,2,64,0,1,float16,fp8,127,0.011658667276302973
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,2,2,64,0,1,float16,float16,255,0.008810666700204214
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,2,2,64,0,1,float16,fp8,255,0.011733333269755045
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,2,2,64,0,1,float16,fp8,511,0.011877333124478659
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,2,2,64,0,1,float16,float16,1023,0.016554666062196095
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,2,2,64,0,1,float16,float16,65535,0.5110506614049276
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,2,2,64,0,1,float16,fp8,1023,0.01588800052801768
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,2,2,64,0,1,float16,fp8,65535,0.3872213363647461
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,2,2,64,0,1,float16,float16,2047,0.017242666333913803
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,2,2,64,0,1,float16,fp8,2047,0.016538667182127636
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,2,2,64,0,1,float16,float16,4095,0.017130666722853977
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,2,2,64,0,1,float16,fp8,4095,0.017194667210181553
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,2,2,64,0,1,float16,float16,8191,0.016965333372354507
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,2,2,64,0,1,float16,fp8,8191,0.017082666357358296
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,2,2,64,0,1,float16,float16,16383,0.02890666574239731
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,2,2,64,0,1,float16,fp8,16383,0.02770666778087616
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,2,2,64,0,1,float16,float16,32767,0.03787733366092046
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,2,2,64,0,1,float16,fp8,32767,0.038047999143600464
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,2,2,64,0,1,float16,float16,1,0.009114666531483332
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,2,2,64,0,1,float16,float16,65535,0.06970666845639546
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,2,2,64,0,1,float16,fp8,65535,0.058176000912984215
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,2,2,64,0,1,float16,fp8,1,0.009205333267649015
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,2,2,64,0,1,float16,float16,7,0.009258666386206945
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,2,2,64,0,1,float16,float16,3,0.008645333349704742
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,2,2,64,0,1,float16,fp8,3,0.009253333633144697
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,2,2,64,0,1,float16,fp8,7,0.009306666751702627
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,2,2,64,0,1,float16,float16,15,0.009375999992092451
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,2,2,64,0,1,float16,fp8,15,0.009392000113924345
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,2,2,64,0,1,float16,float16,31,0.009957333405812582
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,2,2,64,0,1,float16,fp8,31,0.009599999835093817
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,2,2,64,0,1,float16,float16,131071,0.11884799599647522
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,2,2,64,0,1,float16,float16,63,0.011616000284751257
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,2,2,64,0,1,float16,fp8,131071,0.10531733433405559
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,2,2,64,0,1,float16,fp8,63,0.011834666132926941
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,2,2,64,0,1,float16,float16,127,0.011877333124478659
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,2,2,64,0,1,float16,fp8,127,0.011776000261306763
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,2,2,64,0,1,float16,float16,255,0.011861333002646765
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,2,2,64,0,1,float16,fp8,255,0.011328000575304031
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,2,2,64,0,1,float16,float16,511,0.013295999417702356
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,2,2,64,0,1,float16,fp8,511,0.01340266689658165
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,2,2,64,0,1,float16,float16,1023,0.01911466692884763
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,2,2,64,0,1,float16,fp8,1023,0.01863466699918111
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,2,2,64,0,1,float16,fp8,4095,0.05764799813429514
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,2,2,64,0,1,float16,float16,2047,0.04073066761096319
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,2,2,64,0,1,float16,fp8,2047,0.029466666281223297
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,2,2,64,0,1,float16,float16,4095,0.07150400181611379
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,2,2,64,0,1,float16,float16,8191,0.13185600439707437
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,2,2,64,0,1,float16,fp8,8191,0.10487999518712361
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,2,2,64,0,1,float16,float16,1,0.014368000129858652
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,2,2,64,0,1,float16,fp8,1,0.014655999839305878
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,2,2,64,0,1,float16,fp8,16383,0.19710934162139893
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,2,2,64,0,1,float16,float16,3,0.014511999984582266
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,2,2,64,0,1,float16,float16,16383,0.25667200485865277
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,2,2,64,0,1,float16,fp8,3,0.01451733335852623
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,2,2,64,0,1,float16,float16,7,0.014677333335081736
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,2,2,64,0,1,float16,float16,15,0.014271999398867289
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,2,2,64,0,1,float16,fp8,7,0.014906667172908783
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,2,2,64,0,1,float16,fp8,15,0.01740266631046931
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,2,2,64,0,1,float16,float16,31,0.015978666643301647
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,2,2,64,0,1,float16,fp8,31,0.016016000260909397
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,2,2,64,0,1,float16,float16,63,0.019280000279347103
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,2,2,64,0,1,float16,fp8,63,0.019306667149066925
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,2,2,64,0,1,float16,float16,127,0.019215999792019527
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,2,2,64,0,1,float16,fp8,127,0.01958400011062622
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,2,2,64,0,1,float16,float16,255,0.019653332730134327
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,2,2,64,0,1,float16,fp8,255,0.0195573332409064
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,2,2,64,0,1,float16,float16,511,0.0235359991590182
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,2,2,64,0,1,float16,fp8,32767,0.5799200137456259
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,2,2,64,0,1,float16,fp8,511,0.022634667654832203
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,2,2,64,0,1,float16,float16,32767,0.6285013357798258
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,2,2,64,0,1,float16,float16,1023,0.03963200002908707
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,2,2,64,0,1,float16,fp8,1023,0.03393599887688955
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,2,2,64,0,1,float16,float16,2047,0.07495999832948048
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,2,2,64,0,1,float16,fp8,2047,0.05923733115196228
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,2,2,64,0,1,float16,float16,4095,0.13546666502952576
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,2,2,64,0,1,float16,fp8,1,0.014218666901191076
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,2,2,64,0,1,float16,float16,1,0.014416000495354334
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,2,2,64,0,1,float16,fp8,4095,0.10961600144704182
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,2,2,64,0,1,float16,float16,3,0.014501333236694336
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,2,2,64,0,1,float16,fp8,3,0.01471466695268949
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,2,2,64,0,1,float16,fp8,7,0.014661333213249842
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,2,2,64,0,1,float16,float16,8191,0.255621333916982
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,2,2,64,0,1,float16,float16,7,0.014730667074521383
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,2,2,64,0,1,float16,fp8,8191,0.20139733950297037
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,2,2,64,0,1,float16,float16,15,0.015376000354687372
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,2,2,64,0,1,float16,fp8,15,0.01565333331624667
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,2,2,64,0,1,float16,float16,31,0.018661333868900936
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,2,2,64,0,1,float16,float16,63,0.018101333330074947
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,2,2,64,0,1,float16,fp8,31,0.018725333114465077
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,2,2,64,0,1,float16,fp8,63,0.019194666296243668
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,2,2,64,0,1,float16,fp8,255,0.022495999932289124
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,2,2,64,0,1,float16,float16,127,0.019002666076024372
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,2,2,64,0,1,float16,float16,511,0.038362666964530945
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,2,2,64,0,1,float16,fp8,127,0.019386666516462963
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,2,2,64,0,1,float16,float16,255,0.023205332458019257
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,2,2,64,0,1,float16,fp8,511,0.03309333324432373
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,2,2,64,0,1,float16,float16,1023,0.07469866673151652
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,2,2,64,0,1,float16,fp8,1023,0.05871999760468801
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,2,2,64,0,1,float16,float16,1,0.008127999802430471
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,2,2,64,0,1,float16,fp8,1,0.008912000184257826
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,2,2,64,0,1,float16,float16,2047,0.13556266824404398
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,2,2,64,0,1,float16,fp8,2047,0.10979732871055603
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,2,2,64,0,1,float16,float16,3,0.007967999825874964
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,2,2,64,0,1,float16,fp8,3,0.009434666484594345
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,2,2,64,0,1,float16,float16,7,0.008122666428486506
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,2,2,64,0,1,float16,fp8,7,0.008816000074148178
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,2,2,64,0,1,float16,float16,4095,0.25706666707992554
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,2,2,64,0,1,float16,float16,15,0.008176000167926153
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,2,2,64,0,1,float16,fp8,4095,0.20598934094111124
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,2,2,64,0,1,float16,fp8,15,0.009237333511312803
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,2,2,64,0,1,float16,float16,31,0.008245333408315977
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,2,2,64,0,1,float16,fp8,31,0.009679999823371569
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,2,2,64,0,1,float16,float16,63,0.009088000282645226
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,2,2,64,0,1,float16,fp8,63,0.009829333052039146
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,2,2,64,0,1,float16,float16,127,0.009349333122372627
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,2,2,64,0,1,float16,fp8,127,0.01173866664369901
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,2,2,64,0,1,float16,float16,255,0.00921066664159298
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,2,2,64,0,1,float16,fp8,255,0.011829332758982977
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,2,2,64,0,1,float16,float16,511,0.010101333260536194
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,2,2,64,0,1,float16,fp8,511,0.011994666109482447
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,2,2,64,0,1,float16,float16,1023,0.01624533285697301
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,2,2,64,0,1,float16,fp8,1023,0.015605332950750986
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,2,2,64,0,1,float16,float16,2047,0.01736533393462499
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,2,2,64,0,1,float16,fp8,2047,0.016890666137139004
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,2,2,64,0,1,float16,float16,4095,0.017071999609470367
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,2,2,64,0,1,float16,fp8,4095,0.01740266631046931
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,2,2,64,0,1,float16,float16,8191,0.028117333849271137
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,2,2,64,0,1,float16,fp8,8191,0.028336000939210255
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,2,2,64,0,1,float16,fp8,16383,0.03753600021203359
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,2,2,64,0,1,float16,float16,16383,0.03868266691764196
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,2,2,64,0,1,float16,float16,32767,0.06818133095900218
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,2,2,64,0,1,float16,fp8,32767,0.057461331288019814
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,2,2,64,0,1,float16,float16,1,0.024234667420387268
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,2,2,64,0,1,float16,float16,65535,0.11580800016721089
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,2,2,64,0,1,float16,fp8,65535,0.10254399975140889
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,2,2,64,0,1,float16,fp8,1,0.025040000677108765
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,2,2,64,0,1,float16,float16,3,0.02456533412138621
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,2,2,64,0,1,float16,float16,7,0.02497600018978119
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,2,2,64,0,1,float16,fp8,3,0.02496533344189326
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,2,2,64,0,1,float16,fp8,7,0.02569599946339925
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,2,2,64,0,1,float16,float16,15,0.02624533325433731
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,2,2,64,0,1,float16,fp8,131071,0.17686933279037476
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,2,2,64,0,1,float16,fp8,15,0.02681066592534383
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,2,2,64,0,1,float16,float16,31,0.0322026660044988
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,2,2,64,0,1,float16,float16,131071,0.2029973268508911
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,2,2,64,0,1,float16,fp8,31,0.032933334509531655
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,2,2,64,0,1,float16,float16,63,0.032501332461833954
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,2,2,64,0,1,float16,fp8,63,0.03306666761636734
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,2,2,64,0,1,float16,float16,127,0.033674667278925575
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,2,2,64,0,1,float16,fp8,127,0.03333866596221924
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,2,2,64,0,1,float16,float16,255,0.045647998650868736
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,2,2,64,0,1,float16,fp8,255,0.04021333406368891
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,2,2,64,0,1,float16,float16,511,0.06881600121657054
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,2,2,64,0,1,float16,fp8,511,0.06574933230876923
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,2,2,64,0,1,float16,float16,1023,0.13867732882499695
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,2,2,64,0,1,float16,fp8,1023,0.10873066385587056
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,2,2,64,0,1,float16,fp8,2047,0.20811732610066733
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,2,2,64,0,1,float16,float16,2047,0.2604106664657593
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,2,2,64,0,1,float16,float16,1,0.04380266865094503
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,2,2,64,0,1,float16,fp8,1,0.04499199986457825
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,2,2,64,0,1,float16,float16,3,0.04385066529115041
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,2,2,64,0,1,float16,fp8,3,0.04474666714668274
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,2,2,64,0,1,float16,float16,7,0.04571199913819631
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,2,2,64,0,1,float16,fp8,7,0.04642133414745331
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,2,2,64,0,1,float16,float16,15,0.046816001335779824
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,2,2,64,0,1,float16,float16,63,0.059119999408721924
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,2,2,64,0,1,float16,fp8,15,0.048063998421033226
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,2,2,64,0,1,float16,float16,31,0.05905599892139435
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,2,2,64,0,1,float16,fp8,31,0.0598826656738917
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,2,2,64,0,1,float16,fp8,63,0.06020266811052958
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,2,2,64,0,1,float16,float16,127,0.06674666702747345
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,2,2,64,0,1,float16,float16,1,0.008389333263039589
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,2,2,64,0,1,float16,fp8,255,0.08007466793060303
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,2,2,64,0,1,float16,fp8,1,0.009599999835093817
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,2,2,64,0,1,float16,fp8,127,0.06121066709359487
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,2,2,64,0,1,float16,float16,3,0.007776000226537387
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,2,2,64,0,1,float16,fp8,3,0.009466666728258133
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,2,2,64,0,1,float16,float16,255,0.08141333361466725
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,2,2,64,0,1,float16,fp8,7,0.008986666798591614
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,2,2,64,0,1,float16,float16,7,0.008186666915814081
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,2,2,64,0,1,float16,float16,15,0.008170666793982187
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,2,2,64,0,1,float16,float16,31,0.00797333319981893
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,2,2,64,0,1,float16,fp8,15,0.009824000298976898
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,2,2,64,0,1,float16,float16,511,0.12612799803415933
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,2,2,64,0,1,float16,fp8,31,0.009535999968647957
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,2,2,64,0,1,float16,fp8,511,0.12317867080370586
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,2,2,64,0,1,float16,float16,63,0.009141333401203156
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,2,2,64,0,1,float16,fp8,63,0.009866666669646898
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,2,2,64,0,1,float16,float16,127,0.009152000149091085
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,2,2,64,0,1,float16,fp8,127,0.01173866664369901
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,2,2,64,0,1,float16,float16,255,0.009418666362762451
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,2,2,64,0,1,float16,fp8,1023,0.20631466309229532
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,2,2,64,0,1,float16,fp8,255,0.011871999750534693
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,2,2,64,0,1,float16,float16,511,0.010346666599313417
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,2,2,64,0,1,float16,float16,1023,0.26051199436187744
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,2,2,64,0,1,float16,fp8,511,0.011450666934251785
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,2,2,64,0,1,float16,float16,1023,0.016058667252461117
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,2,2,64,0,1,float16,fp8,1023,0.016010666886965435
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,2,2,64,0,1,float16,float16,2047,0.017557332913080852
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,2,2,64,0,1,float16,fp8,2047,0.017077332983414333
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,2,2,64,0,1,float16,float16,4095,0.028330666323502857
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,2,2,64,0,1,float16,fp8,4095,0.028005334238211315
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,2,2,64,0,1,float16,float16,8191,0.03751999884843826
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,2,2,64,0,1,float16,fp8,8191,0.037818667789300285
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,2,2,64,0,1,float16,float16,16383,0.06396799782911937
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,2,2,64,0,1,float16,fp8,16383,0.05539200206597646
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,2,2,64,0,1,float16,float16,32767,0.10700800021489461
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,2,2,64,0,1,float16,fp8,32767,0.09167466560999553
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,2,2,128,0,1,float16,float16,1,0.009130666653315226
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,2,2,128,0,1,float16,fp8,1,0.009493333597977957
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,2,2,128,0,1,float16,float16,3,0.00960533320903778
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,2,2,64,0,1,float16,float16,65535,0.18677333990732828
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,2,2,128,0,1,float16,fp8,3,0.009599999835093817
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,2,2,128,0,1,float16,float16,7,0.009226666763424873
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,2,2,128,0,1,float16,fp8,7,0.009455999980370203
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,2,2,128,0,1,float16,float16,15,0.00972800018886725
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,2,2,128,0,1,float16,fp8,31,0.010224000240365664
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,2,2,64,0,1,float16,fp8,65535,0.16057599584261575
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,2,2,128,0,1,float16,fp8,15,0.009743999689817429
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,2,2,128,0,1,float16,float16,31,0.010234666367371878
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,2,2,128,0,1,float16,float16,63,0.011584000041087469
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,2,2,128,0,1,float16,fp8,63,0.011999999483426413
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,2,2,64,0,1,float16,float16,131071,0.35309334595998126
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,2,2,128,0,1,float16,float16,127,0.011770666887362799
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,2,2,128,0,1,float16,fp8,127,0.011887999872366587
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,2,2,128,0,1,float16,float16,255,0.012234666695197424
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,2,2,128,0,1,float16,fp8,1023,0.0273333340883255
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,2,2,128,0,1,float16,float16,1023,0.028117333849271137
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,2,2,128,0,1,float16,fp8,2047,0.03477866699298223
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,2,2,128,0,1,float16,fp8,255,0.011893333246310553
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,2,2,128,0,1,float16,float16,511,0.01651200031240781
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,2,2,128,0,1,float16,fp8,511,0.016528000434239704
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,2,2,64,0,1,float16,fp8,131071,0.30296534299850464
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,2,2,128,0,1,float16,float16,2047,0.03585066646337509
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,2,2,128,0,1,float16,float16,4095,0.054805333415667214
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,2,2,128,0,1,float16,fp8,4095,0.04850666721661886
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,2,2,128,0,1,float16,float16,8191,0.09426132837931316
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,2,2,128,0,1,float16,fp8,8191,0.08278400202592213
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,2,2,128,0,1,float16,float16,16383,0.16380799810091654
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,2,2,128,0,1,float16,fp8,16383,0.14686399698257446
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,2,2,128,0,1,float16,fp8,32767,0.2695786754290263
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,2,2,128,0,1,float16,float16,32767,0.30166399478912354
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,2,2,128,0,1,float16,float16,1,0.009141333401203156
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,2,2,128,0,1,float16,fp8,1,0.009072000160813332
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,2,2,128,0,1,float16,float16,3,0.009114666531483332
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,2,2,128,0,1,float16,fp8,3,0.009301333377758661
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,2,2,128,0,1,float16,float16,65535,0.5765493313471476
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,2,2,128,0,1,float16,float16,7,0.008863999818762144
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,2,2,128,0,1,float16,fp8,7,0.009173333023985228
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,2,2,128,0,1,float16,float16,15,0.009290666629870733
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,2,2,128,0,1,float16,fp8,65535,0.5092159907023112
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,2,2,128,0,1,float16,fp8,15,0.008922666932145754
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,2,2,128,0,1,float16,float16,31,0.009743999689817429
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,2,2,128,0,1,float16,fp8,31,0.009925333162148794
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,2,2,128,0,1,float16,float16,63,0.011525332927703857
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,2,2,128,0,1,float16,fp8,63,0.011146667102972666
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,2,2,128,0,1,float16,float16,255,0.0116799995303154
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,2,2,128,0,1,float16,float16,127,0.011440000186363855
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,2,2,128,0,1,float16,fp8,127,0.011440000186363855
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,2,2,128,0,1,float16,fp8,255,0.011792000383138657
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,2,2,128,0,1,float16,float16,511,0.015216000378131866
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,2,2,128,0,1,float16,fp8,511,0.015493333339691162
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,2,2,128,0,1,float16,float16,1023,0.016506666938463848
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,2,2,128,0,1,float16,fp8,1023,0.01624533285697301
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,2,2,128,0,1,float16,float16,2047,0.016714667280515034
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,2,2,128,0,1,float16,fp8,2047,0.01632000009218852
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,2,2,128,0,1,float16,float16,131071,0.9948106606801351
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,2,2,128,0,1,float16,fp8,131071,0.7995306650797526
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,2,2,128,0,1,float16,float16,4095,0.016789333273967106
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,2,2,128,0,1,float16,fp8,4095,0.016575999557971954
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,2,2,128,0,1,float16,float16,8191,0.016943999876578648
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,2,2,128,0,1,float16,fp8,8191,0.016800000021855038
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,2,2,128,0,1,float16,float16,16383,0.01657066618402799
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,2,2,128,0,1,float16,fp8,16383,0.016906666258970898
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,2,2,128,0,1,float16,float16,32767,0.017605333278576534
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,2,2,128,0,1,float16,fp8,32767,0.017680000513792038
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,2,2,128,0,1,float16,float16,1,0.009381333366036415
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,2,2,128,0,1,float16,float16,65535,0.023498666783173878
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,2,2,128,0,1,float16,fp8,1,0.009205333267649015
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,2,2,128,0,1,float16,fp8,65535,0.022624000906944275
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,2,2,128,0,1,float16,float16,3,0.009077333534757296
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,2,2,128,0,1,float16,fp8,3,0.009269333134094873
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,2,2,128,0,1,float16,float16,7,0.009098666409651438
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,2,2,128,0,1,float16,fp8,7,0.009962666779756546
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,2,2,128,0,1,float16,float16,15,0.009226666763424873
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,2,2,128,0,1,float16,fp8,15,0.009423999736706415
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,2,2,128,0,1,float16,float16,31,0.01002133327225844
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,2,2,128,0,1,float16,fp8,31,0.010165333126982054
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,2,2,128,0,1,float16,float16,63,0.011626667032639185
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,2,2,128,0,1,float16,fp8,63,0.011653333902359009
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,2,2,128,0,1,float16,float16,127,0.011605333536863327
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,2,2,128,0,1,float16,float16,131071,0.045408000548680626
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,2,2,128,0,1,float16,fp8,131071,0.032474666833877563
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,2,2,128,0,1,float16,fp8,127,0.011557333171367645
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,2,2,128,0,1,float16,float16,255,0.01190399999419848
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,2,2,128,0,1,float16,fp8,255,0.011663999408483505
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,2,2,128,0,1,float16,float16,511,0.015674666812022526
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,2,2,128,0,1,float16,fp8,511,0.015610666324694952
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,2,2,128,0,1,float16,float16,1023,0.01676799977819125
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,2,2,128,0,1,float16,fp8,1023,0.01670933390657107
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,2,2,128,0,1,float16,fp8,8191,0.017152000218629837
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,2,2,128,0,1,float16,float16,2047,0.017018667111794155
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,2,2,128,0,1,float16,fp8,2047,0.016869333883126576
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,2,2,128,0,1,float16,float16,4095,0.01735466718673706
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,2,2,128,0,1,float16,fp8,4095,0.016858667135238647
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,2,2,128,0,1,float16,float16,8191,0.017210666090250015
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,2,2,128,0,1,float16,float16,16383,0.017680000513792038
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,2,2,128,0,1,float16,fp8,16383,0.017621333400408428
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,2,2,128,0,1,float16,float16,32767,0.023178666830062866
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,2,2,128,0,1,float16,fp8,32767,0.02271999915440877
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,2,2,128,0,1,float16,float16,1,0.009775999933481216
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,2,2,128,0,1,float16,fp8,1,0.009872000043590864
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,2,2,128,0,1,float16,float16,3,0.009648000200589498
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,2,2,128,0,1,float16,float16,65535,0.04497066636880239
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,2,2,128,0,1,float16,fp8,3,0.010064000263810158
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,2,2,128,0,1,float16,float16,7,0.009621333330869675
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,2,2,128,0,1,float16,fp8,65535,0.032885332902272545
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,2,2,128,0,1,float16,float16,31,0.010506667196750641
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,2,2,128,0,1,float16,fp8,7,0.00984533317387104
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,2,2,128,0,1,float16,float16,15,0.010090666512648264
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,2,2,128,0,1,float16,float16,63,0.012602667013804117
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,2,2,128,0,1,float16,fp8,15,0.009973333527644476
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,2,2,128,0,1,float16,float16,131071,0.07544533411661784
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,2,2,128,0,1,float16,fp8,31,0.010714666297038397
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,2,2,128,0,1,float16,fp8,131071,0.06224533418814341
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,2,2,128,0,1,float16,fp8,63,0.012341332932313284
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,2,2,128,0,1,float16,float16,127,0.01228800043463707
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,2,2,128,0,1,float16,fp8,127,0.012522666404644648
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,2,2,128,0,1,float16,float16,255,0.012954667210578918
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,2,2,128,0,1,float16,fp8,255,0.01249066616098086
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,2,2,128,0,1,float16,float16,511,0.027290667096773785
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,2,2,128,0,1,float16,fp8,511,0.026848000784715016
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,2,2,128,0,1,float16,float16,1023,0.03176533430814743
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,2,2,128,0,1,float16,fp8,1023,0.031258667508761086
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,2,2,128,0,1,float16,float16,2047,0.047728002071380615
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,2,2,128,0,1,float16,fp8,2047,0.04224533339341482
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,2,2,128,0,1,float16,float16,4095,0.08430400490760803
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,2,2,128,0,1,float16,fp8,4095,0.06861333549022675
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,2,2,128,0,1,float16,float16,8191,0.14454399545987448
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,2,2,128,0,1,float16,fp8,8191,0.12071466445922852
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,2,2,128,0,1,float16,float16,16383,0.26335465908050537
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,2,2,128,0,1,float16,fp8,16383,0.21708800395329794
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,2,2,128,0,1,float16,float16,1,0.009216000015536943
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,2,2,128,0,1,float16,fp8,1,0.009503999724984169
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,2,2,128,0,1,float16,float16,3,0.009269333134094873
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,2,2,128,0,1,float16,fp8,3,0.00985599992175897
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,2,2,128,0,1,float16,float16,7,0.009381333366036415
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,2,2,128,0,1,float16,fp8,7,0.009402666861812273
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,2,2,128,0,1,float16,float16,15,0.009962666779756546
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,2,2,128,0,1,float16,fp8,32767,0.40745067596435547
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,2,2,128,0,1,float16,float16,32767,0.5033119916915894
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,2,2,128,0,1,float16,fp8,15,0.00997866690158844
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,2,2,128,0,1,float16,float16,31,0.010064000263810158
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,2,2,128,0,1,float16,fp8,31,0.010650667051474253
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,2,2,128,0,1,float16,float16,63,0.012047999848922094
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,2,2,128,0,1,float16,fp8,63,0.01209066684047381
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,2,2,128,0,1,float16,float16,127,0.012122667084137598
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,2,2,128,0,1,float16,fp8,127,0.012133333832025528
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,2,2,128,0,1,float16,float16,255,0.012421333541472753
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,2,2,128,0,1,float16,fp8,255,0.01211200033624967
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,2,2,128,0,1,float16,float16,511,0.0163680004576842
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,2,2,128,0,1,float16,fp8,511,0.016106666376193363
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,2,2,128,0,1,float16,float16,1023,0.01701333373785019
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,2,2,128,0,1,float16,fp8,1023,0.01711999997496605
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,2,2,128,0,1,float16,float16,4095,0.01752000053723653
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,2,2,128,0,1,float16,float16,2047,0.01741333305835724
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,2,2,128,0,1,float16,fp8,2047,0.01717866708834966
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,2,2,128,0,1,float16,fp8,65535,0.7907093365987142
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,2,2,128,0,1,float16,float16,65535,0.9898133277893066
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,2,2,128,0,1,float16,fp8,4095,0.017231999586025875
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,2,2,128,0,1,float16,float16,8191,0.028565332293510437
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,2,2,128,0,1,float16,fp8,8191,0.028255999088287354
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,2,2,128,0,1,float16,float16,16383,0.03900266687075297
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,2,2,128,0,1,float16,fp8,16383,0.03828266759713491
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,2,2,128,0,1,float16,float16,32767,0.06881066660086314
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,2,2,128,0,1,float16,fp8,32767,0.05840533475081126
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,2,2,128,0,1,float16,float16,1,0.010149333626031876
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,2,2,128,0,1,float16,fp8,1,0.009472000102202097
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,2,2,128,0,1,float16,float16,3,0.009898666913310686
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,2,2,128,0,1,float16,fp8,65535,0.10820800065994263
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,2,2,128,0,1,float16,fp8,3,0.009653333574533463
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,2,2,128,0,1,float16,float16,7,0.009546666716535887
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,2,2,128,0,1,float16,float16,65535,0.12019200126330058
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,2,2,128,0,1,float16,fp8,7,0.009930666536092758
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,2,2,128,0,1,float16,float16,15,0.010490667074918747
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,2,2,128,0,1,float16,fp8,15,0.01032533310353756
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,2,2,128,0,1,float16,float16,31,0.011695999652147293
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,2,2,128,0,1,float16,fp8,31,0.012026666353146235
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,2,2,128,0,1,float16,float16,63,0.01210133358836174
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,2,2,128,0,1,float16,fp8,63,0.011920000116030375
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,2,2,128,0,1,float16,fp8,131071,0.1934986710548401
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,2,2,128,0,1,float16,float16,127,0.012341332932313284
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,2,2,128,0,1,float16,fp8,127,0.012096000214417776
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,2,2,128,0,1,float16,float16,255,0.013882666826248169
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,2,2,128,0,1,float16,fp8,255,0.013951999445756277
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,2,2,128,0,1,float16,float16,131071,0.2164586583773295
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,2,2,128,0,1,float16,float16,511,0.01915733392039935
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,2,2,128,0,1,float16,fp8,511,0.01882133384545644
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,2,2,128,0,1,float16,float16,1023,0.04099733382463455
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,2,2,128,0,1,float16,fp8,1023,0.029167999823888142
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,2,2,128,0,1,float16,float16,2047,0.07123200098673503
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,2,2,128,0,1,float16,fp8,2047,0.056074668963750206
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,2,2,128,0,1,float16,fp8,4095,0.10249066352844238
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,2,2,128,0,1,float16,float16,4095,0.13406399885813394
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,2,2,128,0,1,float16,float16,8191,0.25484800338745117
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,2,2,128,0,1,float16,fp8,8191,0.19402132431666055
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,2,2,128,0,1,float16,float16,1,0.014106666048367819
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,2,2,128,0,1,float16,fp8,1,0.014917333920796713
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,2,2,128,0,1,float16,fp8,3,0.014741333822409311
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,2,2,128,0,1,float16,float16,3,0.01481066644191742
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,2,2,128,0,1,float16,float16,7,0.014597332725922266
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,2,2,128,0,1,float16,float16,16383,0.5037813186645508
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,2,2,128,0,1,float16,fp8,7,0.014959999670584997
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,2,2,128,0,1,float16,float16,15,0.015498666713635126
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,2,2,128,0,1,float16,fp8,16383,0.3747306664784749
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,2,2,128,0,1,float16,fp8,15,0.016229332735141117
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,2,2,128,0,1,float16,float16,31,0.01912533367673556
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,2,2,128,0,1,float16,fp8,63,0.018895999838908512
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,2,2,128,0,1,float16,fp8,31,0.01926933353145917
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,2,2,128,0,1,float16,float16,63,0.019343999524911244
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,2,2,128,0,1,float16,float16,127,0.01950399950146675
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,2,2,128,0,1,float16,fp8,127,0.01971199984351794
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,2,2,128,0,1,float16,fp8,32767,1.1252480347951253
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,2,2,128,0,1,float16,float16,255,0.023370665808518726
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,2,2,128,0,1,float16,fp8,255,0.022495999932289124
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,2,2,128,0,1,float16,float16,511,0.03899200012286504
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,2,2,128,0,1,float16,fp8,511,0.03402666747570038
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,2,2,128,0,1,float16,float16,1023,0.07453866799672444
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,2,2,128,0,1,float16,fp8,1023,0.059445331494013466
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,2,2,128,0,1,float16,float16,32767,1.2582133611043294
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,2,2,128,0,1,float16,fp8,2047,0.10703466335932414
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,2,2,128,0,1,float16,float16,2047,0.13512000441551208
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,2,2,128,0,1,float16,float16,4095,0.25647467374801636
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,2,2,128,0,1,float16,fp8,4095,0.19821866353352866
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,2,2,128,0,1,float16,float16,1,0.014416000495354334
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,2,2,128,0,1,float16,fp8,1,0.014111999422311783
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,2,2,128,0,1,float16,float16,3,0.01458666721979777
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,2,2,128,0,1,float16,float16,7,0.014736000448465347
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,2,2,128,0,1,float16,fp8,3,0.014997333288192749
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,2,2,128,0,1,float16,float16,8191,0.49828799565633136
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,2,2,128,0,1,float16,fp8,8191,0.3771626551946004
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,2,2,128,0,1,float16,fp8,7,0.015530666957298914
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,2,2,128,0,1,float16,fp8,15,0.01811733345190684
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,2,2,128,0,1,float16,float16,15,0.018618666877349217
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,2,2,128,0,1,float16,float16,31,0.01852799952030182
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,2,2,128,0,1,float16,fp8,31,0.01854933301607768
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,2,2,128,0,1,float16,float16,63,0.018826667219400406
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,2,2,128,0,1,float16,fp8,63,0.018842666099468868
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,2,2,128,0,1,float16,float16,127,0.0227360005180041
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,2,2,128,0,1,float16,fp8,127,0.02162133405605952
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,2,2,128,0,1,float16,float16,255,0.03812266637881597
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,2,2,128,0,1,float16,fp8,255,0.03331733246644338
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,2,2,128,0,1,float16,float16,511,0.07427200178305308
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,2,2,128,0,1,float16,fp8,511,0.05829866727193197
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,2,2,128,0,1,float16,float16,1023,0.1357866624991099
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,2,2,128,0,1,float16,fp8,1023,0.10990933577219646
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,2,2,128,0,1,float16,float16,2047,0.25729600588480633
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,2,2,128,0,1,float16,fp8,2047,0.20563733577728271
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,2,2,128,0,1,float16,float16,1,0.009056000038981438
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,2,2,128,0,1,float16,fp8,1,0.009232000137368837
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,2,2,128,0,1,float16,float16,3,0.008890666688481966
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,2,2,128,0,1,float16,fp8,3,0.010266666611035665
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,2,2,128,0,1,float16,float16,15,0.009296000003814697
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,2,2,128,0,1,float16,float16,7,0.009189333145817121
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,2,2,128,0,1,float16,fp8,7,0.009205333267649015
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,2,2,128,0,1,float16,fp8,15,0.00926399976015091
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,2,2,128,0,1,float16,float16,31,0.009445333232482275
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,2,2,128,0,1,float16,fp8,4095,0.39418665568033856
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,2,2,128,0,1,float16,float16,4095,0.5021386543909708
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,2,2,128,0,1,float16,fp8,31,0.01009599988659223
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,2,2,128,0,1,float16,float16,63,0.011621333658695221
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,2,2,128,0,1,float16,fp8,63,0.011653333902359009
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,2,2,128,0,1,float16,float16,127,0.011509332805871964
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,2,2,128,0,1,float16,fp8,127,0.011125333607196808
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,2,2,128,0,1,float16,float16,255,0.011333333949247995
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,2,2,128,0,1,float16,fp8,255,0.011765333513418833
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,2,2,128,0,1,float16,float16,511,0.01586666703224182
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,2,2,128,0,1,float16,fp8,511,0.016122666498025257
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,2,2,128,0,1,float16,float16,1023,0.01708799973130226
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,2,2,128,0,1,float16,fp8,1023,0.01684800038735072
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,2,2,128,0,1,float16,float16,2047,0.017071999609470367
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,2,2,128,0,1,float16,fp8,2047,0.016554666062196095
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,2,2,128,0,1,float16,float16,4095,0.02749866743882497
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,2,2,128,0,1,float16,fp8,4095,0.027813332776228588
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,2,2,128,0,1,float16,float16,8191,0.038592000802357994
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,2,2,128,0,1,float16,fp8,8191,0.03792533278465271
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,2,2,128,0,1,float16,float16,16383,0.06825066606203715
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,2,2,128,0,1,float16,fp8,16383,0.05740800003210703
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,2,2,128,0,1,float16,fp8,32767,0.10518933335940044
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,2,2,128,0,1,float16,float16,32767,0.11481600006421407
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,2,2,128,0,1,float16,float16,65535,0.20670400063196817
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,2,2,128,0,1,float16,fp8,65535,0.18407466014226279
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,2,2,128,0,1,float16,float16,3,0.02459733436505
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,2,2,128,0,1,float16,float16,1,0.02420266717672348
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,2,2,128,0,1,float16,fp8,1,0.02508266766866048
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,2,2,128,0,1,float16,fp8,3,0.025749333202838898
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,2,2,128,0,1,float16,float16,7,0.025829332570234936
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,2,2,128,0,1,float16,float16,15,0.03178133318821589
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,2,2,128,0,1,float16,fp8,7,0.02664533257484436
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,2,2,128,0,1,float16,fp8,15,0.03257066756486893
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,2,2,128,0,1,float16,float16,31,0.03252800057331721
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,2,2,128,0,1,float16,float16,131071,0.38947200775146484
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,2,2,128,0,1,float16,fp8,131071,0.3407466808954875
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,2,2,128,0,1,float16,fp8,63,0.03233599911133448
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,2,2,128,0,1,float16,fp8,31,0.0324799989660581
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,2,2,128,0,1,float16,float16,63,0.03339733431736628
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,2,2,128,0,1,float16,float16,127,0.04571733375390371
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,2,2,128,0,1,float16,fp8,127,0.040037333965301514
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,2,2,128,0,1,float16,float16,255,0.06866666674613953
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,2,2,128,0,1,float16,fp8,511,0.10839466253916423
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,2,2,128,0,1,float16,fp8,255,0.06595199803511302
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,2,2,128,0,1,float16,float16,511,0.13779733578364053
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,2,2,128,0,1,float16,fp8,1023,0.2080693244934082
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,2,2,128,0,1,float16,fp8,1,0.04450133442878723
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,2,2,128,0,1,float16,float16,1023,0.2617013255755107
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,2,2,128,0,1,float16,float16,1,0.043791999419530235
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,2,2,128,0,1,float16,float16,2047,0.5015253225962321
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,2,2,128,0,1,float16,fp8,2047,0.39553598562876385
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,2,2,128,0,1,float16,float16,3,0.04516266783078512
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,2,2,128,0,1,float16,fp8,7,0.047877331574757896
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,2,2,128,0,1,float16,fp8,3,0.046682665745417275
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,2,2,128,0,1,float16,float16,7,0.04690133531888326
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,2,2,128,0,1,float16,float16,15,0.05932266513506571
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,2,2,128,0,1,float16,fp8,15,0.059765333930651345
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,2,2,128,0,1,float16,fp8,63,0.06086933116118113
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,2,2,128,0,1,float16,float16,31,0.06111466884613037
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,2,2,128,0,1,float16,fp8,31,0.05974400043487549
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,2,2,128,0,1,float16,float16,63,0.06621333460013072
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,2,2,128,0,1,float16,float16,127,0.0825493335723877
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,2,2,128,0,1,float16,fp8,127,0.08057599763075511
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,2,2,128,0,1,float16,float16,1,0.009237333511312803
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,2,2,128,0,1,float16,fp8,1,0.009189333145817121
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,2,2,128,0,1,float16,float16,511,0.2608533302942912
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,2,2,128,0,1,float16,float16,255,0.1264479955037435
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,2,2,128,0,1,float16,float16,3,0.009162666896979014
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,2,2,128,0,1,float16,fp8,3,0.009216000015536943
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,2,2,128,0,1,float16,fp8,255,0.1227946678797404
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,2,2,128,0,1,float16,float16,7,0.008757333581646284
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,2,2,128,0,1,float16,float16,15,0.009375999992092451
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,2,2,128,0,1,float16,fp8,511,0.2071359952290853
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,2,2,128,0,1,float16,fp8,7,0.00927466650803884
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,2,2,128,0,1,float16,fp8,15,0.00943999985853831
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,2,2,128,0,1,float16,float16,31,0.010149333626031876
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,2,2,128,0,1,float16,fp8,31,0.010128000130256018
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,2,2,128,0,1,float16,float16,63,0.011535999675591787
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,2,2,128,0,1,float16,fp8,63,0.011637333780527115
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,2,2,128,0,1,float16,float16,127,0.011584000041087469
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,2,2,128,0,1,float16,fp8,127,0.011909333368142446
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,2,2,128,0,1,float16,float16,255,0.012165332833925882
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,2,2,128,0,1,float16,float16,1023,0.5063573519388834
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,2,2,128,0,1,float16,fp8,255,0.011792000383138657
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,2,2,128,0,1,float16,fp8,1023,0.399946649869283
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,2,2,128,0,1,float16,float16,511,0.01600533351302147
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,2,2,128,0,1,float16,fp8,511,0.015813333292802174
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,2,2,128,0,1,float16,float16,1023,0.017290666699409485
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,2,2,128,0,1,float16,float16,4095,0.03817066550254822
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,2,2,128,0,1,float16,fp8,1023,0.01709866647919019
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,2,2,128,0,1,float16,float16,2047,0.02792533238728841
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,2,2,128,0,1,float16,fp8,2047,0.027866666515668232
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,2,2,128,0,1,float16,fp8,4095,0.03743999948104223
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,2,2,128,0,1,float16,float16,8191,0.06470400094985962
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,2,2,128,0,1,float16,fp8,8191,0.05529066423575083
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,2,2,128,0,1,float16,float16,16383,0.10538132985432942
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,2,2,128,0,1,float16,fp8,16383,0.09463999668757121
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,2,2,128,0,1,float16,float16,32767,0.1877280076344808
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,2,2,128,0,1,float16,fp8,32767,0.16646933555603027
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,1,1,64,0,1,float16,float16,1,0.007941333577036858
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,1,1,64,0,1,float16,fp8,1,0.00961599995692571
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,1,1,64,0,1,float16,float16,3,0.00814933329820633
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,1,1,64,0,1,float16,fp8,3,0.009503999724984169
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,2,2,128,0,1,float16,float16,65535,0.35347731908162433
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,1,1,64,0,1,float16,float16,7,0.00786666696270307
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,2,2,128,0,1,float16,fp8,65535,0.31377599636713666
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,1,1,64,0,1,float16,fp8,7,0.009599999835093817
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,1,1,64,0,1,float16,float16,15,0.008117333054542542
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,1,1,64,0,1,float16,fp8,15,0.009397333487868309
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,1,1,64,0,1,float16,float16,31,0.008320000022649765
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,1,1,64,0,1,float16,fp8,31,0.009375999992092451
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,1,1,64,0,1,float16,float16,63,0.00922133338948091
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,1,1,64,0,1,float16,fp8,63,0.010410666465759277
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,1,1,64,0,1,float16,float16,127,0.009402666861812273
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,1,1,64,0,1,float16,fp8,127,0.011802667131026586
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,1,1,64,0,1,float16,float16,255,0.009183999771873156
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,2,2,128,0,1,float16,float16,131071,0.6854986349741617
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,1,1,64,0,1,float16,fp8,255,0.011584000041087469
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,2,2,128,0,1,float16,fp8,131071,0.6053653160730997
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,1,1,64,0,1,float16,float16,511,0.010458666831254959
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,1,1,64,0,1,float16,fp8,511,0.012015999605258306
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,1,1,64,0,1,float16,float16,1023,0.016410666207472484
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,1,1,64,0,1,float16,fp8,1023,0.016122666498025257
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,1,1,64,0,1,float16,float16,2047,0.01730666682124138
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,1,1,64,0,1,float16,fp8,2047,0.017429333180189133
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,1,1,64,0,1,float16,float16,4095,0.027749332288901012
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,1,1,64,0,1,float16,float16,16383,0.06523199876149495
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,1,1,64,0,1,float16,fp8,4095,0.028016000986099243
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,1,1,64,0,1,float16,float16,8191,0.038405333956082664
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,1,1,64,0,1,float16,fp8,8191,0.03783999880154928
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,1,1,64,0,1,float16,fp8,16383,0.05570133527119955
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,1,1,64,0,1,float16,float16,32767,0.10664533575375874
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,1,1,64,0,1,float16,fp8,32767,0.09288533528645833
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,1,1,64,0,1,float16,float16,1,0.008026666939258575
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,1,1,64,0,1,float16,fp8,1,0.011567999919255575
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,1,1,64,0,1,float16,float16,3,0.007978666573762894
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,1,1,64,0,1,float16,fp8,3,0.009296000003814697
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,1,1,64,0,1,float16,float16,7,0.007957333077987036
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,1,1,64,0,1,float16,float16,65535,0.186191995938619
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,1,1,64,0,1,float16,float16,31,0.008256000156203905
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,1,1,64,0,1,float16,fp8,7,0.009178666397929192
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,1,1,64,0,1,float16,float16,15,0.008074666683872541
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,1,1,64,0,1,float16,fp8,15,0.009269333134094873
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,1,1,64,0,1,float16,fp8,65535,0.16220266620318094
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,1,1,64,0,1,float16,fp8,31,0.009045333291093508
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,1,1,64,0,1,float16,float16,63,0.009008000294367472
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,1,1,64,0,1,float16,fp8,63,0.010117333382368088
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,1,1,64,0,1,float16,float16,127,0.009082666908701261
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,1,1,64,0,1,float16,fp8,127,0.011685332904259363
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,1,1,64,0,1,float16,float16,255,0.00879466657837232
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,1,1,64,0,1,float16,fp8,255,0.011749333391586939
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,1,1,64,0,1,float16,float16,511,0.0102186668664217
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,1,1,64,0,1,float16,fp8,511,0.011802667131026586
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,1,1,64,0,1,float16,float16,1023,0.01581866666674614
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,1,1,64,0,1,float16,fp8,1023,0.015717333803574245
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,1,1,64,0,1,float16,float16,2047,0.016480000068744022
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,1,1,64,0,1,float16,fp8,2047,0.016757333030303318
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,1,1,64,0,1,float16,float16,4095,0.01682666689157486
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,1,1,64,0,1,float16,float16,131071,0.3521759907404582
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,1,1,64,0,1,float16,fp8,131071,0.3059413234392802
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,1,1,64,0,1,float16,fp8,4095,0.016789333273967106
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,1,1,64,0,1,float16,float16,8191,0.017103999853134155
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,1,1,64,0,1,float16,fp8,8191,0.01700266698996226
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,1,1,64,0,1,float16,float16,16383,0.016719999412695568
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,1,1,64,0,1,float16,fp8,16383,0.01672533278663953
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,1,1,64,0,1,float16,float16,32767,0.0173333336909612
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,1,1,64,0,1,float16,float16,65535,0.017397332936525345
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,1,1,64,0,1,float16,fp8,32767,0.01728533332546552
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,1,1,64,0,1,float16,float16,1,0.007760000104705493
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,1,1,64,0,1,float16,fp8,65535,0.017242666333913803
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,1,1,64,0,1,float16,fp8,1,0.009349333122372627
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,1,1,64,0,1,float16,float16,3,0.008416000132759413
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,1,1,64,0,1,float16,fp8,3,0.009370666618148485
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,1,1,64,0,1,float16,float16,7,0.008416000132759413
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,1,1,64,0,1,float16,fp8,7,0.00901333304742972
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,1,1,64,0,1,float16,float16,15,0.008266666904091835
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,1,1,64,0,1,float16,fp8,15,0.009445333232482275
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,1,1,64,0,1,float16,float16,131071,0.018058666338523228
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,1,1,64,0,1,float16,float16,31,0.008458666503429413
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,1,1,64,0,1,float16,fp8,31,0.009706666693091393
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,1,1,64,0,1,float16,float16,63,0.008816000074148178
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,1,1,64,0,1,float16,fp8,131071,0.01777600000301997
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,1,1,64,0,1,float16,fp8,63,0.010330666477481524
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,1,1,64,0,1,float16,float16,127,0.009370666618148485
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,1,1,64,0,1,float16,fp8,127,0.0120319997270902
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,1,1,64,0,1,float16,float16,255,0.009328000247478485
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,1,1,64,0,1,float16,fp8,255,0.012096000214417776
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,1,1,64,0,1,float16,float16,511,0.010064000263810158
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,1,1,64,0,1,float16,fp8,511,0.012223999947309494
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,1,1,64,0,1,float16,float16,1023,0.016165333489576977
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,1,1,64,0,1,float16,fp8,1023,0.016042667130629223
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,1,1,64,0,1,float16,float16,2047,0.01725333308180173
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,1,1,64,0,1,float16,fp8,2047,0.0170666662355264
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,1,1,64,0,1,float16,float16,4095,0.017423999806245167
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,1,1,64,0,1,float16,float16,16383,0.017509333789348602
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,1,1,64,0,1,float16,fp8,4095,0.016976000120242436
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,1,1,64,0,1,float16,float16,8191,0.01746133342385292
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,1,1,64,0,1,float16,fp8,8191,0.01758933315674464
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,1,1,64,0,1,float16,fp8,16383,0.01764800027012825
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,1,1,64,0,1,float16,float16,32767,0.017509333789348602
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,1,1,64,0,1,float16,fp8,32767,0.01724799970785777
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,1,1,64,0,1,float16,float16,1,0.00903466654320558
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,1,1,64,0,1,float16,fp8,1,0.009786666681369146
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,1,1,64,0,1,float16,fp8,65535,0.017184000462293625
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,1,1,64,0,1,float16,float16,3,0.00921066664159298
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,1,1,64,0,1,float16,fp8,3,0.010106666634480158
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,1,1,64,0,1,float16,float16,7,0.008869333192706108
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,1,1,64,0,1,float16,float16,65535,0.01806933308641116
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,1,1,64,0,1,float16,fp8,7,0.009258666386206945
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,1,1,64,0,1,float16,float16,15,0.008623999853928884
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,1,1,64,0,1,float16,fp8,31,0.009999999776482582
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,1,1,64,0,1,float16,fp8,15,0.009877333417534828
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,1,1,64,0,1,float16,float16,31,0.008597333605090777
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,1,1,64,0,1,float16,float16,127,0.009749333063761393
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,1,1,64,0,1,float16,float16,63,0.008938666433095932
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,1,1,64,0,1,float16,fp8,63,0.010602666685978571
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,1,1,64,0,1,float16,fp8,127,0.012245333443085352
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,1,1,64,0,1,float16,float16,131071,0.023242667317390442
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,1,1,64,0,1,float16,float16,255,0.009408000235756239
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,1,1,64,0,1,float16,fp8,255,0.012165332833925882
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,1,1,64,0,1,float16,fp8,131071,0.022490667800108593
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,1,1,64,0,1,float16,float16,511,0.010117333382368088
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,1,1,64,0,1,float16,fp8,511,0.012202666451533636
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,1,1,64,0,1,float16,float16,1023,0.017045332739750545
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,1,1,64,0,1,float16,fp8,1023,0.016554666062196095
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,1,1,64,0,1,float16,float16,2047,0.028277332584063213
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,1,1,64,0,1,float16,fp8,2047,0.028250666956106823
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,1,1,64,0,1,float16,fp8,4095,0.03513599932193756
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,1,1,64,0,1,float16,float16,4095,0.036288000643253326
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,1,1,64,0,1,float16,float16,8191,0.05579733351866404
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,1,1,64,0,1,float16,fp8,8191,0.049039999643961586
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,1,1,64,0,1,float16,float16,16383,0.09539733330408733
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,1,1,64,0,1,float16,fp8,16383,0.08115733166535695
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,1,1,64,0,1,float16,float16,1,0.009466666728258133
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,1,1,64,0,1,float16,fp8,1,0.009626666704813639
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,1,1,64,0,1,float16,float16,3,0.008336000144481659
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,1,1,64,0,1,float16,float16,32767,0.16946667432785034
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,1,1,64,0,1,float16,fp8,3,0.009466666728258133
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,1,1,64,0,1,float16,fp8,32767,0.14356799920399985
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,1,1,64,0,1,float16,float16,7,0.008325333396593729
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,1,1,64,0,1,float16,fp8,7,0.009216000015536943
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,1,1,64,0,1,float16,float16,15,0.007978666573762894
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,1,1,64,0,1,float16,fp8,15,0.009285333255926767
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,1,1,64,0,1,float16,float16,31,0.008341333518425623
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,1,1,64,0,1,float16,fp8,31,0.009402666861812273
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,1,1,64,0,1,float16,float16,65535,0.3025280038515727
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,1,1,64,0,1,float16,float16,63,0.00890666681031386
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,1,1,64,0,1,float16,fp8,127,0.011717333147923151
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,1,1,64,0,1,float16,fp8,63,0.010186666622757912
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,1,1,64,0,1,float16,float16,127,0.009279999881982803
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,1,1,64,0,1,float16,float16,255,0.00916800027092298
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,1,1,64,0,1,float16,fp8,255,0.011727999895811081
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,1,1,64,0,1,float16,float16,511,0.00983466642598311
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,1,1,64,0,1,float16,fp8,511,0.011941333611806234
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,1,1,64,0,1,float16,fp8,65535,0.25992000102996826
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,1,1,64,0,1,float16,fp8,2047,0.016544000556071598
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,1,1,64,0,1,float16,float16,1023,0.01603200038274129
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,1,1,64,0,1,float16,fp8,1023,0.01575999955336253
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,1,1,64,0,1,float16,float16,2047,0.01710933322707812
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,1,1,64,0,1,float16,float16,4095,0.017312000195185345
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,1,1,64,0,1,float16,fp8,8191,0.016927999754746754
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,1,1,64,0,1,float16,fp8,4095,0.016480000068744022
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,1,1,64,0,1,float16,float16,8191,0.017258666455745697
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,1,1,64,0,1,float16,float16,16383,0.01708799973130226
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,1,1,64,0,1,float16,fp8,16383,0.01708799973130226
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,1,1,64,0,1,float16,float16,32767,0.018005333840847015
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,1,1,64,0,1,float16,fp8,32767,0.017498667041460674
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,1,1,64,0,1,float16,float16,65535,0.022624000906944275
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,1,1,64,0,1,float16,float16,1,0.008421333506703377
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,1,1,64,0,1,float16,fp8,1,0.009663999701539675
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,1,1,64,0,1,float16,float16,3,0.007914666707317034
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,1,1,64,0,1,float16,fp8,65535,0.02295999974012375
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,1,1,64,0,1,float16,fp8,3,0.009658666948477427
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,1,1,64,0,1,float16,float16,7,0.008261333530147871
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,1,1,64,0,1,float16,float16,131071,0.5114560127258301
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,1,1,64,0,1,float16,fp8,7,0.009872000043590864
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,1,1,64,0,1,float16,fp8,131071,0.39327998956044513
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,1,1,64,0,1,float16,float16,15,0.008447999755541483
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,1,1,64,0,1,float16,fp8,15,0.009114666531483332
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,1,1,64,0,1,float16,float16,31,0.00879466657837232
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,1,1,64,0,1,float16,fp8,31,0.0099093330403169
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,1,1,64,0,1,float16,float16,131071,0.04603200157483419
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,1,1,64,0,1,float16,float16,63,0.008746666833758354
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,1,1,64,0,1,float16,fp8,131071,0.03264000018437704
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,1,1,64,0,1,float16,fp8,63,0.010687999427318573
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,1,1,64,0,1,float16,float16,127,0.009701333319147428
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,1,1,64,0,1,float16,fp8,127,0.012058666596810022
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,1,1,64,0,1,float16,float16,255,0.009674666449427605
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,1,1,64,0,1,float16,fp8,255,0.011551999797423681
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,1,1,64,0,1,float16,float16,511,0.010922666639089584
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,1,1,64,0,1,float16,fp8,511,0.012223999947309494
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,1,1,64,0,1,float16,float16,1023,0.027189334233601887
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,1,1,64,0,1,float16,fp8,1023,0.026602665583292644
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,1,1,64,0,1,float16,float16,2047,0.03146666785081228
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,1,1,64,0,1,float16,fp8,2047,0.031167998909950256
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,1,1,64,0,1,float16,fp8,4095,0.04179200033346812
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,1,1,64,0,1,float16,float16,4095,0.04693333307902018
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,1,1,64,0,1,float16,float16,8191,0.08432533343633015
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,1,1,64,0,1,float16,fp8,8191,0.06706133484840393
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,1,1,64,0,1,float16,float16,16383,0.14590400457382202
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,1,1,64,0,1,float16,float16,1,0.008661333471536636
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,1,1,64,0,1,float16,fp8,16383,0.1164959967136383
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,1,1,64,0,1,float16,fp8,1,0.009226666763424873
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,1,1,64,0,1,float16,float16,3,0.00860799973209699
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,1,1,64,0,1,float16,fp8,3,0.009098666409651438
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,1,1,64,0,1,float16,float16,7,0.009338666374484697
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,1,1,64,0,1,float16,float16,15,0.008842666943868002
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,1,1,64,0,1,float16,fp8,7,0.009455999980370203
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,1,1,64,0,1,float16,fp8,32767,0.20755199591318765
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,1,1,64,0,1,float16,fp8,15,0.009322666873534521
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,1,1,64,0,1,float16,float16,31,0.010133333504199982
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,1,1,64,0,1,float16,fp8,31,0.009535999968647957
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,1,1,64,0,1,float16,float16,32767,0.26873600482940674
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,1,1,64,0,1,float16,float16,63,0.011653333902359009
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,1,1,64,0,1,float16,fp8,63,0.011674666156371435
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,1,1,64,0,1,float16,float16,127,0.011781333635250727
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,1,1,64,0,1,float16,float16,255,0.01137599969903628
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,1,1,64,0,1,float16,fp8,127,0.013365333278973898
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,1,1,64,0,1,float16,fp8,255,0.011968000481526056
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,1,1,64,0,1,float16,float16,511,0.013776000589132309
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,1,1,64,0,1,float16,fp8,511,0.013376000026861826
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,1,1,64,0,1,float16,float16,1023,0.019088000059127808
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,1,1,64,0,1,float16,fp8,1023,0.01820266619324684
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,1,1,64,0,1,float16,float16,65535,0.5125973224639893
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,1,1,64,0,1,float16,fp8,2047,0.02922666569550832
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,1,1,64,0,1,float16,float16,2047,0.0406986673672994
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,1,1,64,0,1,float16,float16,4095,0.07233066856861115
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,1,1,64,0,1,float16,fp8,4095,0.058090666929880776
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,1,1,64,0,1,float16,fp8,8191,0.1051680048306783
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,1,1,64,0,1,float16,float16,8191,0.13300266861915588
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,1,1,64,0,1,float16,fp8,1,0.014069333672523499
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,1,1,64,0,1,float16,fp8,3,0.014266667266686758
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,1,1,64,0,1,float16,float16,1,0.014639999717473984
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,1,1,64,0,1,float16,float16,3,0.014458666245142618
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,1,1,64,0,1,float16,fp8,65535,0.3909226655960083
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,1,1,64,0,1,float16,fp8,7,0.0143306665122509
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,1,1,64,0,1,float16,float16,15,0.014335999886194864
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,1,1,64,0,1,float16,float16,7,0.014773332824309668
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,1,1,64,0,1,float16,float16,16383,0.2563573320706685
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,1,1,64,0,1,float16,fp8,15,0.014959999670584997
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,1,1,64,0,1,float16,float16,31,0.015978666643301647
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,1,1,64,0,1,float16,fp8,31,0.015466666469971338
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,1,1,64,0,1,float16,float16,63,0.018613333503405254
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,1,1,64,0,1,float16,fp8,16383,0.19909866650899252
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,1,1,64,0,1,float16,fp8,63,0.019381333142518997
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,1,1,64,0,1,float16,float16,127,0.019519999623298645
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,1,1,64,0,1,float16,fp8,255,0.019109333554903667
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,1,1,64,0,1,float16,fp8,127,0.019461333751678467
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,1,1,64,0,1,float16,float16,511,0.023077333966890972
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,1,1,64,0,1,float16,float16,255,0.019658666104078293
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,1,1,64,0,1,float16,fp8,511,0.022330666581789654
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,1,1,64,0,1,float16,float16,1023,0.03942933430274328
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,1,1,64,0,1,float16,fp8,2047,0.059562668204307556
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,1,1,64,0,1,float16,fp8,1023,0.03404266635576884
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,1,1,64,0,1,float16,float16,1,0.0081386665503184
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,1,1,64,0,1,float16,float16,2047,0.07532800237337749
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,1,1,64,0,1,float16,fp8,1,0.008853333070874214
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,1,1,64,0,1,float16,float16,3,0.008143999924262365
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,1,1,64,0,1,float16,fp8,3,0.009359999870260557
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,1,1,64,0,1,float16,fp8,4095,0.1095360020796458
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,1,1,64,0,1,float16,float16,7,0.007983999947706858
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,1,1,64,0,1,float16,float16,4095,0.1362933317820231
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,1,1,64,0,1,float16,fp8,7,0.008863999818762144
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,1,1,64,0,1,float16,float16,15,0.007983999947706858
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,1,1,64,0,1,float16,fp8,15,0.009349333122372627
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,1,1,64,0,1,float16,fp8,31,0.00884799969693025
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,1,1,64,0,1,float16,float16,31,0.008234666660428047
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,1,1,64,0,1,float16,fp8,8191,0.20269866784413657
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,1,1,64,0,1,float16,float16,63,0.008565333361426989
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,1,1,64,0,1,float16,fp8,63,0.010010666524370512
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,1,1,64,0,1,float16,float16,127,0.009066666786869368
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,1,1,64,0,1,float16,fp8,127,0.011744000017642975
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,1,1,64,0,1,float16,float16,255,0.008661333471536636
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,1,1,64,0,1,float16,float16,8191,0.25737067063649494
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,1,1,64,0,1,float16,fp8,255,0.011610666910807291
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,1,1,64,0,1,float16,float16,511,0.009743999689817429
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,1,1,64,0,1,float16,fp8,511,0.011359999577204386
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,1,1,64,0,1,float16,float16,1023,0.016149333367745083
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,1,1,64,0,1,float16,fp8,1023,0.016095999628305435
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,1,1,64,0,1,float16,float16,2047,0.01643199970324834
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,1,1,64,0,1,float16,fp8,8191,0.017221332838137943
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,1,1,64,0,1,float16,fp8,2047,0.016895999511082966
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,1,1,64,0,1,float16,float16,4095,0.017077332983414333
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,1,1,64,0,1,float16,fp8,4095,0.016549333930015564
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,1,1,64,0,1,float16,float16,8191,0.01687466725707054
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,1,1,64,0,1,float16,float16,16383,0.02829866607983907
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,1,1,64,0,1,float16,fp8,16383,0.027466667195161183
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,1,1,64,0,1,float16,float16,32767,0.03833599885304769
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,1,1,64,0,1,float16,fp8,32767,0.03797333439191183
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,1,1,64,0,1,float16,float16,65535,0.0690719981988271
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,1,1,64,0,1,float16,fp8,65535,0.05791999896367391
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,1,1,64,0,1,float16,float16,1,0.013983999689420065
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,1,1,64,0,1,float16,fp8,1,0.014709333578745524
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,1,1,64,0,1,float16,float16,7,0.014458666245142618
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,1,1,64,0,1,float16,float16,3,0.014384000251690546
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,1,1,64,0,1,float16,fp8,7,0.014538666854302088
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,1,1,64,0,1,float16,fp8,3,0.014618666221698126
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,1,1,64,0,1,float16,float16,131071,0.11937600374221802
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,1,1,64,0,1,float16,fp8,15,0.01516266663869222
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,1,1,64,0,1,float16,float16,15,0.015274666249752045
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,1,1,64,0,1,float16,fp8,131071,0.10588799913724263
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,1,1,64,0,1,float16,fp8,31,0.01834133391578992
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,1,1,64,0,1,float16,float16,31,0.018426666657129925
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,1,1,64,0,1,float16,float16,127,0.018613333503405254
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,1,1,64,0,1,float16,fp8,63,0.018709332992633183
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,1,1,64,0,1,float16,float16,63,0.018570666511853535
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,1,1,64,0,1,float16,fp8,255,0.021920000513394673
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,1,1,64,0,1,float16,fp8,127,0.01911466692884763
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,1,1,64,0,1,float16,float16,255,0.023103999594847362
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,1,1,64,0,1,float16,fp8,511,0.03316800047953924
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,1,1,64,0,1,float16,float16,511,0.038245332737763725
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,1,1,64,0,1,float16,fp8,1023,0.05824000140031179
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,1,1,64,0,1,float16,float16,1023,0.07459733386834462
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,1,1,64,0,1,float16,fp8,2047,0.10981333255767822
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,1,1,64,0,1,float16,float16,1,0.023984000086784363
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,1,1,64,0,1,float16,float16,2047,0.13688533504803976
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,1,1,64,0,1,float16,float16,3,0.023989332218964893
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,1,1,64,0,1,float16,fp8,1,0.02495466669400533
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,1,1,64,0,1,float16,float16,7,0.024869332710901897
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,1,1,64,0,1,float16,fp8,7,0.025306666890780132
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,1,1,64,0,1,float16,float16,15,0.02565866708755493
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,1,1,64,0,1,float16,fp8,3,0.02491733431816101
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,1,1,64,0,1,float16,fp8,4095,0.2062986691792806
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,1,1,64,0,1,float16,float16,4095,0.25679999589920044
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,1,1,64,0,1,float16,fp8,31,0.03230399886767069
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,1,1,64,0,1,float16,fp8,15,0.026554666459560394
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,1,1,64,0,1,float16,float16,63,0.03196800003449122
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,1,1,64,0,1,float16,float16,31,0.032111999889214836
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,1,1,64,0,1,float16,fp8,63,0.03267733256022135
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,1,1,64,0,1,float16,float16,255,0.04513066510359446
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,1,1,64,0,1,float16,float16,127,0.033402666449546814
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,1,1,64,0,1,float16,fp8,511,0.06557866434256236
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,1,1,64,0,1,float16,fp8,127,0.033386667569478355
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,1,1,64,0,1,float16,float16,1,0.007717333113153775
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,1,1,64,0,1,float16,fp8,255,0.04043200115362803
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,1,1,64,0,1,float16,float16,3,0.007637333124876022
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,1,1,64,0,1,float16,fp8,1,0.009349333122372627
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,1,1,64,0,1,float16,float16,511,0.06877333422501881
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,1,1,64,0,1,float16,fp8,1023,0.10870400071144104
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,1,1,64,0,1,float16,fp8,3,0.009461333354314169
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,1,1,64,0,1,float16,float16,7,0.007621333623925845
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,1,1,64,0,1,float16,fp8,7,0.009370666618148485
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,1,1,64,0,1,float16,float16,1023,0.1384213368097941
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,1,1,64,0,1,float16,float16,15,0.008058666562040647
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,1,1,64,0,1,float16,float16,2047,0.26074133316675824
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,1,1,64,0,1,float16,float16,31,0.008047999814152718
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,1,1,64,0,1,float16,fp8,15,0.009205333267649015
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,1,1,64,0,1,float16,fp8,31,0.008997333546479544
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,1,1,64,0,1,float16,float16,63,0.009119999905427298
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,1,1,64,0,1,float16,fp8,63,0.01002133327225844
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,1,1,64,0,1,float16,float16,127,0.00921066664159298
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,1,1,64,0,1,float16,fp8,127,0.011312000453472137
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,1,1,64,0,1,float16,float16,255,0.00873066671192646
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,1,1,64,0,1,float16,fp8,255,0.011829332758982977
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,1,1,64,0,1,float16,float16,511,0.009824000298976898
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,1,1,64,0,1,float16,fp8,2047,0.20866666237513223
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,1,1,64,0,1,float16,fp8,511,0.011802667131026586
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,1,1,64,0,1,float16,float16,1023,0.016095999628305435
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,1,1,64,0,1,float16,fp8,1023,0.01616000011563301
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,1,1,64,0,1,float16,float16,2047,0.016607999801635742
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,1,1,64,0,1,float16,fp8,2047,0.016522667060295742
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,1,1,64,0,1,float16,float16,4095,0.017338667064905167
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,1,1,64,0,1,float16,fp8,4095,0.016666666915019352
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,1,1,64,0,1,float16,float16,8191,0.02815466622511546
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,1,1,64,0,1,float16,fp8,8191,0.028186666468779247
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,1,1,64,0,1,float16,float16,16383,0.038586666186650596
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,1,1,64,0,1,float16,fp8,16383,0.03807999938726425
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,1,1,64,0,1,float16,float16,32767,0.06836799780527751
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,1,1,64,0,1,float16,fp8,32767,0.05658666789531708
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,1,1,128,0,1,float16,float16,1,0.009088000282645226
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,1,1,64,0,1,float16,float16,65535,0.11641599734624226
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,1,1,64,0,1,float16,fp8,65535,0.10239467024803162
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,1,1,128,0,1,float16,fp8,1,0.009477333476146063
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,1,1,128,0,1,float16,float16,3,0.008863999818762144
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,1,1,128,0,1,float16,fp8,3,0.00890666681031386
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,1,1,128,0,1,float16,float16,7,0.008965333302815756
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,1,1,128,0,1,float16,fp8,7,0.008933333059151968
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,1,1,128,0,1,float16,float16,15,0.009472000102202097
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,1,1,64,0,1,float16,fp8,131071,0.17921066284179688
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,1,1,128,0,1,float16,fp8,15,0.00915733352303505
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,1,1,128,0,1,float16,float16,31,0.00961599995692571
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,1,1,64,0,1,float16,float16,131071,0.20574933290481567
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,1,1,128,0,1,float16,fp8,31,0.010058666889866194
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,1,1,128,0,1,float16,float16,63,0.011312000453472137
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,1,1,128,0,1,float16,fp8,63,0.011861333002646765
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,1,1,128,0,1,float16,float16,127,0.011765333513418833
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,1,1,128,0,1,float16,fp8,127,0.011349332829316458
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,1,1,128,0,1,float16,float16,1023,0.017509333789348602
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,1,1,128,0,1,float16,float16,2047,0.027589333554108936
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,1,1,128,0,1,float16,fp8,1023,0.017050666113694508
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,1,1,128,0,1,float16,float16,255,0.011440000186363855
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,1,1,128,0,1,float16,fp8,255,0.011701333026091257
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,1,1,128,0,1,float16,float16,511,0.016074666132529575
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,1,1,128,0,1,float16,fp8,511,0.015487999965747198
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,1,1,128,0,1,float16,fp8,2047,0.027845333019892376
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,1,1,128,0,1,float16,float16,4095,0.037690666814645134
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,1,1,128,0,1,float16,fp8,4095,0.03742400060097376
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,1,1,128,0,1,float16,float16,8191,0.06487999856472015
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,1,1,128,0,1,float16,fp8,8191,0.05518933137257894
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,1,1,128,0,1,float16,float16,16383,0.10662399729092915
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,1,1,128,0,1,float16,fp8,16383,0.0953653355439504
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,1,1,128,0,1,float16,fp8,32767,0.1672160029411316
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,1,1,128,0,1,float16,float16,32767,0.1875200072924296
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,1,1,128,0,1,float16,float16,1,0.008453333129485449
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,1,1,128,0,1,float16,fp8,1,0.008597333605090777
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,1,1,128,0,1,float16,float16,3,0.009125333279371262
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,1,1,128,0,1,float16,fp8,3,0.00878399983048439
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,1,1,128,0,1,float16,float16,7,0.008949333180983862
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,1,1,128,0,1,float16,fp8,7,0.008570666735370954
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,1,1,128,0,1,float16,float16,15,0.009269333134094873
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,1,1,128,0,1,float16,fp8,15,0.00890666681031386
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,1,1,128,0,1,float16,fp8,65535,0.31486932436625165
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,1,1,128,0,1,float16,float16,31,0.009743999689817429
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,1,1,128,0,1,float16,fp8,31,0.009413333609700203
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,1,1,128,0,1,float16,float16,65535,0.3540000120798747
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,1,1,128,0,1,float16,float16,63,0.011589333415031433
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,1,1,128,0,1,float16,fp8,63,0.011173332730929056
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,1,1,128,0,1,float16,fp8,127,0.0107893335322539
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,1,1,128,0,1,float16,float16,127,0.011413333316644033
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,1,1,128,0,1,float16,float16,255,0.01110400011142095
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,1,1,128,0,1,float16,fp8,255,0.011658667276302973
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,1,1,128,0,1,float16,float16,511,0.015557333827018738
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,1,1,128,0,1,float16,fp8,511,0.015077333897352219
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,1,1,128,0,1,float16,float16,1023,0.015749332805474598
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,1,1,128,0,1,float16,fp8,1023,0.01637866720557213
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,1,1,128,0,1,float16,float16,2047,0.015930666277805965
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,1,1,128,0,1,float16,fp8,2047,0.016522667060295742
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,1,1,128,0,1,float16,float16,4095,0.016672000288963318
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,1,1,128,0,1,float16,float16,16383,0.0164533331990242
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,1,1,128,0,1,float16,fp8,4095,0.01616000011563301
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,1,1,128,0,1,float16,float16,8191,0.016021333634853363
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,1,1,128,0,1,float16,fp8,131071,0.6097013155619303
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,1,1,128,0,1,float16,float16,131071,0.677466630935669
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,1,1,128,0,1,float16,fp8,8191,0.016650666793187458
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,1,1,128,0,1,float16,fp8,16383,0.017077332983414333
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,1,1,128,0,1,float16,fp8,32767,0.016634666671355564
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,1,1,128,0,1,float16,float16,32767,0.017125333348910015
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,1,1,128,0,1,float16,fp8,65535,0.01691199963291486
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,1,1,128,0,1,float16,float16,65535,0.017658667018016178
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,1,1,128,0,1,float16,float16,1,0.009152000149091085
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,1,1,128,0,1,float16,fp8,1,0.009061333412925402
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,1,1,128,0,1,float16,float16,3,0.008816000074148178
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,1,1,128,0,1,float16,fp8,3,0.00922133338948091
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,1,1,128,0,1,float16,float16,7,0.008373333141207695
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,1,1,128,0,1,float16,fp8,7,0.008863999818762144
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,1,1,128,0,1,float16,float16,15,0.009370666618148485
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,1,1,128,0,1,float16,fp8,15,0.009141333401203156
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,1,1,128,0,1,float16,float16,31,0.00966933307548364
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,1,1,128,0,1,float16,float16,131071,0.023605334262053173
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,1,1,128,0,1,float16,fp8,31,0.01002133327225844
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,1,1,128,0,1,float16,fp8,131071,0.022698665658632915
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,1,1,128,0,1,float16,float16,63,0.011285333583752314
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,1,1,128,0,1,float16,fp8,63,0.011120000233252844
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,1,1,128,0,1,float16,fp8,511,0.015664000064134598
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,1,1,128,0,1,float16,float16,127,0.011722666521867117
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,1,1,128,0,1,float16,fp8,127,0.011488000551859537
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,1,1,128,0,1,float16,float16,255,0.011168000598748526
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,1,1,128,0,1,float16,fp8,255,0.01156266654531161
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,1,1,128,0,1,float16,float16,511,0.01580799991885821
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,1,1,128,0,1,float16,float16,1023,0.016458666572968166
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,1,1,128,0,1,float16,fp8,1023,0.016117333124081295
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,1,1,128,0,1,float16,float16,2047,0.016714667280515034
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,1,1,128,0,1,float16,fp8,2047,0.016970666746298473
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,1,1,128,0,1,float16,float16,4095,0.01647466669480006
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,1,1,128,0,1,float16,fp8,4095,0.01639466608564059
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,1,1,128,0,1,float16,float16,8191,0.01714666684468587
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,1,1,128,0,1,float16,fp8,8191,0.01724799970785777
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,1,1,128,0,1,float16,float16,16383,0.01708799973130226
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,1,1,128,0,1,float16,fp8,16383,0.016757333030303318
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,1,1,128,0,1,float16,float16,32767,0.0176959993938605
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,1,1,128,0,1,float16,fp8,32767,0.01758933315674464
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,1,1,128,0,1,float16,float16,1,0.00919999989370505
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,1,1,128,0,1,float16,fp8,65535,0.02292266736427943
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,1,1,128,0,1,float16,float16,65535,0.02317333221435547
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,1,1,128,0,1,float16,fp8,1,0.008965333302815756
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,1,1,128,0,1,float16,float16,3,0.009392000113924345
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,1,1,128,0,1,float16,fp8,3,0.009413333609700203
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,1,1,128,0,1,float16,float16,7,0.009445333232482275
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,1,1,128,0,1,float16,fp8,7,0.009290666629870733
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,1,1,128,0,1,float16,float16,15,0.009045333291093508
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,1,1,128,0,1,float16,fp8,15,0.009759999811649323
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,1,1,128,0,1,float16,float16,131071,0.04602666695912679
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,1,1,128,0,1,float16,fp8,63,0.011370666325092316
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,1,1,128,0,1,float16,float16,31,0.010069333637754122
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,1,1,128,0,1,float16,fp8,31,0.010575999816258749
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,1,1,128,0,1,float16,fp8,131071,0.03245333333810171
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,1,1,128,0,1,float16,float16,63,0.011674666156371435
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,1,1,128,0,1,float16,float16,511,0.016783999900023144
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,1,1,128,0,1,float16,float16,127,0.01190399999419848
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,1,1,128,0,1,float16,float16,255,0.011626667032639185
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,1,1,128,0,1,float16,float16,1023,0.02810666710138321
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,1,1,128,0,1,float16,fp8,127,0.012058666596810022
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,1,1,128,0,1,float16,fp8,255,0.012746666868527731
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,1,1,128,0,1,float16,fp8,511,0.016362667083740234
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,1,1,128,0,1,float16,fp8,1023,0.027813332776228588
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,1,1,128,0,1,float16,float16,2047,0.03596800069014231
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,1,1,128,0,1,float16,fp8,2047,0.03463999927043915
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,1,1,128,0,1,float16,float16,4095,0.05538133283456167
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,1,1,128,0,1,float16,fp8,4095,0.04897066454092661
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,1,1,128,0,1,float16,float16,8191,0.09493866562843323
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,1,1,128,0,1,float16,fp8,8191,0.08372267087300618
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,1,1,128,0,1,float16,float16,32767,0.3023253281911214
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,1,1,128,0,1,float16,float16,16383,0.1653600037097931
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,1,1,128,0,1,float16,fp8,16383,0.1471680005391439
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,1,1,128,0,1,float16,fp8,32767,0.2713386615117391
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,1,1,128,0,1,float16,float16,1,0.008714666590094566
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,1,1,128,0,1,float16,fp8,1,0.009285333255926767
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,1,1,128,0,1,float16,float16,3,0.009178666397929192
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,1,1,128,0,1,float16,fp8,3,0.009381333366036415
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,1,1,128,0,1,float16,float16,65535,0.6232906579971313
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,1,1,128,0,1,float16,float16,7,0.008586666857202848
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,1,1,128,0,1,float16,fp8,65535,0.5124640067418417
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,1,1,128,0,1,float16,float16,31,0.00983466642598311
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,1,1,128,0,1,float16,fp8,7,0.009178666397929192
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,1,1,128,0,1,float16,float16,15,0.009338666374484697
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,1,1,128,0,1,float16,fp8,15,0.009541333342591921
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,1,1,128,0,1,float16,fp8,31,0.009706666693091393
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,1,1,128,0,1,float16,fp8,63,0.011600000162919363
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,1,1,128,0,1,float16,float16,63,0.011642667154471079
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,1,1,128,0,1,float16,float16,127,0.011695999652147293
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,1,1,128,0,1,float16,fp8,127,0.011823999385039011
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,1,1,128,0,1,float16,float16,131071,0.9909119606018066
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,1,1,128,0,1,float16,fp8,255,0.011338666081428528
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,1,1,128,0,1,float16,float16,255,0.01180800050497055
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,1,1,128,0,1,float16,fp8,131071,0.8047413031260172
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,1,1,128,0,1,float16,float16,511,0.01575999955336253
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,1,1,128,0,1,float16,fp8,511,0.015743999431530636
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,1,1,128,0,1,float16,float16,1023,0.01693333312869072
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,1,1,128,0,1,float16,fp8,1023,0.01681600014368693
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,1,1,128,0,1,float16,float16,2047,0.016549333930015564
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,1,1,128,0,1,float16,fp8,2047,0.016949333250522614
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,1,1,128,0,1,float16,float16,4095,0.017221332838137943
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,1,1,128,0,1,float16,fp8,4095,0.01695999999841054
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,1,1,128,0,1,float16,fp8,8191,0.01736533393462499
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,1,1,128,0,1,float16,float16,8191,0.01728533332546552
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,1,1,128,0,1,float16,float16,16383,0.017685333887736004
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,1,1,128,0,1,float16,fp8,65535,0.0324799989660581
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,1,1,128,0,1,float16,fp8,16383,0.017301333447297413
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,1,1,128,0,1,float16,float16,32767,0.022645334402720135
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,1,1,128,0,1,float16,fp8,32767,0.022650666534900665
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,1,1,128,0,1,float16,float16,65535,0.04571733375390371
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,1,1,128,0,1,float16,float16,1,0.009759999811649323
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,1,1,128,0,1,float16,fp8,1,0.009898666913310686
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,1,1,128,0,1,float16,float16,3,0.009818666925032934
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,1,1,128,0,1,float16,float16,15,0.0100426667680343
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,1,1,128,0,1,float16,fp8,3,0.009637333452701569
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,1,1,128,0,1,float16,float16,7,0.009695999945203463
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,1,1,128,0,1,float16,fp8,7,0.009999999776482582
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,1,1,128,0,1,float16,fp8,15,0.0102186668664217
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,1,1,128,0,1,float16,float16,131071,0.07629333436489105
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,1,1,128,0,1,float16,float16,31,0.01071999967098236
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,1,1,128,0,1,float16,fp8,31,0.010879999647537867
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,1,1,128,0,1,float16,fp8,131071,0.06294399996598561
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,1,1,128,0,1,float16,fp8,127,0.012522666404644648
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,1,1,128,0,1,float16,float16,63,0.011936000237862269
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,1,1,128,0,1,float16,fp8,63,0.01231466606259346
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,1,1,128,0,1,float16,fp8,511,0.02699200063943863
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,1,1,128,0,1,float16,float16,127,0.012517333030700684
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,1,1,128,0,1,float16,fp8,255,0.012576000144084295
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,1,1,128,0,1,float16,float16,255,0.013232000172138214
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,1,1,128,0,1,float16,float16,511,0.02731200059254964
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,1,1,128,0,1,float16,float16,4095,0.0841919978459676
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,1,1,128,0,1,float16,float16,1023,0.03161599983771642
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,1,1,128,0,1,float16,fp8,1023,0.030933332939942677
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,1,1,128,0,1,float16,float16,2047,0.04725866516431173
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,1,1,128,0,1,float16,fp8,2047,0.04249600072701772
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,1,1,128,0,1,float16,fp8,4095,0.06914133330186208
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,1,1,128,0,1,float16,float16,8191,0.14441600441932678
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,1,1,128,0,1,float16,fp8,8191,0.12231466174125671
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,1,1,128,0,1,float16,float16,16383,0.264138658841451
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,1,1,128,0,1,float16,float16,1,0.011141333729028702
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,1,1,128,0,1,float16,fp8,16383,0.218725323677063
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,1,1,128,0,1,float16,fp8,1,0.009317333499590555
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,1,1,128,0,1,float16,float16,3,0.009482666850090027
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,1,1,128,0,1,float16,fp8,3,0.009423999736706415
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,1,1,128,0,1,float16,float16,7,0.009317333499590555
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,1,1,128,0,1,float16,float16,32767,0.5040853420893351
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,1,1,128,0,1,float16,fp8,7,0.009461333354314169
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,1,1,128,0,1,float16,float16,15,0.009935999910036722
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,1,1,128,0,1,float16,fp8,32767,0.4119093418121338
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,1,1,128,0,1,float16,fp8,15,0.010159999753038088
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,1,1,128,0,1,float16,float16,31,0.011706666400035223
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,1,1,128,0,1,float16,fp8,31,0.01129066695769628
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,1,1,128,0,1,float16,float16,63,0.011749333391586939
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,1,1,128,0,1,float16,fp8,63,0.011792000383138657
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,1,1,128,0,1,float16,float16,127,0.011866666376590729
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,1,1,128,0,1,float16,fp8,127,0.011909333368142446
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,1,1,128,0,1,float16,float16,255,0.013440000514189402
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,1,1,128,0,1,float16,fp8,255,0.013568000247081121
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,1,1,128,0,1,float16,float16,511,0.01911466692884763
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,1,1,128,0,1,float16,fp8,511,0.018810667097568512
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,1,1,128,0,1,float16,fp8,1023,0.028778667251269024
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,1,1,128,0,1,float16,float16,1023,0.041290665666262306
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,1,1,128,0,1,float16,float16,2047,0.07254933317502339
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,1,1,128,0,1,float16,fp8,2047,0.05680533250172933
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,1,1,128,0,1,float16,fp8,4095,0.10272533694903056
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,1,1,128,0,1,float16,fp8,65535,0.797386646270752
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,1,1,128,0,1,float16,float16,4095,0.13245333234469095
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,1,1,128,0,1,float16,float16,65535,0.9887306690216064
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,1,1,128,0,1,float16,float16,8191,0.25391467412312824
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,1,1,128,0,1,float16,fp8,8191,0.19434666633605957
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,1,1,128,0,1,float16,float16,1,0.014378666877746582
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,1,1,128,0,1,float16,fp8,1,0.014767999450365702
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,1,1,128,0,1,float16,float16,3,0.014746667196353277
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,1,1,128,0,1,float16,fp8,3,0.015194666882356008
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,1,1,128,0,1,float16,float16,15,0.015957333147525787
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,1,1,128,0,1,float16,fp8,16383,0.37542398770650226
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,1,1,128,0,1,float16,float16,7,0.014554666976133982
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,1,1,128,0,1,float16,fp8,7,0.014912000546852747
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,1,1,128,0,1,float16,float16,16383,0.5039840141932169
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,1,1,128,0,1,float16,fp8,15,0.0161920003592968
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,1,1,128,0,1,float16,float16,31,0.01932266727089882
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,1,1,128,0,1,float16,fp8,31,0.019440000255902607
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,1,1,128,0,1,float16,float16,63,0.019386666516462963
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,1,1,128,0,1,float16,float16,127,0.019109333554903667
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,1,1,128,0,1,float16,fp8,63,0.01929066702723503
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,1,1,128,0,1,float16,fp8,127,0.01942933350801468
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,1,1,128,0,1,float16,float16,255,0.023631999890009563
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,1,1,128,0,1,float16,fp8,255,0.023120000958442688
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,1,1,128,0,1,float16,float16,511,0.039861333866914116
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,1,1,128,0,1,float16,fp8,511,0.0340639998515447
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,1,1,128,0,1,float16,fp8,1023,0.05889600018660227
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,1,1,128,0,1,float16,float16,1023,0.07550399998823802
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,1,1,128,0,1,float16,float16,2047,0.13616533080736795
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,1,1,128,0,1,float16,float16,1,0.008570666735370954
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,1,1,128,0,1,float16,fp8,2047,0.10725333293279012
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,1,1,128,0,1,float16,fp8,1,0.009296000003814697
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,1,1,128,0,1,float16,float16,3,0.008992000172535578
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,1,1,128,0,1,float16,fp8,3,0.008778666456540426
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,1,1,128,0,1,float16,float16,4095,0.2566559910774231
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,1,1,128,0,1,float16,float16,7,0.008858666444818178
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,1,1,128,0,1,float16,fp8,4095,0.199946661790212
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,1,1,128,0,1,float16,fp8,7,0.009253333633144697
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,1,1,128,0,1,float16,float16,15,0.00926399976015091
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,1,1,128,0,1,float16,fp8,15,0.008943999807039896
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,1,1,128,0,1,float16,float16,31,0.009952000031868616
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,1,1,128,0,1,float16,float16,8191,0.4988853136698405
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,1,1,128,0,1,float16,fp8,31,0.009930666536092758
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,1,1,128,0,1,float16,fp8,8191,0.3793226480484009
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,1,1,128,0,1,float16,float16,63,0.01146666705608368
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,1,1,128,0,1,float16,fp8,63,0.011861333002646765
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,1,1,128,0,1,float16,float16,127,0.011616000284751257
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,1,1,128,0,1,float16,fp8,127,0.011178666104873022
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,1,1,128,0,1,float16,float16,255,0.011813333878914515
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,1,1,128,0,1,float16,fp8,255,0.011557333171367645
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,1,1,128,0,1,float16,float16,511,0.016016000260909397
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,1,1,128,0,1,float16,fp8,511,0.015770666301250458
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,1,1,128,0,1,float16,float16,1023,0.01687466725707054
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,1,1,128,0,1,float16,fp8,1023,0.016976000120242436
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,1,1,128,0,1,float16,float16,2047,0.017071999609470367
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,1,1,128,0,1,float16,fp8,2047,0.016447999825080235
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,1,1,128,0,1,float16,float16,4095,0.017130666722853977
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,1,1,128,0,1,float16,fp8,4095,0.0169813334941864
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,1,1,128,0,1,float16,float16,8191,0.028090665737787884
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,1,1,128,0,1,float16,fp8,8191,0.027877333263556164
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,1,1,128,0,1,float16,float16,16383,0.038880000511805214
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,1,1,128,0,1,float16,fp8,16383,0.03788800040880839
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,1,1,128,0,1,float16,float16,32767,0.07005333403746287
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,1,1,128,0,1,float16,fp8,32767,0.05746666590372721
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,1,1,128,0,1,float16,float16,65535,0.11959999799728394
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,1,1,128,0,1,float16,fp8,65535,0.10842667023340861
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,1,1,128,0,1,float16,float16,1,0.01609066625436147
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,1,1,128,0,1,float16,fp8,3,0.014533333480358124
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,1,1,128,0,1,float16,fp8,1,0.014815999815861383
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,1,1,128,0,1,float16,float16,3,0.014442666123310724
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,1,1,128,0,1,float16,float16,7,0.015098666151364645
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,1,1,128,0,1,float16,fp8,7,0.015354666858911514
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,1,1,128,0,1,float16,fp8,15,0.018245333184798557
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,1,1,128,0,1,float16,float16,15,0.01876266673207283
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,1,1,128,0,1,float16,float16,131071,0.21706666549046835
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,1,1,128,0,1,float16,fp8,131071,0.1941386659940084
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,1,1,128,0,1,float16,float16,31,0.018698666244745255
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,1,1,128,0,1,float16,fp8,31,0.018592000007629395
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,1,1,128,0,1,float16,float16,127,0.02239466706911723
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,1,1,128,0,1,float16,float16,63,0.018725333114465077
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,1,1,128,0,1,float16,fp8,63,0.018592000007629395
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,1,1,128,0,1,float16,fp8,127,0.022266666094462078
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,1,1,128,0,1,float16,float16,255,0.03814399987459183
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,1,1,128,0,1,float16,fp8,511,0.05788266658782959
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,1,1,128,0,1,float16,fp8,255,0.03346666693687439
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,1,1,128,0,1,float16,float16,511,0.07454933226108551
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,1,1,128,0,1,float16,float16,1023,0.13591466347376505
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,1,1,128,0,1,float16,fp8,1023,0.10997866590817769
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,1,1,128,0,1,float16,float16,2047,0.2576479911804199
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,1,1,128,0,1,float16,fp8,2047,0.2058239976565043
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,1,1,128,0,1,float16,float16,1,0.023989332218964893
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,1,1,128,0,1,float16,float16,4095,0.49937065442403156
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,1,1,128,0,1,float16,fp8,1,0.024901332954565685
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,1,1,128,0,1,float16,float16,3,0.02499199906984965
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,1,1,128,0,1,float16,fp8,4095,0.3947146733601888
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,1,1,128,0,1,float16,fp8,3,0.02601066728432973
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,1,1,128,0,1,float16,float16,15,0.03178133318821589
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,1,1,128,0,1,float16,float16,7,0.025957333544890087
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,1,1,128,0,1,float16,fp8,7,0.02643733223279317
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,1,1,128,0,1,float16,fp8,15,0.032469332218170166
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,1,1,128,0,1,float16,float16,31,0.03235200047492981
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,1,1,128,0,1,float16,fp8,63,0.032287999987602234
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,1,1,128,0,1,float16,fp8,31,0.03262399882078171
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,1,1,128,0,1,float16,float16,63,0.033717334270477295
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,1,1,128,0,1,float16,float16,127,0.04554133117198944
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,1,1,128,0,1,float16,fp8,127,0.040031999349594116
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,1,1,128,0,1,float16,float16,511,0.13797332843144736
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,1,1,128,0,1,float16,float16,255,0.0688266654809316
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,1,1,128,0,1,float16,float16,1,0.008682666967312494
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,1,1,128,0,1,float16,fp8,255,0.06572799881299336
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,1,1,128,0,1,float16,fp8,1,0.00919999989370505
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,1,1,128,0,1,float16,float16,3,0.0086666668454806
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,1,1,128,0,1,float16,fp8,511,0.10961066683133443
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,1,1,128,0,1,float16,fp8,3,0.008986666798591614
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,1,1,128,0,1,float16,float16,7,0.009018666421373686
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,1,1,128,0,1,float16,fp8,7,0.00890666681031386
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,1,1,128,0,1,float16,float16,1023,0.26136000951131183
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,1,1,128,0,1,float16,float16,15,0.009125333279371262
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,1,1,128,0,1,float16,fp8,15,0.009392000113924345
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,1,1,128,0,1,float16,float16,31,0.009898666913310686
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,1,1,128,0,1,float16,fp8,1023,0.20890667041142783
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,1,1,128,0,1,float16,fp8,31,0.009749333063761393
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,1,1,128,0,1,float16,float16,63,0.011328000575304031
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,1,1,128,0,1,float16,float16,127,0.011648000528415045
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,1,1,128,0,1,float16,fp8,63,0.011792000383138657
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,1,1,128,0,1,float16,fp8,127,0.011695999652147293
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,1,1,128,0,1,float16,float16,255,0.011450666934251785
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,1,1,128,0,1,float16,fp8,2047,0.395957350730896
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,1,1,128,0,1,float16,fp8,255,0.011781333635250727
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,1,1,128,0,1,float16,float16,2047,0.502560019493103
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,1,1,128,0,1,float16,float16,511,0.01595199977358182
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,1,1,128,0,1,float16,fp8,511,0.016037333756685257
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,1,1,128,0,1,float16,float16,1023,0.016976000120242436
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,1,1,128,0,1,float16,fp8,1023,0.01701333373785019
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,1,1,128,0,1,float16,float16,2047,0.016800000021855038
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,1,1,128,0,1,float16,fp8,2047,0.01720533271630605
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,1,1,128,0,1,float16,fp8,4095,0.02792533238728841
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,1,1,128,0,1,float16,float16,4095,0.028016000986099243
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,1,1,128,0,1,float16,float16,8191,0.03869866579771042
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,1,1,128,0,1,float16,fp8,8191,0.037962667644023895
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,1,1,128,0,1,float16,float16,16383,0.06901866694291432
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,1,1,128,0,1,float16,fp8,16383,0.05734399954477946
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,1,1,128,0,1,float16,float16,32767,0.11661866307258606
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,1,1,128,0,1,float16,fp8,32767,0.10570133725802104
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,128,1,64,128,1,float16,float16,1,0.04292800029118856
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,128,1,64,0,1,float16,float16,1,0.04293866455554962
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,128,1,64,128,1,float16,fp8,1,0.0425600012143453
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,128,1,64,0,1,float16,fp8,1,0.04289066791534424
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,1,1,128,0,1,float16,float16,65535,0.2081493337949117
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,128,1,64,128,1,float16,float16,3,0.042965332667032875
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,128,1,64,0,1,float16,float16,3,0.04295999805132548
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,1,1,128,0,1,float16,fp8,65535,0.18503999710083008
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,128,1,64,0,1,float16,float16,7,0.044719999035199486
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,128,1,64,128,1,float16,fp8,3,0.042538667718569435
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,128,1,64,0,1,float16,fp8,3,0.043098668257395424
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,128,1,64,128,1,float16,float16,7,0.04461866617202759
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,128,1,64,128,1,float16,fp8,7,0.04454400142033895
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,128,1,64,0,1,float16,fp8,7,0.044405331214269005
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,128,1,64,128,1,float16,float16,15,0.045893331368764244
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,128,1,64,0,1,float16,float16,15,0.04552533229192098
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,128,1,64,128,1,float16,fp8,15,0.045978665351867676
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,128,1,64,0,1,float16,fp8,15,0.04594666759173075
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,1,1,128,0,1,float16,fp8,131071,0.3423253297805786
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,1,1,128,0,1,float16,float16,131071,0.3891679843266805
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,128,1,64,128,1,float16,float16,31,0.057087997595469155
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,128,1,64,0,1,float16,float16,31,0.05708266794681549
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,128,1,64,128,1,float16,fp8,31,0.057520002126693726
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,128,1,64,0,1,float16,fp8,31,0.057258665561676025
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,128,1,64,128,1,float16,float16,63,0.05758399764696757
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,128,1,64,0,1,float16,float16,63,0.057477335135142006
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,128,1,64,128,1,float16,fp8,63,0.057802667220433555
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,128,1,64,0,1,float16,fp8,63,0.057861333092053734
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,128,1,64,128,1,float16,float16,127,0.05843733251094818
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,128,1,64,0,1,float16,float16,127,0.058378666639328
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,128,1,64,128,1,float16,fp8,127,0.058362667759259544
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,128,1,64,0,1,float16,fp8,127,0.05808533231417338
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,128,1,64,128,1,float16,float16,255,0.058543999989827476
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,128,1,64,0,1,float16,float16,255,0.07005866865317027
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,128,1,64,128,1,float16,fp8,255,0.05829866727193197
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,128,1,64,0,1,float16,fp8,255,0.07003733515739441
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,128,1,64,128,1,float16,float16,511,0.058890665570894875
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,128,1,64,0,1,float16,float16,511,0.10744532942771912
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,128,1,64,128,1,float16,fp8,511,0.05845333139101664
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,128,1,64,0,1,float16,fp8,511,0.1083679993947347
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,128,1,64,128,1,float16,float16,1023,0.05891199906667074
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,128,1,64,0,1,float16,float16,1023,0.18387732903162637
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,128,1,64,128,1,float16,fp8,1023,0.05839466551939646
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,128,1,64,0,1,float16,fp8,1023,0.18609599272410074
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,128,1,64,128,1,float16,float16,2047,0.05870933334032694
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,128,1,64,0,1,float16,float16,2047,0.33534399668375653
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,128,1,64,128,1,float16,fp8,2047,0.05861866474151611
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,128,1,64,0,1,float16,fp8,2047,0.3415093421936035
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,128,2,64,128,1,float16,float16,1,0.04292800029118856
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,128,1,64,128,1,float16,float16,4095,0.05866666634877523
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,128,1,64,128,1,float16,fp8,4095,0.05839466551939646
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,128,1,64,0,1,float16,float16,4095,0.637114683787028
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,128,2,64,0,1,float16,float16,1,0.043023998538653054
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,128,1,64,0,1,float16,fp8,4095,0.6495413382848104
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,128,2,64,128,1,float16,fp8,3,0.04378133515516917
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,128,2,64,128,1,float16,fp8,1,0.04309333364168803
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,128,2,64,0,1,float16,fp8,1,0.042949333786964417
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,128,2,64,128,1,float16,float16,3,0.043061330914497375
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,128,2,64,0,1,float16,float16,3,0.043141335248947144
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,128,2,64,0,1,float16,fp8,3,0.043194666504859924
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,128,2,64,128,1,float16,float16,7,0.04433600107828776
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,128,2,64,0,1,float16,float16,7,0.04470933477083842
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,128,2,64,128,1,float16,fp8,7,0.044666667779286705
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,128,2,64,0,1,float16,fp8,7,0.044821331898371376
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,128,2,64,128,1,float16,float16,15,0.0461760014295578
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,128,2,64,0,1,float16,float16,15,0.04632000128428141
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,128,2,64,128,1,float16,fp8,15,0.04658666749795278
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,128,2,64,0,1,float16,fp8,15,0.046256000796953835
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,128,2,64,128,1,float16,float16,31,0.05783466498057047
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,128,2,64,0,1,float16,float16,31,0.0577706644932429
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,128,2,64,128,1,float16,fp8,31,0.0581279993057251
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,128,2,64,0,1,float16,fp8,31,0.0580213318268458
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,128,2,64,128,1,float16,float16,63,0.0582826683918635
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,128,2,64,0,1,float16,float16,63,0.05846933523813883
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,128,2,64,128,1,float16,fp8,63,0.058320000767707825
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,128,2,64,0,1,float16,fp8,63,0.05862399935722351
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,128,2,64,0,1,float16,float16,127,0.05860800047715505
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,128,2,64,128,1,float16,float16,127,0.059077332417170204
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,128,2,64,128,1,float16,fp8,127,0.059088001648585
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,128,2,64,0,1,float16,fp8,127,0.05907199780146281
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,128,2,64,0,1,float16,float16,511,0.10892267028490703
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,128,2,64,128,1,float16,float16,255,0.05916800101598104
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,128,2,64,0,1,float16,float16,255,0.07137066622575124
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,128,2,64,128,1,float16,fp8,255,0.05888533095518748
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,128,2,64,0,1,float16,fp8,255,0.0710399995247523
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,128,2,64,128,1,float16,float16,511,0.05894400179386139
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,128,2,64,128,1,float16,fp8,511,0.059258664647738137
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,128,2,64,0,1,float16,fp8,511,0.11002133289972942
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,128,2,64,128,1,float16,float16,1023,0.059290667374928795
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,128,2,64,0,1,float16,float16,1023,0.1844159960746765
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,128,2,64,128,1,float16,fp8,1023,0.059290667374928795
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,128,2,64,0,1,float16,fp8,1023,0.18829333782196045
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,128,2,64,128,1,float16,float16,2047,0.05937600135803223
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,128,2,64,0,1,float16,float16,2047,0.3352479934692383
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,128,2,64,128,1,float16,fp8,2047,0.0591893345117569
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,128,2,64,0,1,float16,fp8,2047,0.3421279986699422
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,128,2,64,128,1,float16,float16,4095,0.05938666562239329
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,128,2,64,128,1,float16,fp8,4095,0.059232001503308616
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,128,2,64,0,1,float16,float16,4095,0.6428746779759725
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,128,2,64,0,1,float16,fp8,4095,0.6509546836217245
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,128,4,64,0,1,float16,float16,1,0.012191999703645706
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,128,4,64,128,1,float16,float16,7,0.011898666620254517
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,128,4,64,128,1,float16,fp8,1,0.008725333337982496
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,128,4,64,0,1,float16,fp8,1,0.008565333361426989
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,128,4,64,0,1,float16,float16,7,0.011871999750534693
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,128,4,64,128,1,float16,fp8,7,0.008709333216150602
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,128,4,64,0,1,float16,fp8,3,0.008474666625261307
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,128,4,64,0,1,float16,fp8,7,0.008447999755541483
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,128,4,64,128,1,float16,fp8,3,0.008816000074148178
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,128,4,64,128,1,float16,float16,15,0.01190399999419848
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,128,4,64,128,1,float16,fp8,15,0.00850133349498113
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,128,4,64,0,1,float16,fp8,15,0.008522666369875273
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,128,4,64,0,1,float16,float16,31,0.011978667229413986
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,128,4,64,128,1,float16,fp8,31,0.008559999987483025
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,128,4,64,0,1,float16,fp8,31,0.008410666758815447
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,128,4,64,128,1,float16,float16,63,0.0120319997270902
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,128,4,64,128,1,float16,fp8,63,0.008277333031098047
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,128,4,64,128,1,float16,float16,3,0.012191999703645706
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,128,4,64,128,1,float16,float16,1,0.012330666184425354
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,128,4,64,0,1,float16,float16,127,0.012080000092585882
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,128,4,64,0,1,float16,float16,3,0.012416000167528788
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,128,4,64,128,1,float16,float16,255,0.022298666338125866
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,128,4,64,0,1,float16,float16,255,0.022837333381175995
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,128,4,64,0,1,float16,fp8,63,0.008613333106040955
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,128,4,64,0,1,float16,fp8,255,0.011338666081428528
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,128,4,64,128,1,float16,float16,511,0.02738133321205775
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,128,4,64,0,1,float16,float16,511,0.02991466720898946
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,128,4,64,0,1,float16,fp8,127,0.008890666688481966
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,128,4,64,128,1,float16,fp8,511,0.010965333630641302
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,128,4,64,0,1,float16,fp8,511,0.019189332922299702
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,128,4,64,128,1,float16,float16,1023,0.0390133336186409
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,128,4,64,0,1,float16,float16,1023,0.04800533254941305
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,128,4,64,128,1,float16,fp8,1023,0.011605333536863327
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,128,4,64,0,1,float16,fp8,1023,0.028325334191322327
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,128,4,64,0,1,float16,float16,15,0.012576000144084295
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,128,4,64,128,1,float16,float16,2047,0.04153066625197729
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,128,4,64,0,1,float16,float16,2047,0.06252266466617584
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,128,4,64,128,1,float16,fp8,2047,0.011535999675591787
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,128,4,64,128,1,float16,float16,31,0.012794667234023413
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,128,4,64,0,1,float16,fp8,2047,0.03723733375469843
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,128,4,64,128,1,float16,float16,4095,0.04147200038035711
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,128,4,64,0,1,float16,float16,4095,0.08072000245253245
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,128,4,64,128,1,float16,fp8,4095,0.011978667229413986
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,128,4,64,0,1,float16,float16,63,0.012469333906968435
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,128,4,64,0,1,float16,fp8,4095,0.0535093347231547
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,128,4,64,128,1,float16,float16,127,0.012367999802033106
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,128,4,64,128,1,float16,fp8,127,0.009328000247478485
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,128,4,64,128,1,float16,fp8,255,0.010698666175206503
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,128,8,64,128,1,float16,fp8,3,0.007381333038210869
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,128,8,64,0,1,float16,fp8,3,0.007071999832987785
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,128,8,64,0,1,float16,fp8,1,0.007194666812817256
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,128,8,64,128,1,float16,fp8,7,0.007135999699433644
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,128,8,64,0,1,float16,fp8,7,0.007184000064929326
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,128,8,64,128,1,float16,fp8,1,0.00772266648709774
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,128,8,64,0,1,float16,float16,1,0.015216000378131866
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,128,8,64,0,1,float16,float16,7,0.015184000134468079
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,128,8,64,0,1,float16,fp8,15,0.007413333281874657
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,128,8,64,128,1,float16,float16,31,0.015157333264748255
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,128,8,64,0,1,float16,float16,31,0.015077333897352219
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,128,8,64,128,1,float16,fp8,31,0.007226666435599327
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,128,8,64,128,1,float16,float16,15,0.015072000523408255
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,128,8,64,0,1,float16,fp8,31,0.00721066693464915
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,128,8,64,0,1,float16,float16,15,0.015615999698638916
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,128,8,64,128,1,float16,float16,63,0.014970666418472925
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,128,8,64,0,1,float16,float16,63,0.014933332800865173
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,128,8,64,0,1,float16,float16,3,0.015077333897352219
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,128,8,64,128,1,float16,float16,1,0.01533866673707962
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,128,8,64,128,1,float16,fp8,63,0.007146666447321574
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,128,8,64,0,1,float16,fp8,63,0.007226666435599327
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,128,8,64,128,1,float16,float16,127,0.014959999670584997
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,128,8,64,0,1,float16,float16,127,0.014885333677132925
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,128,8,64,128,1,float16,float16,7,0.015002666662136713
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,128,8,64,0,1,float16,fp8,127,0.007914666707317034
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,128,8,64,128,1,float16,float16,255,0.027290667096773785
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,128,8,64,0,1,float16,float16,255,0.0271573339899381
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,128,8,64,128,1,float16,float16,3,0.014794666320085526
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,128,8,64,0,1,float16,fp8,255,0.01098666712641716
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,128,8,64,128,1,float16,float16,511,0.03703466554482778
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,128,8,64,0,1,float16,float16,511,0.04106133431196213
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,128,8,64,128,1,float16,fp8,511,0.01116266722480456
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,128,8,64,128,1,float16,float16,1023,0.036559998989105225
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,128,8,64,0,1,float16,float16,1023,0.05085866649945577
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,128,8,64,128,1,float16,fp8,1023,0.010981333752473196
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,128,8,64,0,1,float16,fp8,1023,0.023589332898457844
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,128,8,64,128,1,float16,fp8,15,0.007327999919652939
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,128,8,64,128,1,float16,float16,2047,0.03908266623814901
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,128,8,64,0,1,float16,float16,2047,0.06773333251476288
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,128,8,64,128,1,float16,fp8,2047,0.011018666128317514
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,128,8,64,0,1,float16,fp8,2047,0.033589333295822144
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,128,8,64,128,1,float16,float16,4095,0.03886933376391729
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,128,8,64,0,1,float16,float16,4095,0.08961066603660583
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,128,8,64,128,1,float16,fp8,4095,0.011109333485364914
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,128,8,64,0,1,float16,fp8,4095,0.05496533215045929
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,128,1,64,128,1,float16,float16,1,0.00979200005531311
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,128,1,64,0,1,float16,float16,1,0.007861333588759104
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,128,1,64,128,1,float16,fp8,1,0.009183999771873156
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,128,1,64,0,1,float16,fp8,1,0.009002666920423508
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,128,1,64,128,1,float16,float16,3,0.008063999935984612
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,128,1,64,0,1,float16,float16,3,0.00789866658548514
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,128,1,64,128,1,float16,fp8,3,0.009008000294367472
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,128,1,64,0,1,float16,fp8,3,0.00916800027092298
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,128,1,64,128,1,float16,float16,7,0.007727999861041705
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,128,1,64,0,1,float16,float16,7,0.008240000034372011
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,128,8,64,128,1,float16,fp8,127,0.008170666793982187
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,128,1,64,128,1,float16,fp8,7,0.009178666397929192
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,128,1,64,0,1,float16,fp8,7,0.00903466654320558
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,128,1,64,128,1,float16,float16,15,0.007962666451931
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,128,1,64,0,1,float16,float16,15,0.007807999849319458
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,128,1,64,128,1,float16,fp8,15,0.009098666409651438
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,128,1,64,0,1,float16,fp8,15,0.009136000027259191
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,128,1,64,128,1,float16,float16,31,0.008047999814152718
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,128,8,64,0,1,float16,fp8,511,0.018922666708628338
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,128,1,64,0,1,float16,float16,31,0.008165333420038223
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,128,1,64,128,1,float16,fp8,31,0.009375999992092451
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,128,1,64,0,1,float16,fp8,31,0.00921066664159298
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,128,1,64,128,1,float16,float16,63,0.009002666920423508
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,128,1,64,0,1,float16,float16,63,0.008837333569924036
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,128,1,64,128,1,float16,fp8,63,0.009888000165422758
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,128,1,64,0,1,float16,fp8,63,0.009695999945203463
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,128,8,64,128,1,float16,fp8,255,0.009541333342591921
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,128,1,64,128,1,float16,float16,127,0.008922666932145754
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,128,1,64,0,1,float16,float16,127,0.009189333145817121
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,128,1,64,128,1,float16,fp8,127,0.011498666057984034
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,128,1,64,0,1,float16,fp8,127,0.011530666301647821
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,128,1,64,128,1,float16,float16,255,0.009637333452701569
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,128,1,64,0,1,float16,float16,255,0.009061333412925402
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,128,1,64,128,1,float16,fp8,255,0.011402666568756104
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,128,1,64,0,1,float16,fp8,255,0.011525332927703857
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,128,1,64,128,1,float16,float16,511,0.00895999992887179
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,128,1,64,0,1,float16,float16,511,0.010239999741315842
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,128,1,64,128,1,float16,fp8,511,0.011285333583752314
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,128,1,64,0,1,float16,fp8,511,0.011546666423479715
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,128,1,64,128,1,float16,float16,1023,0.022085333863894146
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,128,1,64,0,1,float16,float16,1023,0.02609066665172577
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,128,1,64,128,1,float16,fp8,1023,0.021840001145998638
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,128,1,64,0,1,float16,fp8,1023,0.026015999416510265
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,128,1,64,128,1,float16,float16,2047,0.02197333425283432
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,128,1,64,0,1,float16,float16,2047,0.030554667115211487
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,128,1,64,128,1,float16,fp8,2047,0.02176533391078313
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,128,1,64,0,1,float16,fp8,2047,0.03048533449570338
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,128,1,64,128,1,float16,float16,4095,0.021935999393463135
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,128,1,64,0,1,float16,float16,4095,0.040565334260463715
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,128,1,64,128,1,float16,fp8,4095,0.021967999637126923
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,128,1,64,0,1,float16,fp8,4095,0.040565334260463715
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,128,1,64,128,1,float16,float16,8191,0.022096000611782074
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,128,1,64,0,1,float16,float16,8191,0.06071466704209646
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,128,1,64,128,1,float16,fp8,8191,0.022085333863894146
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,128,1,64,0,1,float16,fp8,8191,0.06061866879463196
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,128,1,64,128,1,float16,float16,16383,0.02197866638501485
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,128,1,64,0,1,float16,float16,16383,0.10075199604034424
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,128,1,64,128,1,float16,fp8,16383,0.021984001000722248
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,128,1,64,0,1,float16,fp8,16383,0.10021866361300151
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,128,2,64,128,1,float16,float16,1,0.007802666475375493
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,128,2,64,0,1,float16,float16,1,0.007903999959429106
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,128,2,64,128,1,float16,fp8,1,0.009050666665037474
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,128,2,64,0,1,float16,fp8,1,0.009002666920423508
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,128,2,64,128,1,float16,float16,3,0.009056000038981438
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,128,2,64,0,1,float16,float16,3,0.007936000203092894
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,128,2,64,128,1,float16,fp8,3,0.009269333134094873
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,128,2,64,0,1,float16,fp8,3,0.009173333023985228
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,128,2,64,128,1,float16,float16,7,0.007978666573762894
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,128,2,64,0,1,float16,float16,7,0.00786666696270307
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,128,2,64,128,1,float16,fp8,7,0.009039999917149544
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,128,2,64,0,1,float16,fp8,7,0.009088000282645226
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,128,2,64,128,1,float16,float16,15,0.00797333319981893
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,128,2,64,0,1,float16,float16,15,0.008074666683872541
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,128,2,64,128,1,float16,fp8,15,0.008992000172535578
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,128,2,64,0,1,float16,fp8,15,0.009248000259200731
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,128,2,64,128,1,float16,float16,31,0.008090666805704435
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,128,2,64,0,1,float16,float16,31,0.008176000167926153
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,128,2,64,128,1,float16,fp8,31,0.009248000259200731
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,128,2,64,0,1,float16,fp8,31,0.00927466650803884
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,128,2,64,128,1,float16,float16,63,0.008922666932145754
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,128,2,64,0,1,float16,float16,63,0.008922666932145754
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,128,2,64,128,1,float16,fp8,63,0.009754666437705358
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,128,2,64,0,1,float16,fp8,63,0.009904000287254652
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,128,2,64,128,1,float16,float16,127,0.00891733355820179
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,128,2,64,0,1,float16,fp8,255,0.011503999431928
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,128,2,64,0,1,float16,float16,127,0.008965333302815756
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,128,2,64,128,1,float16,fp8,127,0.01156266654531161
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,128,2,64,0,1,float16,fp8,127,0.011418666690587997
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,128,2,64,128,1,float16,float16,255,0.009136000027259191
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,128,2,64,0,1,float16,float16,255,0.009119999905427298
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,128,2,64,128,1,float16,fp8,255,0.011381333072980246
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,128,2,64,128,1,float16,float16,511,0.00903466654320558
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,128,2,64,0,1,float16,float16,511,0.010064000263810158
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,128,2,64,128,1,float16,fp8,511,0.011525332927703857
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,128,2,64,0,1,float16,fp8,511,0.011514666179815928
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,128,2,64,128,1,float16,float16,1023,0.02204799900452296
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,128,2,64,0,1,float16,float16,1023,0.026181332767009735
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,128,2,64,0,1,float16,fp8,2047,0.030250666042168934
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,128,2,64,128,1,float16,fp8,1023,0.021903999149799347
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,128,2,64,0,1,float16,fp8,1023,0.025968000292778015
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,128,2,64,128,1,float16,float16,2047,0.02202133337656657
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,128,2,64,0,1,float16,float16,2047,0.030640001098314922
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,128,2,64,128,1,float16,fp8,2047,0.02176533391078313
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,128,2,64,0,1,float16,float16,8191,0.060693333546320595
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,128,2,64,128,1,float16,float16,4095,0.022096000611782074
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,128,2,64,0,1,float16,float16,4095,0.040735999743143715
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,128,2,64,128,1,float16,fp8,4095,0.021920000513394673
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,128,2,64,0,1,float16,fp8,4095,0.04045333216587702
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,128,2,64,128,1,float16,float16,8191,0.021914665897687275
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,128,2,64,128,1,float16,fp8,8191,0.0220320001244545
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,128,2,64,0,1,float16,fp8,16383,0.10025599598884583
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,128,2,64,0,1,float16,fp8,8191,0.06055466830730438
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,128,2,64,128,1,float16,float16,16383,0.02197333425283432
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,128,4,64,0,1,float16,fp8,1,0.007834666719039282
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,128,2,64,0,1,float16,float16,16383,0.10074667135874431
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,128,2,64,128,1,float16,fp8,16383,0.02187199890613556
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,128,4,64,128,1,float16,float16,1,0.007930666829148928
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,128,4,64,0,1,float16,float16,1,0.007823999971151352
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,128,4,64,128,1,float16,fp8,1,0.007760000104705493
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,128,4,64,128,1,float16,float16,3,0.007903999959429106
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,128,4,64,128,1,float16,fp8,7,0.008223999912540117
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,128,4,64,0,1,float16,float16,3,0.007962666451931
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,128,4,64,128,1,float16,fp8,3,0.007760000104705493
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,128,4,64,0,1,float16,fp8,3,0.007813333223263422
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,128,4,64,128,1,float16,float16,7,0.007936000203092894
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,128,4,64,0,1,float16,float16,7,0.007840000092983246
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,128,4,64,0,1,float16,fp8,7,0.00843733362853527
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,128,4,64,128,1,float16,float16,15,0.007840000092983246
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,128,4,64,0,1,float16,float16,15,0.00789866658548514
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,128,4,64,128,1,float16,fp8,15,0.007797333101431529
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,128,4,64,0,1,float16,fp8,15,0.007696000238259633
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,128,4,64,128,1,float16,float16,31,0.00821333316465219
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,128,4,64,0,1,float16,float16,31,0.00810666692753633
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,128,4,64,128,1,float16,fp8,31,0.007711999739209811
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,128,4,64,0,1,float16,fp8,31,0.008186666915814081
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,128,4,64,128,1,float16,float16,63,0.00891733355820179
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,128,4,64,0,1,float16,float16,63,0.008767999708652496
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,128,4,64,128,1,float16,fp8,63,0.008176000167926153
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,128,4,64,0,1,float16,fp8,63,0.008170666793982187
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,128,4,64,128,1,float16,float16,127,0.009082666908701261
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,128,4,64,0,1,float16,float16,127,0.008922666932145754
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,128,4,64,128,1,float16,fp8,127,0.008618666479984919
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,128,4,64,0,1,float16,fp8,127,0.008639999975760778
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,128,4,64,128,1,float16,float16,255,0.009018666421373686
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,128,4,64,0,1,float16,float16,255,0.009296000003814697
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,128,4,64,128,1,float16,fp8,255,0.009594666461149851
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,128,4,64,0,1,float16,fp8,255,0.010618666807810465
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,128,4,64,128,1,float16,float16,511,0.009103999783595404
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,128,4,64,0,1,float16,float16,511,0.01007466639081637
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,128,4,64,128,1,float16,fp8,511,0.009477333476146063
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,128,4,64,0,1,float16,fp8,511,0.010693332801262537
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,128,4,64,128,1,float16,float16,1023,0.021941334009170532
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,128,4,64,0,1,float16,float16,1023,0.02624000112215678
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,128,4,64,128,1,float16,fp8,1023,0.009663999701539675
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,128,4,64,0,1,float16,fp8,1023,0.01139733319481214
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,128,4,64,128,1,float16,float16,2047,0.013679999858140945
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,128,4,64,0,1,float16,float16,2047,0.014677333335081736
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,128,4,64,128,1,float16,fp8,4095,0.00916800027092298
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,128,4,64,128,1,float16,fp8,2047,0.009189333145817121
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,128,4,64,0,1,float16,fp8,2047,0.012736000120639801
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,128,4,64,128,1,float16,float16,4095,0.014197333405415217
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,128,4,64,0,1,float16,float16,4095,0.01463466634353002
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,128,4,64,0,1,float16,fp8,4095,0.014432000617186228
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,128,4,64,128,1,float16,float16,8191,0.015077333897352219
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,128,4,64,0,1,float16,float16,8191,0.01829333355029424
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,128,4,64,128,1,float16,fp8,8191,0.009077333534757296
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,128,4,64,0,1,float16,fp8,8191,0.01747200017174085
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,128,4,64,128,1,float16,float16,16383,0.01588800052801768
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,128,4,64,0,1,float16,float16,16383,0.01942933350801468
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,128,4,64,128,1,float16,fp8,16383,0.009375999992092451
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,128,4,64,0,1,float16,fp8,16383,0.02080533280968666
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,128,8,64,128,1,float16,float16,1,0.007882666463653246
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,128,8,64,0,1,float16,float16,1,0.00797333319981893
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,128,8,64,128,1,float16,fp8,1,0.0064319999267657595
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,128,8,64,0,1,float16,fp8,1,0.0064319999267657595
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,128,8,64,128,1,float16,float16,3,0.007882666463653246
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,128,8,64,0,1,float16,float16,3,0.007887999837597212
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,128,8,64,128,1,float16,fp8,3,0.0064266665528217954
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,128,8,64,0,1,float16,fp8,3,0.006522666662931442
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,128,8,64,128,1,float16,float16,7,0.007770666852593422
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,128,8,64,0,1,float16,float16,7,0.008053333188096682
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,128,8,64,128,1,float16,fp8,7,0.006666666517655055
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,128,8,64,128,1,float16,float16,31,0.007994666695594788
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,128,8,64,0,1,float16,fp8,7,0.00660800002515316
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,128,8,64,128,1,float16,float16,15,0.00784533346692721
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,128,8,64,0,1,float16,float16,15,0.007920000081261
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,128,8,64,128,1,float16,fp8,15,0.00927466650803884
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,128,8,64,0,1,float16,fp8,15,0.006746666505932808
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,128,8,64,0,1,float16,float16,31,0.008021333565314611
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,128,8,64,128,1,float16,fp8,31,0.006725333631038666
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,128,8,64,0,1,float16,fp8,31,0.006394666930039723
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,128,8,64,128,1,float16,float16,63,0.008837333569924036
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,128,8,64,0,1,float16,float16,63,0.00878399983048439
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,128,8,64,128,1,float16,fp8,63,0.006693333387374878
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,128,8,64,0,1,float16,fp8,63,0.00666133314371109
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,128,8,64,128,1,float16,float16,127,0.009045333291093508
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,128,8,64,0,1,float16,float16,127,0.009632000078757605
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,128,8,64,128,1,float16,fp8,127,0.007327999919652939
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,128,8,64,0,1,float16,fp8,127,0.007189333438873291
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,128,8,64,128,1,float16,float16,255,0.009061333412925402
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,128,8,64,0,1,float16,float16,255,0.008976000050703684
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,128,8,64,128,1,float16,fp8,255,0.007840000092983246
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,128,8,64,0,1,float16,fp8,255,0.008623999853928884
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,128,8,64,128,1,float16,float16,511,0.009045333291093508
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,128,8,64,0,1,float16,float16,511,0.010175999874869982
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,128,8,64,128,1,float16,fp8,511,0.007840000092983246
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,128,8,64,0,1,float16,fp8,511,0.008799999952316284
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,128,8,64,128,1,float16,float16,1023,0.011813333878914515
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,128,8,64,0,1,float16,float16,1023,0.012304000556468964
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,128,8,64,128,1,float16,fp8,1023,0.007893333211541176
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,128,8,64,0,1,float16,fp8,1023,0.009866666669646898
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,128,8,64,128,1,float16,float16,2047,0.012266666938861212
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,128,8,64,0,1,float16,float16,2047,0.01310933381319046
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,128,8,64,128,1,float16,fp8,2047,0.007781333600481351
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,128,8,64,0,1,float16,fp8,2047,0.010559999694426855
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,128,8,64,128,1,float16,float16,4095,0.012330666184425354
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,128,8,64,0,1,float16,float16,4095,0.013621332744757334
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,128,8,64,128,1,float16,fp8,4095,0.008058666562040647
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,128,8,64,0,1,float16,fp8,4095,0.012309333930412928
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,128,8,64,128,1,float16,float16,8191,0.015717333803574245
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,128,8,64,0,1,float16,float16,8191,0.020266667008399963
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,128,8,64,128,1,float16,fp8,8191,0.007920000081261
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,128,8,64,0,1,float16,fp8,8191,0.016373333831628162
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,128,8,64,128,1,float16,float16,16383,0.016714667280515034
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,128,8,64,0,1,float16,float16,16383,0.023733332753181458
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,128,8,64,128,1,float16,fp8,16383,0.008303999900817871
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,128,8,64,0,1,float16,fp8,16383,0.02011200040578842
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,128,1,64,128,1,float16,float16,1,0.008778666456540426
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,128,1,64,0,1,float16,float16,1,0.008714666590094566
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,128,1,64,128,1,float16,fp8,1,0.008522666369875273
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,128,1,64,0,1,float16,fp8,1,0.008410666758815447
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,128,1,64,128,1,float16,float16,3,0.008602666358153025
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,128,1,64,0,1,float16,float16,3,0.008736000085870424
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,128,1,64,128,1,float16,fp8,3,0.008672000219424566
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,128,1,64,0,1,float16,fp8,3,0.008559999987483025
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,128,1,64,128,1,float16,float16,7,0.008805333326260248
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,128,1,64,0,1,float16,float16,7,0.00890666681031386
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,128,1,64,0,1,float16,fp8,15,0.009061333412925402
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,128,1,64,128,1,float16,fp8,7,0.00867733359336853
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,128,1,64,0,1,float16,float16,31,0.009434666484594345
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,128,1,64,128,1,float16,fp8,31,0.00938666673998038
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,128,1,64,0,1,float16,fp8,7,0.008613333106040955
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,128,1,64,128,1,float16,float16,15,0.009008000294367472
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,128,1,64,0,1,float16,float16,15,0.00884799969693025
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,128,1,64,128,1,float16,fp8,15,0.00884799969693025
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,128,1,64,128,1,float16,float16,31,0.00955200009047985
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,128,1,64,0,1,float16,fp8,31,0.009322666873534521
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,128,1,64,128,1,float16,float16,63,0.011055999745925268
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,128,1,64,0,1,float16,float16,63,0.010928000013033548
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,128,1,64,128,1,float16,fp8,63,0.011146667102972666
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,128,1,64,0,1,float16,fp8,63,0.011194666226704916
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,128,1,64,128,1,float16,float16,127,0.011237333218256632
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,128,1,64,0,1,float16,float16,127,0.011343999455372492
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,128,1,64,128,1,float16,fp8,127,0.011098666737476984
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,128,1,64,0,1,float16,fp8,127,0.011061333119869232
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,128,1,64,128,1,float16,float16,255,0.011205332974592844
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,128,1,64,0,1,float16,float16,255,0.011477333803971609
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,128,1,64,128,1,float16,fp8,255,0.011183999478816986
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,128,1,64,0,1,float16,fp8,255,0.011343999455372492
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,128,1,64,128,1,float16,float16,511,0.011370666325092316
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,128,1,64,0,1,float16,float16,511,0.01293333371480306
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,128,1,64,128,1,float16,fp8,511,0.01121066634853681
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,128,1,64,0,1,float16,fp8,511,0.012778667112191519
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,128,1,64,128,1,float16,float16,1023,0.01126933346192042
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,128,1,64,0,1,float16,float16,1023,0.018053332964579265
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,128,1,64,128,1,float16,fp8,1023,0.01128000020980835
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,128,1,64,0,1,float16,fp8,1023,0.018186666071414948
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,128,1,64,128,1,float16,float16,2047,0.011296000331640244
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,128,1,64,0,1,float16,float16,2047,0.028037334481875103
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,128,1,64,128,1,float16,fp8,4095,0.01129066695769628
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,128,1,64,128,1,float16,fp8,2047,0.011007999380429586
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,128,1,64,0,1,float16,fp8,2047,0.028917332490285236
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,128,1,64,128,1,float16,float16,4095,0.011258666714032492
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,128,1,64,0,1,float16,float16,4095,0.04804799954096476
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,128,1,64,0,1,float16,fp8,4095,0.05006400247414907
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,128,1,64,128,1,float16,float16,8191,0.011253333340088526
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,128,1,64,0,1,float16,float16,8191,0.08781333764394124
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,128,1,64,128,1,float16,fp8,8191,0.011087999989589056
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,128,1,64,0,1,float16,fp8,8191,0.09188266595204671
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,128,1,64,128,1,float16,float16,16383,0.011301333705584208
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,128,1,64,0,1,float16,float16,16383,0.16715200742085776
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,128,1,64,128,1,float16,fp8,16383,0.011125333607196808
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,128,1,64,0,1,float16,fp8,16383,0.1753173271814982
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,128,2,64,128,1,float16,float16,1,0.008698666468262672
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,128,2,64,0,1,float16,float16,1,0.008458666503429413
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,128,2,64,128,1,float16,fp8,1,0.008661333471536636
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,128,2,64,0,1,float16,fp8,1,0.008405333384871483
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,128,2,64,128,1,float16,float16,3,0.008714666590094566
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,128,2,64,0,1,float16,float16,3,0.008682666967312494
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,128,2,64,128,1,float16,fp8,3,0.008469333251317343
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,128,2,64,0,1,float16,fp8,3,0.008496000121037165
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,128,2,64,128,1,float16,float16,7,0.008832000195980072
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,128,2,64,0,1,float16,float16,7,0.008597333605090777
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,128,2,64,128,1,float16,fp8,7,0.008805333326260248
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,128,2,64,0,1,float16,fp8,7,0.008549333239595095
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,128,2,64,128,1,float16,float16,15,0.008837333569924036
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,128,2,64,0,1,float16,float16,15,0.008976000050703684
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,128,2,64,128,1,float16,fp8,15,0.008789333204428354
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,128,2,64,0,1,float16,fp8,15,0.00877333308259646
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,128,2,64,128,1,float16,float16,31,0.009514666472872099
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,128,2,64,0,1,float16,float16,31,0.00949866697192192
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,128,2,64,128,1,float16,fp8,31,0.009519999846816063
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,128,2,64,0,1,float16,fp8,31,0.009226666763424873
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,128,2,64,128,1,float16,float16,63,0.011237333218256632
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,128,2,64,0,1,float16,float16,63,0.01118933285276095
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,128,2,64,128,1,float16,fp8,63,0.011136000355084738
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,128,2,64,0,1,float16,fp8,63,0.011178666104873022
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,128,2,64,128,1,float16,float16,127,0.011157333850860596
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,128,2,64,0,1,float16,float16,127,0.011365332951148352
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,128,2,64,128,1,float16,fp8,127,0.011178666104873022
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,128,2,64,0,1,float16,fp8,127,0.011114666859308878
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,128,2,64,128,1,float16,float16,255,0.011242666592200598
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,128,2,64,0,1,float16,float16,255,0.011429333438475927
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,128,2,64,128,1,float16,fp8,255,0.011221333096424738
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,128,2,64,0,1,float16,fp8,255,0.01139733319481214
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,128,2,64,128,1,float16,float16,511,0.011370666325092316
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,128,2,64,0,1,float16,float16,511,0.0129120002190272
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,128,2,64,128,1,float16,fp8,511,0.01118933285276095
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,128,2,64,0,1,float16,fp8,511,0.012693333129088083
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,128,2,64,128,1,float16,float16,1023,0.011183999478816986
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,128,2,64,0,1,float16,float16,1023,0.01815466706951459
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,128,2,64,128,1,float16,fp8,1023,0.011168000598748526
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,128,2,64,0,1,float16,fp8,1023,0.018170667191346485
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,128,2,64,128,1,float16,float16,2047,0.01126933346192042
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,128,2,64,0,1,float16,float16,2047,0.028037334481875103
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,128,2,64,128,1,float16,fp8,2047,0.011141333729028702
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,128,2,64,0,1,float16,fp8,2047,0.0286613330245018
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,128,2,64,128,1,float16,float16,4095,0.011205332974592844
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,128,2,64,0,1,float16,float16,4095,0.048010667165120445
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,128,2,64,128,1,float16,fp8,4095,0.011215999722480774
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,128,2,64,0,1,float16,fp8,4095,0.050111999114354454
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,128,2,64,128,1,float16,float16,8191,0.011215999722480774
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,128,2,64,0,1,float16,float16,8191,0.0876693328221639
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,128,2,64,128,1,float16,fp8,8191,0.011247999966144562
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,128,2,64,0,1,float16,fp8,8191,0.09174399574597676
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,128,2,64,128,1,float16,float16,16383,0.011157333850860596
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,128,2,64,0,1,float16,float16,16383,0.16725865999857584
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,128,2,64,128,1,float16,fp8,16383,0.011317333827416102
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,128,2,64,0,1,float16,fp8,16383,0.17550400892893472
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,128,4,64,128,1,float16,float16,1,0.008592000231146812
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,128,4,64,0,1,float16,float16,1,0.008527999743819237
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,128,4,64,128,1,float16,fp8,1,0.007738666608929634
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,128,4,64,0,1,float16,fp8,1,0.007647999872763951
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,128,4,64,128,1,float16,float16,3,0.008618666479984919
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,128,4,64,0,1,float16,float16,3,0.008592000231146812
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,128,4,64,128,1,float16,fp8,3,0.00784533346692721
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,128,4,64,0,1,float16,fp8,3,0.008240000034372011
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,128,4,64,128,1,float16,float16,7,0.008997333546479544
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,128,4,64,0,1,float16,float16,7,0.00874133345981439
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,128,4,64,128,1,float16,fp8,7,0.008165333420038223
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,128,4,64,0,1,float16,fp8,7,0.008021333565314611
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,128,4,64,128,1,float16,float16,15,0.008736000085870424
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,128,4,64,0,1,float16,float16,15,0.008863999818762144
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,128,4,64,128,1,float16,fp8,15,0.008277333031098047
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,128,4,64,0,1,float16,fp8,15,0.008581333483258883
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,128,4,64,128,1,float16,float16,31,0.009461333354314169
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,128,4,64,0,1,float16,float16,31,0.009359999870260557
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,128,4,64,128,1,float16,fp8,31,0.008298666526873907
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,128,4,64,0,1,float16,fp8,31,0.008272000278035799
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,128,4,64,128,1,float16,float16,63,0.011237333218256632
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,128,4,64,0,1,float16,float16,63,0.010981333752473196
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,128,4,64,128,1,float16,fp8,63,0.00816000004609426
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,128,4,64,0,1,float16,fp8,63,0.0081386665503184
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,128,4,64,128,1,float16,float16,127,0.011136000355084738
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,128,4,64,0,1,float16,float16,127,0.011087999989589056
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,128,4,64,128,1,float16,fp8,127,0.009109333157539368
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,128,4,64,0,1,float16,fp8,127,0.009183999771873156
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,128,4,64,128,1,float16,float16,255,0.01138666644692421
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,128,4,64,0,1,float16,float16,255,0.011365332951148352
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,128,4,64,128,1,float16,fp8,255,0.009706666693091393
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,128,4,64,0,1,float16,fp8,255,0.01002133327225844
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,128,4,64,128,1,float16,float16,511,0.011183999478816986
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,128,4,64,0,1,float16,float16,511,0.012917333592971167
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,128,4,64,128,1,float16,fp8,511,0.00973866693675518
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,128,4,64,0,1,float16,fp8,511,0.01102399950226148
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,128,4,64,128,1,float16,float16,1023,0.013418667018413544
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,128,4,64,0,1,float16,float16,1023,0.01350933313369751
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,128,4,64,128,1,float16,fp8,1023,0.009194666519761086
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,128,4,64,0,1,float16,fp8,1023,0.011920000116030375
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,128,4,64,128,1,float16,float16,2047,0.014165333161751429
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,128,4,64,0,1,float16,float16,2047,0.014906667172908783
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,128,4,64,128,1,float16,fp8,2047,0.009445333232482275
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,128,4,64,0,1,float16,fp8,2047,0.012736000120639801
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,128,4,64,0,1,float16,float16,8191,0.023999998966852825
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,128,4,64,128,1,float16,float16,4095,0.014842666685581207
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,128,4,64,0,1,float16,float16,4095,0.015354666858911514
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,128,4,64,128,1,float16,fp8,4095,0.009301333377758661
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,128,4,64,0,1,float16,fp8,4095,0.01440000037352244
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,128,4,64,128,1,float16,float16,8191,0.018320000420014065
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,128,4,64,0,1,float16,fp8,16383,0.026943999032179516
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,128,4,64,128,1,float16,fp8,8191,0.009813333551088968
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,128,8,64,128,1,float16,fp8,1,0.0064319999267657595
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,128,4,64,0,1,float16,fp8,8191,0.021344001094500225
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,128,4,64,128,1,float16,float16,16383,0.019621333728233974
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,128,4,64,0,1,float16,float16,16383,0.028437333802382152
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,128,4,64,128,1,float16,fp8,16383,0.009653333574533463
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,128,8,64,128,1,float16,float16,1,0.00873066671192646
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,128,8,64,0,1,float16,float16,1,0.008736000085870424
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,128,8,64,0,1,float16,fp8,1,0.006458666796485583
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,128,8,64,128,1,float16,float16,3,0.008527999743819237
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,128,8,64,0,1,float16,float16,3,0.008618666479984919
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,128,8,64,128,1,float16,float16,15,0.008538666491707167
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,128,8,64,128,1,float16,fp8,3,0.006346666564544042
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,128,8,64,0,1,float16,fp8,3,0.006842666616042455
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,128,8,64,128,1,float16,float16,7,0.008789333204428354
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,128,8,64,0,1,float16,float16,7,0.008752000207702318
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,128,8,64,128,1,float16,fp8,7,0.006784000123540561
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,128,8,64,0,1,float16,fp8,7,0.00666133314371109
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,128,8,64,0,1,float16,float16,15,0.00873066671192646
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,128,8,64,128,1,float16,fp8,15,0.006384000182151794
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,128,8,64,0,1,float16,fp8,15,0.006666666517655055
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,128,8,64,128,1,float16,float16,31,0.009808000177145004
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,128,8,64,0,1,float16,float16,31,0.009434666484594345
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,128,8,64,128,1,float16,fp8,31,0.006693333387374878
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,128,8,64,0,1,float16,fp8,31,0.006762666627764702
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,128,8,64,128,1,float16,float16,63,0.010960000256697336
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,128,8,64,0,1,float16,float16,63,0.011029332876205444
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,128,8,64,128,1,float16,fp8,63,0.006538666784763336
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,128,8,64,0,1,float16,fp8,63,0.006624000146985054
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,128,8,64,128,1,float16,float16,127,0.011354666203260422
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,128,8,64,0,1,float16,float16,127,0.011237333218256632
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,128,8,64,128,1,float16,fp8,127,0.007402666533986728
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,128,8,64,0,1,float16,fp8,127,0.00761600024998188
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,128,8,64,128,1,float16,float16,255,0.01128000020980835
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,128,8,64,0,1,float16,float16,255,0.011472000430027643
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,128,8,64,128,1,float16,fp8,255,0.007850666840871176
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,128,8,64,0,1,float16,fp8,255,0.008874666566650072
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,128,8,64,128,1,float16,float16,511,0.012165332833925882
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,128,8,64,128,1,float16,fp8,1023,0.007786666974425316
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,128,8,64,0,1,float16,float16,511,0.011871999750534693
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,128,8,64,128,1,float16,fp8,511,0.008053333188096682
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,128,8,64,0,1,float16,fp8,511,0.009248000259200731
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,128,8,64,128,1,float16,float16,1023,0.011642667154471079
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,128,8,64,0,1,float16,fp8,2047,0.011690666278203329
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,128,8,64,0,1,float16,float16,1023,0.012080000092585882
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,128,8,64,0,1,float16,fp8,1023,0.009530666594703993
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,128,8,64,128,1,float16,float16,2047,0.015295999745527903
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,128,8,64,0,1,float16,fp8,4095,0.013610667238632837
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,128,8,64,0,1,float16,float16,2047,0.01749333366751671
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,128,8,64,128,1,float16,fp8,2047,0.008373333141207695
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,128,8,64,128,1,float16,float16,4095,0.01599466676513354
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,128,8,64,0,1,float16,float16,4095,0.019674666225910187
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,128,8,64,0,1,float16,fp8,8191,0.01823466643691063
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,128,8,64,128,1,float16,fp8,4095,0.008245333408315977
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,128,8,64,128,1,float16,float16,16383,0.022485333184401195
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,128,8,64,128,1,float16,float16,8191,0.019029332945744198
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,128,8,64,0,1,float16,float16,8191,0.02733866622050603
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,128,8,64,128,1,float16,fp8,8191,0.008186666915814081
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,128,8,64,0,1,float16,float16,16383,0.037861332297325134
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,128,8,64,128,1,float16,fp8,16383,0.008400000010927519
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,128,8,64,0,1,float16,fp8,16383,0.0259253333012263
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,128,1,64,128,1,float16,float16,1,0.07899733384450276
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,128,1,64,0,1,float16,float16,1,0.07877333462238312
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,128,1,64,128,1,float16,fp8,1,0.07930666704972585
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,128,1,64,0,1,float16,fp8,3,0.07952000200748444
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,128,1,64,0,1,float16,fp8,1,0.07919466495513916
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,128,1,64,128,1,float16,float16,3,0.07900799810886383
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,128,1,64,0,1,float16,float16,3,0.07906133433183034
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,128,1,64,128,1,float16,fp8,3,0.07954666515191396
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,128,1,64,128,1,float16,float16,7,0.08240533371766408
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,128,1,64,128,1,float16,fp8,7,0.08268799881140391
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,128,1,64,0,1,float16,float16,7,0.08239999910195668
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,128,1,64,0,1,float16,fp8,7,0.08273066580295563
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,128,1,64,128,1,float16,float16,15,0.08522666494051616
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,128,1,64,0,1,float16,float16,15,0.08526933193206787
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,128,1,64,128,1,float16,fp8,15,0.08554133772850037
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,128,1,64,0,1,float16,fp8,15,0.08544533451398213
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,128,1,64,128,1,float16,float16,31,0.1076639990011851
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,128,1,64,0,1,float16,float16,31,0.10760000348091125
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,128,1,64,128,1,float16,fp8,31,0.10843732953071594
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,128,1,64,0,1,float16,fp8,31,0.10866133371988933
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,128,1,64,128,1,float16,float16,63,0.10853866736094157
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,128,1,64,0,1,float16,float16,63,0.10838400324185689
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,128,1,64,128,1,float16,fp8,63,0.10894933342933655
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,128,1,64,0,1,float16,fp8,63,0.10897066195805867
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,128,1,64,128,1,float16,float16,127,0.11013333002726237
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,128,1,64,0,1,float16,float16,127,0.1102239986260732
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,128,1,64,128,1,float16,fp8,127,0.11002133289972942
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,128,1,64,0,1,float16,fp8,127,0.11010666688283284
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,128,1,64,128,1,float16,float16,255,0.11046399672826131
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,128,1,64,0,1,float16,float16,255,0.1332319974899292
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,128,1,64,128,1,float16,fp8,255,0.11009599765141805
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,128,1,64,0,1,float16,fp8,255,0.13293866316477457
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,128,1,64,128,1,float16,float16,1023,0.11107200384140015
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,128,1,64,128,1,float16,float16,511,0.11105066537857056
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,128,1,64,0,1,float16,float16,511,0.20730133851369223
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,128,1,64,128,1,float16,fp8,511,0.11009599765141805
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,128,1,64,0,1,float16,fp8,511,0.20881066719690958
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,128,1,64,0,1,float16,float16,1023,0.3560853401819865
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,128,1,64,128,1,float16,fp8,1023,0.11009599765141805
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,128,1,64,0,1,float16,fp8,1023,0.36046401659647626
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,128,2,64,0,1,float16,float16,1,0.0793333351612091
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,128,1,64,0,1,float16,fp8,2047,0.6624000072479248
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,128,1,64,128,1,float16,float16,2047,0.11096533139546712
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,128,2,64,128,1,float16,float16,3,0.0795360008875529
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,128,1,64,128,1,float16,fp8,2047,0.11033067107200623
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,128,1,64,0,1,float16,float16,2047,0.6508853435516357
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,128,2,64,128,1,float16,float16,1,0.07935466865698497
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,128,2,64,128,1,float16,fp8,1,0.07981866598129272
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,128,2,64,0,1,float16,fp8,1,0.07977066437403361
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,128,2,64,128,1,float16,fp8,7,0.08298666775226593
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,128,2,64,0,1,float16,float16,3,0.07952000200748444
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,128,2,64,128,1,float16,fp8,3,0.08006933331489563
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,128,2,64,0,1,float16,fp8,3,0.08003200093905131
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,128,2,64,128,1,float16,float16,7,0.08272533118724823
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,128,2,64,0,1,float16,float16,7,0.08260266482830048
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,128,2,64,128,1,float16,float16,31,0.10849600036938985
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,128,2,64,0,1,float16,fp8,7,0.08309333523114522
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,128,2,64,128,1,float16,float16,15,0.08562666177749634
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,128,2,64,0,1,float16,float16,15,0.08559999863306682
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,128,2,64,128,1,float16,fp8,15,0.08595200379689534
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,128,2,64,0,1,float16,fp8,15,0.0860533316930135
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,128,2,64,0,1,float16,float16,31,0.10846933722496033
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,128,2,64,128,1,float16,fp8,31,0.10910933216412862
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,128,2,64,0,1,float16,fp8,31,0.10924800237019856
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,128,2,64,128,1,float16,float16,63,0.10958932836850484
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,128,2,64,0,1,float16,float16,63,0.10939199725786845
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,128,2,64,128,1,float16,fp8,63,0.10990400115648906
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,128,2,64,0,1,float16,fp8,63,0.11006933450698853
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,128,2,64,128,1,float16,float16,127,0.11125333110491435
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,128,2,64,128,1,float16,fp8,255,0.11120532949765523
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,128,2,64,0,1,float16,float16,127,0.11104533076286316
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,128,2,64,128,1,float16,fp8,127,0.1108746627966563
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,128,2,64,0,1,float16,fp8,127,0.11109866698582967
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,128,2,64,128,1,float16,fp8,511,0.11122666796048482
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,128,2,64,128,1,float16,float16,255,0.11152000228563945
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,128,2,64,128,1,float16,float16,1023,0.11196800072987874
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,128,2,64,0,1,float16,float16,255,0.13479466239611307
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,128,2,64,0,1,float16,fp8,255,0.13423466682434082
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,128,2,64,128,1,float16,float16,511,0.11223999659220378
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,128,2,64,0,1,float16,fp8,1023,0.3635893265406291
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,128,2,64,0,1,float16,float16,511,0.2090346614519755
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,128,2,64,0,1,float16,fp8,511,0.21054399013519287
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,128,2,64,0,1,float16,float16,1023,0.3561973174413045
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,128,2,64,128,1,float16,fp8,1023,0.11134933431943257
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,128,2,64,128,1,float16,float16,2047,0.11181867122650146
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,128,2,64,128,1,float16,fp8,2047,0.11117866635322571
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,128,2,64,0,1,float16,float16,2047,0.6567893425623575
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,128,4,64,128,1,float16,float16,3,0.01762666677435239
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,128,4,64,128,1,float16,float16,1,0.017738666385412216
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,128,4,64,0,1,float16,float16,1,0.018239999810854595
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,128,2,64,0,1,float16,fp8,2047,0.6633866628011068
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,128,4,64,128,1,float16,fp8,1,0.009226666763424873
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,128,4,64,128,1,float16,float16,7,0.01826133330663045
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,128,4,64,0,1,float16,fp8,1,0.008752000207702318
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,128,4,64,0,1,float16,float16,3,0.017653333644072216
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,128,4,64,128,1,float16,fp8,3,0.008496000121037165
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,128,4,64,0,1,float16,fp8,3,0.008736000085870424
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,128,4,64,0,1,float16,float16,7,0.017808000246683758
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,128,4,64,128,1,float16,fp8,7,0.009103999783595404
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,128,4,64,0,1,float16,fp8,7,0.008623999853928884
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,128,4,64,128,1,float16,float16,15,0.01786133274435997
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,128,4,64,0,1,float16,float16,15,0.017786666750907898
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,128,4,64,128,1,float16,fp8,15,0.008517333616813024
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,128,4,64,0,1,float16,fp8,15,0.008837333569924036
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,128,4,64,128,1,float16,float16,31,0.017797333498795826
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,128,4,64,0,1,float16,float16,31,0.018533332894245785
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,128,4,64,128,1,float16,fp8,31,0.008597333605090777
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,128,4,64,0,1,float16,fp8,31,0.008762666955590248
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,128,4,64,128,1,float16,float16,63,0.01738133281469345
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,128,4,64,0,1,float16,float16,63,0.017450666675964992
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,128,4,64,128,1,float16,fp8,63,0.008805333326260248
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,128,4,64,0,1,float16,fp8,63,0.008586666857202848
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,128,4,64,128,1,float16,float16,127,0.0183146670460701
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,128,4,64,0,1,float16,float16,127,0.01785600061217944
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,128,4,64,128,1,float16,fp8,127,0.009712000067035357
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,128,4,64,0,1,float16,fp8,127,0.009530666594703993
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,128,4,64,128,1,float16,float16,255,0.03271999955177307
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,128,4,64,0,1,float16,float16,255,0.032511999209721885
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,128,4,64,128,1,float16,fp8,255,0.017008000363906223
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,128,4,64,0,1,float16,fp8,255,0.01913600042462349
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,128,4,64,128,1,float16,float16,511,0.04429866870244344
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,128,4,64,0,1,float16,float16,511,0.0498933345079422
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,128,4,64,128,1,float16,fp8,511,0.017152000218629837
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,128,4,64,0,1,float16,fp8,511,0.028175999720891316
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,128,4,64,128,1,float16,float16,1023,0.04350399971008301
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,128,4,64,0,1,float16,float16,1023,0.06127466758092245
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,128,4,64,128,1,float16,fp8,1023,0.017301333447297413
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,128,4,64,0,1,float16,fp8,1023,0.03524799893299738
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,128,4,64,128,1,float16,float16,2047,0.04673066735267639
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,128,4,64,0,1,float16,float16,2047,0.0828959991534551
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,128,4,64,128,1,float16,fp8,2047,0.017290666699409485
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,128,4,64,0,1,float16,fp8,2047,0.051216001311937966
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,128,8,64,128,1,float16,float16,1,0.02404266595840454
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,128,8,64,0,1,float16,float16,1,0.024186665813128155
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,128,8,64,128,1,float16,fp8,1,0.008069333309928576
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,128,8,64,0,1,float16,fp8,1,0.008042666440208754
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,128,8,64,128,1,float16,float16,3,0.024506665766239166
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,128,8,64,0,1,float16,float16,3,0.024506665766239166
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,128,8,64,128,1,float16,fp8,3,0.008005333443482717
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,128,8,64,0,1,float16,fp8,3,0.007914666707317034
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,128,8,64,128,1,float16,float16,7,0.02404800057411194
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,128,8,64,0,1,float16,float16,7,0.024256000916163128
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,128,8,64,0,1,float16,fp8,15,0.008005333443482717
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,128,8,64,128,1,float16,fp8,7,0.008074666683872541
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,128,8,64,0,1,float16,fp8,7,0.008047999814152718
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,128,8,64,128,1,float16,float16,15,0.024527999262015026
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,128,8,64,0,1,float16,float16,15,0.02458133300145467
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,128,8,64,128,1,float16,float16,63,0.02382933348417282
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,128,8,64,128,1,float16,fp8,15,0.00797333319981893
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,128,8,64,128,1,float16,float16,31,0.023925334215164185
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,128,8,64,0,1,float16,float16,31,0.024266667664051056
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,128,8,64,128,1,float16,fp8,31,0.007882666463653246
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,128,8,64,0,1,float16,fp8,31,0.008058666562040647
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,128,8,64,0,1,float16,float16,63,0.023989332218964893
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,128,8,64,0,1,float16,fp8,127,0.009461333354314169
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,128,8,64,128,1,float16,fp8,63,0.007850666840871176
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,128,8,64,0,1,float16,fp8,63,0.00790933333337307
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,128,8,64,128,1,float16,float16,127,0.02378133436044057
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,128,8,64,0,1,float16,float16,127,0.02380266785621643
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,128,8,64,128,1,float16,fp8,127,0.009530666594703993
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,128,8,64,128,1,float16,float16,255,0.04427733520666758
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,128,8,64,128,1,float16,fp8,255,0.01573333392540614
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,128,8,64,0,1,float16,float16,255,0.04417600234349569
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,128,8,64,128,1,float16,float16,1023,0.04436799883842468
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,128,8,64,0,1,float16,fp8,255,0.018432000031073887
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,128,8,64,128,1,float16,float16,511,0.0443200021982193
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,128,8,64,0,1,float16,float16,511,0.05332799752553304
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,128,8,64,128,1,float16,fp8,511,0.015696000307798386
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,128,8,64,0,1,float16,fp8,511,0.023743999501069386
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,128,8,64,128,1,float16,fp8,2047,0.015658666690190632
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,128,8,64,0,1,float16,float16,1023,0.0666186660528183
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,128,8,64,128,1,float16,fp8,1023,0.016255999604860943
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,128,8,64,0,1,float16,fp8,1023,0.03428266694148382
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,128,8,64,128,1,float16,float16,2047,0.04665066798528036
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,128,8,64,0,1,float16,float16,2047,0.09179199735323589
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,128,8,64,0,1,float16,fp8,2047,0.056101332108179726
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,128,1,64,128,1,float16,float16,1,0.013818666338920593
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,128,1,64,0,1,float16,float16,1,0.013781332721312841
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,128,1,64,128,1,float16,fp8,1,0.013850666582584381
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,128,1,64,0,1,float16,fp8,1,0.014175999909639359
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,128,1,64,128,1,float16,float16,3,0.013909333695967993
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,128,1,64,0,1,float16,float16,3,0.013669333110253016
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,128,1,64,128,1,float16,fp8,3,0.013712000101804733
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,128,1,64,0,1,float16,fp8,3,0.013765333841244379
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,128,1,64,128,1,float16,float16,7,0.013973332941532135
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,128,1,64,0,1,float16,float16,7,0.013999999811251959
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,128,1,64,128,1,float16,fp8,7,0.014111999422311783
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,128,1,64,0,1,float16,fp8,7,0.014479999740918478
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,128,1,64,128,1,float16,float16,15,0.01423466702302297
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,128,1,64,0,1,float16,float16,15,0.013994666437307993
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,128,1,64,128,1,float16,fp8,15,0.013877333452304205
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,128,1,64,0,1,float16,fp8,15,0.013861333330472311
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,128,1,64,128,1,float16,float16,31,0.01523200049996376
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,128,1,64,0,1,float16,float16,31,0.015263999501864115
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,128,1,64,128,1,float16,fp8,31,0.01553600033124288
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,128,1,64,0,1,float16,fp8,31,0.015439999600251516
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,128,1,64,128,1,float16,float16,63,0.018394666413466137
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,128,1,64,0,1,float16,float16,63,0.018464000274737675
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,128,1,64,128,1,float16,fp8,127,0.018757333358128864
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,128,1,64,128,1,float16,fp8,63,0.01850133389234543
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,128,1,64,0,1,float16,fp8,63,0.018581333259741466
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,128,1,64,128,1,float16,float16,127,0.01877333347996076
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,128,1,64,0,1,float16,float16,127,0.018874666343132656
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,128,1,64,0,1,float16,fp8,127,0.018592000007629395
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,128,1,64,128,1,float16,float16,255,0.01905599981546402
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,128,1,64,0,1,float16,float16,255,0.019194666296243668
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,128,1,64,128,1,float16,fp8,255,0.018730666488409042
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,128,1,64,0,1,float16,fp8,255,0.018768000106016796
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,128,1,64,128,1,float16,float16,511,0.018986667195955913
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,128,1,64,0,1,float16,float16,1023,0.032144000132878624
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,128,1,64,128,1,float16,fp8,1023,0.018698666244745255
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,128,1,64,0,1,float16,float16,511,0.022416000564893086
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,128,1,64,128,1,float16,fp8,511,0.018613333503405254
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,128,1,64,0,1,float16,fp8,511,0.02199466774861018
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,128,1,64,128,1,float16,float16,1023,0.019071999937295914
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,128,1,64,0,1,float16,fp8,1023,0.031983998914559685
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,128,1,64,128,1,float16,float16,2047,0.01884799947341283
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,128,1,64,0,1,float16,float16,2047,0.05131733417510986
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,128,1,64,128,1,float16,fp8,2047,0.01889066646496455
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,128,1,64,0,1,float16,fp8,2047,0.05211733281612396
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,128,1,64,128,1,float16,float16,4095,0.01886933296918869
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,128,1,64,128,1,float16,fp8,4095,0.018751999984184902
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,128,1,64,0,1,float16,float16,4095,0.09036800265312195
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,128,1,64,0,1,float16,fp8,4095,0.09210667014122009
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,128,1,64,128,1,float16,float16,8191,0.01953599974513054
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,128,1,64,0,1,float16,float16,8191,0.16805867354075113
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,128,1,64,128,1,float16,fp8,8191,0.01886933296918869
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,128,1,64,0,1,float16,fp8,8191,0.1711946725845337
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,128,1,64,128,1,float16,float16,16383,0.018853332847356796
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,128,1,64,128,1,float16,fp8,16383,0.01882133384545644
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,128,1,64,0,1,float16,float16,16383,0.32440000772476196
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,128,2,64,128,1,float16,float16,1,0.014384000251690546
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,128,1,64,0,1,float16,fp8,16383,0.32869333028793335
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,128,2,64,0,1,float16,float16,1,0.013797332843144735
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,128,2,64,128,1,float16,fp8,1,0.013770667215188345
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,128,2,64,0,1,float16,fp8,1,0.013658666362365087
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,128,2,64,128,1,float16,float16,3,0.013717333475748697
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,128,2,64,0,1,float16,float16,3,0.013834666460752487
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,128,2,64,128,1,float16,fp8,3,0.013914667069911957
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,128,2,64,0,1,float16,fp8,3,0.014287999520699183
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,128,2,64,128,1,float16,float16,7,0.013936000565687815
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,128,2,64,0,1,float16,float16,7,0.014117332796255747
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,128,2,64,128,1,float16,fp8,7,0.013983999689420065
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,128,2,64,0,1,float16,fp8,7,0.013962666193644205
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,128,2,64,128,1,float16,float16,15,0.014074667046467463
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,128,2,64,0,1,float16,float16,15,0.014101333916187286
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,128,2,64,128,1,float16,fp8,15,0.014197333405415217
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,128,2,64,0,1,float16,fp8,15,0.014639999717473984
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,128,2,64,128,1,float16,float16,31,0.01540800059835116
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,128,2,64,0,1,float16,float16,31,0.015306666493415833
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,128,2,64,128,1,float16,fp8,31,0.015279999623696009
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,128,2,64,0,1,float16,fp8,31,0.015184000134468079
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,128,2,64,128,1,float16,float16,63,0.018432000031073887
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,128,2,64,0,1,float16,float16,63,0.018543999642133713
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,128,2,64,128,1,float16,float16,255,0.018922666708628338
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,128,2,64,128,1,float16,fp8,63,0.018858666221300762
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,128,2,64,0,1,float16,fp8,63,0.019066666563351948
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,128,2,64,128,1,float16,float16,127,0.018698666244745255
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,128,2,64,0,1,float16,float16,127,0.018719999740521114
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,128,2,64,128,1,float16,fp8,127,0.01884799947341283
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,128,2,64,0,1,float16,fp8,127,0.01854933301607768
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,128,2,64,0,1,float16,float16,255,0.019039999693632126
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,128,2,64,128,1,float16,fp8,255,0.0185759998857975
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,128,2,64,0,1,float16,fp8,255,0.018810667097568512
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,128,2,64,128,1,float16,float16,511,0.019381333142518997
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,128,2,64,0,1,float16,float16,511,0.022218666970729828
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,128,2,64,128,1,float16,fp8,511,0.018730666488409042
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,128,2,64,0,1,float16,fp8,511,0.02197866638501485
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,128,2,64,128,1,float16,float16,1023,0.01876266673207283
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,128,2,64,0,1,float16,float16,1023,0.03229333211978277
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,128,2,64,128,1,float16,fp8,1023,0.018709332992633183
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,128,2,64,0,1,float16,fp8,1023,0.03215999901294708
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,128,2,64,128,1,float16,float16,2047,0.01951466624935468
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,128,2,64,0,1,float16,float16,2047,0.0513919989267985
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,128,2,64,128,1,float16,fp8,2047,0.018661333868900936
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,128,2,64,0,1,float16,fp8,2047,0.05212266743183136
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,128,2,64,128,1,float16,float16,4095,0.018816000471512478
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,128,2,64,0,1,float16,float16,4095,0.09045867125193278
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,128,2,64,128,1,float16,fp8,4095,0.018719999740521114
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,128,2,64,0,1,float16,fp8,4095,0.09236799677213033
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,128,2,64,128,1,float16,float16,8191,0.019530666371186573
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,128,2,64,0,1,float16,float16,8191,0.16784000396728516
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,128,2,64,128,1,float16,fp8,8191,0.018709332992633183
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,128,4,64,128,1,float16,float16,1,0.01441066712141037
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,128,2,64,0,1,float16,fp8,8191,0.17128533124923706
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,128,2,64,128,1,float16,float16,16383,0.018816000471512478
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,128,2,64,0,1,float16,float16,16383,0.3243680000305176
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,128,2,64,128,1,float16,fp8,16383,0.018746666610240936
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,128,4,64,0,1,float16,float16,3,0.013781332721312841
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,128,2,64,0,1,float16,fp8,16383,0.32914666334788006
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,128,4,64,0,1,float16,float16,1,0.013669333110253016
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,128,4,64,128,1,float16,fp8,1,0.007871999715765318
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,128,4,64,0,1,float16,fp8,1,0.008656000097592672
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,128,4,64,128,1,float16,float16,3,0.013946666071812311
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,128,4,64,128,1,float16,fp8,3,0.008362666393319765
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,128,4,64,0,1,float16,fp8,3,0.007791999727487564
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,128,4,64,128,1,float16,float16,7,0.014149333039919535
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,128,4,64,0,1,float16,float16,7,0.013914667069911957
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,128,4,64,128,1,float16,fp8,7,0.008378666515151659
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,128,4,64,0,1,float16,fp8,7,0.008714666590094566
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,128,4,64,128,1,float16,float16,15,0.014218666901191076
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,128,4,64,0,1,float16,float16,15,0.014250667144854864
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,128,4,64,128,1,float16,fp8,15,0.008261333530147871
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,128,4,64,0,1,float16,fp8,15,0.008287999778985977
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,128,4,64,128,1,float16,float16,31,0.015119999647140503
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,128,4,64,0,1,float16,float16,31,0.015392000476519266
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,128,4,64,128,1,float16,fp8,31,0.008645333349704742
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,128,4,64,0,1,float16,fp8,31,0.00842666688064734
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,128,4,64,128,1,float16,float16,63,0.018677332748969395
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,128,4,64,0,1,float16,float16,63,0.018613333503405254
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,128,4,64,128,1,float16,fp8,63,0.008197333042820295
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,128,4,64,0,1,float16,fp8,127,0.009237333511312803
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,128,4,64,0,1,float16,fp8,63,0.008000000069538752
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,128,4,64,128,1,float16,float16,127,0.018816000471512478
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,128,4,64,0,1,float16,float16,127,0.019258666783571243
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,128,4,64,128,1,float16,fp8,127,0.009269333134094873
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,128,4,64,128,1,float16,float16,255,0.01883200059334437
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,128,4,64,0,1,float16,float16,255,0.019002666076024372
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,128,4,64,128,1,float16,fp8,255,0.0099093330403169
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,128,4,64,0,1,float16,fp8,255,0.010346666599313417
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,128,4,64,128,1,float16,float16,511,0.013167999684810638
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,128,4,64,0,1,float16,float16,511,0.01368533323208491
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,128,4,64,0,1,float16,fp8,1023,0.011120000233252844
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,128,4,64,128,1,float16,fp8,511,0.009925333162148794
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,128,4,64,0,1,float16,float16,2047,0.021322667598724365
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,128,4,64,0,1,float16,fp8,511,0.010618666807810465
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,128,4,64,128,1,float16,float16,1023,0.01332266628742218
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,128,4,64,128,1,float16,float16,4095,0.01945066700379054
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,128,4,64,0,1,float16,float16,1023,0.013584000368913015
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,128,4,64,128,1,float16,fp8,1023,0.009461333354314169
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,128,4,64,128,1,float16,float16,2047,0.018320000420014065
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,128,4,64,128,1,float16,fp8,2047,0.009786666681369146
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,128,4,64,0,1,float16,fp8,2047,0.01482133318980535
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,128,4,64,0,1,float16,float16,4095,0.023610666394233704
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,128,4,64,128,1,float16,fp8,4095,0.009722666814923286
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,128,4,64,0,1,float16,fp8,4095,0.018144000321626663
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,128,4,64,128,1,float16,float16,8191,0.022490667800108593
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,128,4,64,0,1,float16,float16,8191,0.03324266771475474
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,128,4,64,128,1,float16,fp8,8191,0.010090666512648264
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,128,4,64,0,1,float16,fp8,8191,0.02454400062561035
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,128,4,64,128,1,float16,float16,16383,0.02651199946800868
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,128,4,64,0,1,float16,float16,16383,0.0460746685663859
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,128,4,64,128,1,float16,fp8,16383,0.009946666657924652
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,128,4,64,0,1,float16,fp8,16383,0.042490666111310325
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,128,8,64,128,1,float16,float16,1,0.013946666071812311
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,128,8,64,0,1,float16,float16,1,0.013978666315476099
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,128,8,64,128,1,float16,fp8,1,0.006730666384100914
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,128,8,64,0,1,float16,fp8,1,0.006389333556095759
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,128,8,64,128,1,float16,float16,3,0.014074667046467463
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,128,8,64,0,1,float16,float16,3,0.013936000565687815
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,128,8,64,128,1,float16,fp8,3,0.006362666686375936
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,128,8,64,0,1,float16,fp8,3,0.00666133314371109
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,128,8,64,128,1,float16,float16,7,0.014229333649079004
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,128,8,64,0,1,float16,float16,7,0.014181333283583323
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,128,8,64,128,1,float16,fp8,7,0.0069386667261521024
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,128,8,64,0,1,float16,fp8,7,0.006618666773041089
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,128,8,64,128,1,float16,float16,15,0.014208000153303146
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,128,8,64,0,1,float16,float16,15,0.014202666779359182
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,128,8,64,128,1,float16,fp8,15,0.006335999816656113
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,128,8,64,0,1,float16,fp8,15,0.006805333619316419
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,128,8,64,128,1,float16,float16,31,0.01525866612792015
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,128,8,64,0,1,float16,float16,31,0.015386667102575302
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,128,8,64,128,1,float16,fp8,31,0.00655466690659523
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,128,8,64,0,1,float16,fp8,31,0.006533333410819371
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,128,8,64,128,1,float16,float16,63,0.018613333503405254
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,128,8,64,0,1,float16,float16,63,0.01871466636657715
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,128,8,64,128,1,float16,fp8,63,0.006538666784763336
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,128,8,64,0,1,float16,fp8,63,0.006650666395823161
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,128,8,64,128,1,float16,float16,127,0.018789333601792652
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,128,8,64,0,1,float16,float16,127,0.018863999595244724
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,128,8,64,128,1,float16,fp8,127,0.007429333403706551
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,128,8,64,0,1,float16,fp8,127,0.007280000175038974
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,128,8,64,128,1,float16,float16,255,0.011781333635250727
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,128,8,64,0,1,float16,float16,255,0.011887999872366587
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,128,8,64,128,1,float16,fp8,255,0.007925333455204964
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,128,8,64,0,1,float16,fp8,255,0.008901333436369896
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,128,8,64,128,1,float16,float16,511,0.011882666498422623
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,128,8,64,0,1,float16,float16,511,0.012319999436537424
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,128,8,64,0,1,float16,fp8,1023,0.010480000327030817
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,128,8,64,128,1,float16,float16,2047,0.019152000546455383
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,128,8,64,128,1,float16,fp8,511,0.008117333054542542
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,128,8,64,0,1,float16,fp8,511,0.008821333448092142
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,128,8,64,128,1,float16,float16,1023,0.01659199967980385
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,128,8,64,0,1,float16,float16,1023,0.017664000391960144
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,128,8,64,128,1,float16,fp8,1023,0.008250666782259941
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,128,8,64,0,1,float16,float16,2047,0.024447999894618988
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,128,8,64,128,1,float16,fp8,2047,0.008746666833758354
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,128,8,64,0,1,float16,fp8,2047,0.013013333082199097
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,128,8,64,128,1,float16,float16,4095,0.02386666586001714
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,128,8,64,0,1,float16,float16,4095,0.03246400008598963
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,128,8,64,128,1,float16,fp8,4095,0.008469333251317343
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,128,8,64,0,1,float16,fp8,8191,0.027930667002995808
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,128,8,64,0,1,float16,fp8,4095,0.01672533278663953
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,128,8,64,128,1,float16,float16,8191,0.02779199928045273
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,128,8,64,0,1,float16,float16,8191,0.04461333155632019
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,128,8,64,128,1,float16,fp8,8191,0.008623999853928884
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,128,8,64,128,1,float16,float16,16383,0.03393599887688955
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,128,8,64,0,1,float16,float16,16383,0.0676693320274353
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,128,8,64,128,1,float16,fp8,16383,0.008826666822036108
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,128,8,64,0,1,float16,fp8,16383,0.03878933439652125
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,128,1,64,128,1,float16,float16,1,0.15123732884724936
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,128,1,64,0,1,float16,float16,1,0.15119999647140503
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,128,1,64,128,1,float16,fp8,1,0.1525759994983673
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,128,1,64,0,1,float16,fp8,1,0.15255999565124512
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,128,1,64,128,1,float16,float16,3,0.1513813336690267
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,128,1,64,0,1,float16,float16,3,0.1514079968134562
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,128,1,64,128,1,float16,fp8,3,0.15281066298484802
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,128,1,64,0,1,float16,fp8,3,0.1525759994983673
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,128,1,64,128,1,float16,float16,7,0.1588533322016398
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,128,1,64,0,1,float16,float16,7,0.15902400016784668
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,128,1,64,128,1,float16,fp8,7,0.15971199671427408
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,128,1,64,0,1,float16,fp8,7,0.15979199608167013
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,128,1,64,128,1,float16,float16,15,0.1644053359826406
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,128,1,64,128,1,float16,float16,31,0.20992533365885416
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,128,1,64,0,1,float16,float16,15,0.16462399562199911
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,128,1,64,128,1,float16,fp8,15,0.16547733545303345
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,128,1,64,0,1,float16,fp8,15,0.16531733671824136
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,128,1,64,0,1,float16,float16,31,0.20970133940378824
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,128,1,64,128,1,float16,fp8,31,0.21144000689188638
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,128,1,64,0,1,float16,fp8,63,0.21248000860214233
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,128,1,64,0,1,float16,fp8,31,0.2113866607348124
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,128,1,64,128,1,float16,float16,63,0.2112906575202942
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,128,1,64,0,1,float16,float16,63,0.2113920052846273
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,128,1,64,128,1,float16,fp8,63,0.21252266565958658
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,128,1,64,128,1,float16,float16,127,0.21462400754292807
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,128,1,64,0,1,float16,float16,127,0.2145599921544393
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,128,1,64,128,1,float16,fp8,127,0.21473600467046103
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,128,1,64,0,1,float16,fp8,127,0.21461333831151327
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,128,1,64,128,1,float16,float16,255,0.21512534221013388
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,128,1,64,0,1,float16,float16,255,0.2605440020561218
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,128,1,64,128,1,float16,fp8,255,0.21486934026082358
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,128,1,64,0,1,float16,fp8,255,0.2600586613019307
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,128,1,64,128,1,float16,float16,1023,0.21643733978271484
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,128,1,64,128,1,float16,float16,511,0.21640000740687051
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,128,1,64,128,1,float16,fp8,1023,0.21473600467046103
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,128,1,64,0,1,float16,float16,511,0.4071040153503418
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,128,1,64,128,1,float16,fp8,511,0.21475199858347574
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,128,1,64,0,1,float16,fp8,511,0.4108533461888631
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,128,1,64,0,1,float16,float16,1023,0.701749324798584
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,128,2,64,128,1,float16,float16,3,0.15313599507013956
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,128,2,64,128,1,float16,float16,1,0.15279466907183328
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,128,1,64,0,1,float16,fp8,1023,0.7124053637186686
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,128,2,64,0,1,float16,float16,1,0.15288000305493674
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,128,2,64,128,1,float16,fp8,1,0.15416000286738077
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,128,2,64,0,1,float16,fp8,1,0.1540000041325887
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,128,2,64,0,1,float16,float16,3,0.1530026694138845
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,128,2,64,128,1,float16,fp8,3,0.15461333592732748
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,128,2,64,0,1,float16,fp8,3,0.15426133076349893
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,128,2,64,128,1,float16,float16,7,0.1597866714000702
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,128,2,64,0,1,float16,float16,7,0.1593119998772939
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,128,2,64,128,1,float16,fp8,7,0.16029333074887595
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,128,2,64,0,1,float16,fp8,7,0.16024000446001688
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,128,2,64,128,1,float16,float16,15,0.16511467099189758
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,128,2,64,0,1,float16,float16,15,0.16526400049527487
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,128,2,64,128,1,float16,fp8,15,0.16616533199946085
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,128,2,64,0,1,float16,fp8,15,0.16612266500790915
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,128,2,64,128,1,float16,float16,31,0.2112906575202942
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,128,2,64,0,1,float16,float16,31,0.21094934145609537
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,128,2,64,128,1,float16,fp8,31,0.21235734224319458
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,128,2,64,0,1,float16,fp8,31,0.21233065923055014
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,128,2,64,128,1,float16,float16,63,0.21261332432428995
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,128,2,64,0,1,float16,float16,63,0.21267199516296387
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,128,2,64,128,1,float16,fp8,63,0.2140106757481893
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,128,2,64,0,1,float16,fp8,63,0.21397332350413004
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,128,2,64,128,1,float16,float16,127,0.21658132473627725
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,128,2,64,0,1,float16,float16,127,0.21607999006907144
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,128,2,64,128,1,float16,fp8,127,0.216154674688975
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,128,2,64,0,1,float16,fp8,127,0.21638399362564087
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,128,2,64,128,1,float16,float16,255,0.2169333299001058
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,128,2,64,0,1,float16,float16,255,0.2635200023651123
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,128,2,64,128,1,float16,fp8,255,0.2166773279507955
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,128,2,64,0,1,float16,fp8,255,0.2622239987055461
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,128,2,64,128,1,float16,float16,511,0.21870932976404825
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,128,2,64,0,1,float16,float16,511,0.4094239870707194
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,128,2,64,128,1,float16,fp8,511,0.2162826657295227
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,128,2,64,0,1,float16,fp8,511,0.41365333398183185
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,128,2,64,128,1,float16,float16,1023,0.21795199314753214
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,128,2,64,128,1,float16,fp8,1023,0.21652267376581827
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,128,2,64,0,1,float16,float16,1023,0.7076106866200765
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,128,4,64,128,1,float16,float16,1,0.03012799968322118
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,128,4,64,0,1,float16,float16,1,0.029530666768550873
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,128,2,64,0,1,float16,fp8,1023,0.715002695719401
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,128,4,64,128,1,float16,fp8,1,0.014384000251690546
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,128,4,64,0,1,float16,fp8,1,0.014373333503802618
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,128,4,64,128,1,float16,float16,3,0.029445332785447437
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,128,4,64,0,1,float16,float16,3,0.029829333225886028
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,128,4,64,128,1,float16,fp8,3,0.014357333381970724
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,128,4,64,0,1,float16,fp8,3,0.014757333944241205
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,128,4,64,128,1,float16,float16,7,0.02956799914439519
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,128,4,64,0,1,float16,float16,7,0.02956799914439519
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,128,4,64,128,1,float16,fp8,7,0.01452800010641416
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,128,4,64,0,1,float16,fp8,7,0.014384000251690546
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,128,4,64,128,1,float16,float16,15,0.029552000264326733
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,128,4,64,0,1,float16,float16,15,0.0308746670683225
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,128,4,64,128,1,float16,fp8,15,0.014314666390419006
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,128,4,64,0,1,float16,fp8,15,0.01431999976436297
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,128,4,64,128,1,float16,float16,63,0.029493334392706554
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,128,4,64,128,1,float16,float16,31,0.02938666691382726
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,128,4,64,128,1,float16,fp8,63,0.014240000396966934
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,128,4,64,0,1,float16,float16,31,0.029653333127498627
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,128,4,64,128,1,float16,fp8,31,0.014479999740918478
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,128,4,64,0,1,float16,float16,127,0.029274667302767437
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,128,4,64,0,1,float16,fp8,31,0.014314666390419006
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,128,4,64,0,1,float16,fp8,127,0.016229332735141117
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,128,4,64,0,1,float16,float16,63,0.029706666866938274
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,128,4,64,0,1,float16,fp8,63,0.014314666390419006
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,128,4,64,128,1,float16,float16,127,0.02941333254178365
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,128,4,64,128,1,float16,fp8,127,0.016341333587964375
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,128,4,64,128,1,float16,float16,255,0.05487999816735586
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,128,4,64,0,1,float16,float16,255,0.054010664423306785
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,128,4,64,128,1,float16,fp8,255,0.024245334168275196
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,128,4,64,0,1,float16,fp8,255,0.027808000644048054
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,128,4,64,128,1,float16,float16,511,0.054416000843048096
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,128,4,64,0,1,float16,float16,511,0.0658186674118042
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,128,4,64,128,1,float16,fp8,511,0.02446399877468745
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,128,4,64,0,1,float16,fp8,511,0.03586133321126302
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,128,4,64,128,1,float16,float16,1023,0.05490666627883911
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,128,4,64,0,1,float16,float16,1023,0.0827466646830241
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,128,4,64,128,1,float16,fp8,1023,0.024186665813128155
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,128,4,64,0,1,float16,fp8,1023,0.0540533314148585
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,128,8,64,128,1,float16,float16,1,0.041850666205088295
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,128,8,64,0,1,float16,float16,1,0.04219200213750204
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,128,8,64,128,1,float16,fp8,1,0.012789333860079447
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,128,8,64,0,1,float16,fp8,1,0.012661332885424295
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,128,8,64,128,1,float16,float16,3,0.04174399872620901
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,128,8,64,0,1,float16,float16,7,0.042250668009122215
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,128,8,64,0,1,float16,float16,3,0.042362665136655174
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,128,8,64,128,1,float16,fp8,3,0.012709333250919977
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,128,8,64,0,1,float16,fp8,3,0.012762666990359625
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,128,8,64,128,1,float16,float16,7,0.04201599955558777
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,128,8,64,128,1,float16,fp8,7,0.012666666259368261
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,128,8,64,0,1,float16,fp8,7,0.012554666648308435
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,128,8,64,128,1,float16,float16,15,0.042223999897638954
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,128,8,64,0,1,float16,float16,15,0.04246933261553446
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,128,8,64,0,1,float16,fp8,31,0.012554666648308435
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,128,8,64,128,1,float16,fp8,15,0.012560000022252401
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,128,8,64,0,1,float16,fp8,15,0.012810666114091873
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,128,8,64,128,1,float16,float16,31,0.04200000067551931
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,128,8,64,0,1,float16,float16,31,0.04196266829967499
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,128,8,64,128,1,float16,fp8,31,0.012624000509579977
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,128,8,64,128,1,float16,float16,63,0.04123199979464213
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,128,8,64,0,1,float16,float16,63,0.04146133363246918
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,128,8,64,128,1,float16,fp8,63,0.012613333761692047
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,128,8,64,0,1,float16,fp8,63,0.012442667037248611
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,128,8,64,128,1,float16,float16,127,0.04117333392302195
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,128,8,64,0,1,float16,float16,255,0.04310933252175649
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,128,8,64,0,1,float16,float16,127,0.0412266676624616
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,128,8,64,128,1,float16,fp8,127,0.015354666858911514
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,128,8,64,0,1,float16,fp8,127,0.015311999867359797
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,128,8,64,128,1,float16,float16,255,0.043680002291997276
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,128,8,64,128,1,float16,fp8,255,0.01524266724785169
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,128,8,64,0,1,float16,fp8,255,0.020400000115235645
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,128,8,64,128,1,float16,float16,511,0.04386133452256521
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,128,8,64,128,1,float16,fp8,1023,0.015168000012636185
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,128,8,64,0,1,float16,float16,511,0.05667733152707418
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,128,8,64,128,1,float16,fp8,511,0.015279999623696009
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,128,8,64,0,1,float16,fp8,511,0.031189332405726116
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,128,8,64,128,1,float16,float16,1023,0.043568000197410583
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,128,8,64,0,1,float16,float16,1023,0.07799999912579854
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,128,8,64,0,1,float16,fp8,1023,0.052629331747690834
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,128,1,64,128,1,float16,float16,1,0.2953919967015584
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,128,1,64,0,1,float16,float16,3,0.2953760027885437
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,128,1,64,0,1,float16,float16,1,0.29517332712809247
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,128,1,64,0,1,float16,fp8,1,0.2982293367385864
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,128,1,64,128,1,float16,fp8,1,0.29811733961105347
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,128,1,64,128,1,float16,float16,3,0.29530133803685504
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,128,1,64,128,1,float16,fp8,3,0.2984586755434672
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,128,1,64,0,1,float16,fp8,3,0.2981920043627421
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,128,1,64,128,1,float16,float16,7,0.31196800867716473
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,128,1,64,0,1,float16,float16,7,0.31164799133936566
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,128,1,64,128,1,float16,fp8,7,0.3135146697362264
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,128,1,64,0,1,float16,fp8,7,0.313920001188914
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,128,1,64,128,1,float16,float16,15,0.3234826723734538
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,128,1,64,0,1,float16,float16,15,0.3232373396555583
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,128,1,64,128,1,float16,fp8,15,0.3251466751098633
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,128,1,64,0,1,float16,fp8,15,0.3251466751098633
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,128,1,64,128,1,float16,float16,31,0.4145280122756958
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,128,1,64,128,1,float16,float16,63,0.4170453151067098
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,128,1,64,0,1,float16,float16,31,0.41424532731374103
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,128,1,64,128,1,float16,fp8,31,0.4171573321024577
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,128,1,64,0,1,float16,fp8,31,0.4174026648203532
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,128,1,64,0,1,float16,float16,63,0.4166293144226074
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,128,1,64,128,1,float16,fp8,63,0.4193120002746582
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,128,1,64,0,1,float16,fp8,63,0.4193600018819173
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,128,1,64,128,1,float16,float16,127,0.4243199825286865
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,128,1,64,128,1,float16,fp8,127,0.4233333269755046
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,128,1,64,0,1,float16,float16,127,0.4243573347727458
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,128,1,64,0,1,float16,fp8,127,0.42370132605234784
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,128,1,64,128,1,float16,float16,255,0.4251360098520915
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,128,1,64,0,1,float16,float16,255,0.5147306521733602
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,128,1,64,128,1,float16,fp8,255,0.4240373373031616
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,128,2,64,128,1,float16,float16,1,0.3004320065180461
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,128,1,64,0,1,float16,fp8,255,0.5141706864039103
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,128,2,64,0,1,float16,float16,1,0.2998666763305664
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,128,2,64,128,1,float16,fp8,1,0.3025546669960022
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,128,2,64,0,1,float16,fp8,1,0.3025493423144023
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,128,2,64,128,1,float16,float16,3,0.3004693388938904
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,128,2,64,0,1,float16,float16,3,0.30053865909576416
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,128,2,64,128,1,float16,fp8,3,0.30299200614293414
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,128,2,64,0,1,float16,fp8,3,0.3023786743481954
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,128,2,64,128,1,float16,float16,7,0.31214932600657147
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,128,2,64,0,1,float16,float16,7,0.31270933151245117
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,128,2,64,128,1,float16,fp8,7,0.3145973285039266
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,128,2,64,0,1,float16,fp8,7,0.3145493268966675
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,128,2,64,128,1,float16,float16,15,0.32496533791224164
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,128,2,64,0,1,float16,float16,15,0.3254879911740621
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,128,2,64,128,1,float16,fp8,15,0.3263733386993408
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,128,2,64,0,1,float16,fp8,15,0.32622400919596356
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,128,2,64,128,1,float16,float16,31,0.4161226749420166
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,128,2,64,0,1,float16,float16,31,0.4158933162689209
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,128,2,64,128,1,float16,fp8,31,0.41841065883636475
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,128,2,64,0,1,float16,fp8,31,0.41859201590220135
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,128,2,64,128,1,float16,float16,63,0.42026134332021076
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,128,2,64,0,1,float16,fp8,63,0.4219520092010498
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,128,2,64,0,1,float16,float16,63,0.41938666502634686
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,128,2,64,128,1,float16,fp8,63,0.4215093453725179
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,128,2,64,128,1,float16,float16,127,0.4268480141957601
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,128,2,64,0,1,float16,float16,127,0.4267253478368123
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,128,2,64,128,1,float16,fp8,127,0.42685866355895996
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,128,2,64,0,1,float16,fp8,127,0.4266773462295532
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,128,2,64,128,1,float16,float16,255,0.4295039971669515
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,128,2,64,0,1,float16,float16,255,0.5212159951527914
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,128,2,64,128,1,float16,fp8,255,0.4277973175048828
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,128,4,64,128,1,float16,float16,1,0.05235200126965841
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,128,4,64,0,1,float16,float16,1,0.05247466762860616
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,128,2,64,0,1,float16,fp8,255,0.517845352490743
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,128,4,64,128,1,float16,fp8,1,0.020080000162124634
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,128,4,64,128,1,float16,float16,7,0.0524586687485377
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,128,4,64,0,1,float16,fp8,1,0.020165332903464634
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,128,4,64,128,1,float16,float16,3,0.052970667680104576
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,128,4,64,0,1,float16,float16,3,0.05302399893601736
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,128,4,64,128,1,float16,fp8,3,0.02025066688656807
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,128,4,64,0,1,float16,fp8,3,0.020303999384244282
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,128,4,64,0,1,float16,float16,7,0.05243200063705444
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,128,4,64,128,1,float16,fp8,15,0.020655999581019085
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,128,4,64,0,1,float16,fp8,15,0.020256000260512035
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,128,4,64,128,1,float16,fp8,7,0.020234666764736176
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,128,4,64,0,1,float16,fp8,7,0.02027200038234393
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,128,4,64,128,1,float16,fp8,31,0.020703999946514767
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,128,4,64,128,1,float16,float16,15,0.05295999844868978
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,128,4,64,0,1,float16,float16,15,0.05304533243179321
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,128,4,64,128,1,float16,float16,31,0.05236800014972687
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,128,4,64,0,1,float16,float16,31,0.05246399839719137
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,128,4,64,0,1,float16,fp8,31,0.020256000260512035
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,128,4,64,128,1,float16,float16,63,0.05157333115736643
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,128,4,64,0,1,float16,float16,63,0.05218133330345154
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,128,4,64,128,1,float16,fp8,127,0.023584000766277313
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,128,4,64,128,1,float16,fp8,63,0.020576000213623047
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,128,4,64,0,1,float16,fp8,63,0.020047999918460846
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,128,4,64,128,1,float16,float16,127,0.05243733525276184
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,128,4,64,0,1,float16,float16,127,0.051872000098228455
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,128,4,64,0,1,float16,fp8,127,0.023941333095232647
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,128,4,64,128,1,float16,float16,255,0.054192001620928444
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,128,4,64,0,1,float16,float16,255,0.05427733560403188
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,128,4,64,128,1,float16,fp8,255,0.02387733260790507
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,128,4,64,0,1,float16,fp8,255,0.03105599929889043
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,128,8,64,128,1,float16,float16,1,0.07660266757011414
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,128,8,64,0,1,float16,float16,1,0.07688533266385396
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,128,8,64,128,1,float16,fp8,1,0.021946666141351063
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,128,8,64,0,1,float16,fp8,1,0.02214933435122172
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,128,8,64,128,1,float16,float16,3,0.07634666562080383
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,128,8,64,0,1,float16,float16,3,0.07681599756081899
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,128,8,64,128,1,float16,fp8,3,0.021877333521842957
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,128,8,64,0,1,float16,fp8,3,0.022495999932289124
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,128,8,64,128,1,float16,float16,7,0.07666133344173431
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,128,8,64,0,1,float16,float16,7,0.07689066727956136
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,128,8,64,128,1,float16,fp8,7,0.02199466774861018
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,128,8,64,128,1,float16,float16,31,0.076773335536321
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,128,8,64,0,1,float16,fp8,7,0.022042666872342426
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,128,8,64,0,1,float16,float16,31,0.076773335536321
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,128,8,64,128,1,float16,float16,15,0.07628266513347626
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,128,8,64,0,1,float16,float16,15,0.07689066727956136
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,128,8,64,128,1,float16,fp8,15,0.02183466653029124
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,128,8,64,0,1,float16,fp8,15,0.022431999444961548
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,128,8,64,128,1,float16,fp8,31,0.021914665897687275
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,128,8,64,0,1,float16,fp8,31,0.021888000269730885
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,128,8,64,128,1,float16,float16,63,0.07561600208282471
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,128,8,64,0,1,float16,float16,63,0.07554666697978973
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,128,8,64,128,1,float16,fp8,63,0.02180800090233485
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,128,8,64,0,1,float16,fp8,63,0.0220266655087471
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,128,8,64,128,1,float16,float16,127,0.0758186678091685
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,128,8,64,0,1,float16,float16,127,0.07678399980068207
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,128,8,64,128,1,float16,fp8,127,0.026869334280490875
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,128,8,64,0,1,float16,fp8,127,0.026767998933792114
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,128,8,64,128,1,float16,float16,255,0.08088533580303192
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,128,8,64,128,1,float16,fp8,255,0.026858667532602947
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,128,8,64,0,1,float16,float16,255,0.08080000181992848
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,128,8,64,0,1,float16,fp8,255,0.03885333240032196
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,128,1,64,128,1,float16,float16,1,0.5816053152084351
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,128,1,64,128,1,float16,fp8,1,0.5876213312149048
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,128,1,64,0,1,float16,float16,1,0.5819413264592489
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,128,1,64,0,1,float16,fp8,1,0.5877813498179117
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,128,1,64,128,1,float16,float16,3,0.5820159912109375
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,128,1,64,0,1,float16,float16,3,0.5824480056762695
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,128,1,64,128,1,float16,fp8,3,0.5885333220163981
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,128,1,64,0,1,float16,fp8,3,0.5873066584269205
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,128,1,64,128,1,float16,float16,7,0.6174453496932983
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,128,1,64,128,1,float16,fp8,7,0.6213866472244263
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,128,1,64,0,1,float16,float16,7,0.617786685625712
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,128,1,64,128,1,float16,float16,15,0.6407519976298014
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,128,1,64,0,1,float16,fp8,7,0.6215413411458334
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,128,1,64,0,1,float16,float16,15,0.6415359973907471
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,128,1,64,128,1,float16,fp8,15,0.6444960037867228
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,128,1,64,0,1,float16,fp8,15,0.6446719964345297
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,128,1,64,128,1,float16,float16,31,0.8233706951141357
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,128,1,64,0,1,float16,float16,31,0.8237013022104899
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,128,1,64,0,1,float16,float16,63,0.8279360135396322
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,128,1,64,128,1,float16,fp8,31,0.8293386300404867
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,128,1,64,0,1,float16,fp8,31,0.8292319774627686
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,128,1,64,128,1,float16,float16,63,0.8292480309804281
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,128,1,64,128,1,float16,fp8,63,0.8337706724802653
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,128,1,64,0,1,float16,fp8,63,0.8336959679921468
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,128,1,64,0,1,float16,float16,127,0.84279465675354
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,128,1,64,128,1,float16,float16,127,0.8437333106994629
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,128,1,64,128,1,float16,fp8,127,0.8421706358591715
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,128,1,64,0,1,float16,fp8,127,0.8420800367991129
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,128,2,64,0,1,float16,float16,1,0.5942506790161133
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,128,2,64,128,1,float16,float16,1,0.5951626698176066
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,128,2,64,0,1,float16,fp8,1,0.5988639990488688
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,128,2,64,128,1,float16,float16,3,0.5950933297475179
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,128,2,64,128,1,float16,fp8,3,0.5996426741282145
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,128,2,64,0,1,float16,float16,3,0.5955573320388794
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,128,2,64,0,1,float16,fp8,3,0.5993066628774008
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,128,2,64,128,1,float16,float16,7,0.618501345316569
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,128,2,64,0,1,float16,float16,7,0.619434674580892
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,128,2,64,128,1,float16,fp8,7,0.6230719884236654
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,128,2,64,0,1,float16,fp8,7,0.6229333480199178
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,128,2,64,128,1,float16,float16,15,0.6437439918518066
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,128,2,64,128,1,float16,fp8,15,0.6471146742502848
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,128,2,64,0,1,float16,float16,15,0.6442240079243978
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,128,2,64,0,1,float16,fp8,15,0.6470400094985962
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,128,2,64,0,1,float16,float16,31,0.8257546424865723
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,128,2,64,128,1,float16,float16,31,0.8267680009206136
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,128,2,64,128,1,float16,fp8,31,0.8309120337168375
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,128,2,64,0,1,float16,fp8,31,0.831109364827474
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,128,2,64,128,1,float16,float16,63,0.8333226839701334
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,128,2,64,128,1,float16,fp8,63,0.8375306924184164
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,128,2,64,0,1,float16,float16,63,0.8342346350351969
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,128,2,64,0,1,float16,fp8,63,0.8374239603678385
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,128,4,64,128,1,float16,float16,1,0.09708266456921895
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,128,2,64,0,1,float16,float16,127,0.8531573613484701
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,128,2,64,128,1,float16,float16,127,0.8542239665985107
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,128,4,64,128,1,float16,fp8,1,0.03610666592915853
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,128,2,64,128,1,float16,fp8,127,0.847978671391805
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,128,2,64,0,1,float16,fp8,127,0.8478506406148275
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,128,4,64,0,1,float16,float16,1,0.09734400113423665
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,128,4,64,0,1,float16,fp8,1,0.036288000643253326
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,128,4,64,128,1,float16,float16,3,0.09689600268999736
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,128,4,64,128,1,float16,fp8,3,0.035930665830771126
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,128,4,64,0,1,float16,fp8,3,0.0360959991812706
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,128,4,64,0,1,float16,float16,3,0.09725333253542583
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,128,4,64,128,1,float16,float16,7,0.09734933574994405
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,128,4,64,128,1,float16,fp8,7,0.03619199991226196
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,128,4,64,0,1,float16,float16,7,0.09724266330401103
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,128,4,64,0,1,float16,fp8,7,0.03611200054486593
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,128,4,64,128,1,float16,float16,15,0.09711466232935588
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,128,4,64,0,1,float16,float16,15,0.09640000263849895
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,128,2,64,128,1,float16,fp8,1,0.5988213221232096
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,128,4,64,0,1,float16,fp8,15,0.036144000788529716
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,128,4,64,128,1,float16,fp8,15,0.036848001182079315
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,128,4,64,128,1,float16,float16,31,0.09711466232935588
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,128,4,64,128,1,float16,fp8,31,0.036144000788529716
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,128,4,64,0,1,float16,float16,31,0.09734400113423665
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,128,4,64,0,1,float16,fp8,31,0.03606399893760681
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,128,4,64,128,1,float16,float16,63,0.09528000156084697
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,128,4,64,0,1,float16,float16,63,0.09539733330408733
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,128,4,64,128,1,float16,fp8,63,0.035962666074434914
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,128,4,64,128,1,float16,fp8,127,0.0452106644709905
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,128,4,64,128,1,float16,float16,127,0.09699199597040813
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,128,4,64,0,1,float16,fp8,63,0.03666666646798452
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,128,4,64,0,1,float16,float16,127,0.09682666261990865
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,128,4,64,0,1,float16,fp8,127,0.04442666471004486
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,128,8,64,128,1,float16,float16,1,0.14313066999117532
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,128,8,64,0,1,float16,float16,1,0.14231999715169272
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,128,8,64,128,1,float16,fp8,1,0.04070399949947993
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,128,8,64,128,1,float16,float16,3,0.14317333698272705
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,128,8,64,0,1,float16,fp8,1,0.04185600082079569
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,128,8,64,128,1,float16,fp8,3,0.04115733255942663
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,128,8,64,0,1,float16,float16,3,0.14331733187039694
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,128,8,64,0,1,float16,fp8,3,0.04078399886687597
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,128,8,64,128,1,float16,float16,7,0.14249599973360697
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,128,8,64,0,1,float16,float16,7,0.14231999715169272
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,128,8,64,128,1,float16,fp8,7,0.040720000863075256
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,128,8,64,0,1,float16,fp8,7,0.040576001008351646
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,128,8,64,0,1,float16,float16,15,0.14350400368372598
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,128,8,64,128,1,float16,float16,15,0.1440000037352244
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,128,8,64,0,1,float16,fp8,15,0.041109333435694374
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,128,8,64,128,1,float16,fp8,15,0.0410453329483668
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,128,8,64,128,1,float16,float16,31,0.14401066303253174
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,128,8,64,0,1,float16,float16,31,0.1428053379058838
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,128,8,64,128,1,float16,fp8,31,0.04109866668780645
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,128,8,64,0,1,float16,float16,63,0.14443733294804892
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,128,8,64,0,1,float16,fp8,31,0.04072533299525579
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,128,8,64,128,1,float16,float16,63,0.14316800236701965
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,128,8,64,128,1,float16,fp8,63,0.04091199984153112
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,128,8,64,0,1,float16,fp8,63,0.040661332507928215
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,128,8,64,128,1,float16,float16,127,0.14416533708572388
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,128,8,64,0,1,float16,float16,127,0.14567466576894125
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,128,1,64,128,1,float16,float16,1,0.013541333377361298
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,128,8,64,0,1,float16,fp8,127,0.05694933235645294
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,128,8,64,128,1,float16,fp8,127,0.056832000613212585
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,128,1,64,0,1,float16,float16,1,0.01379199946920077
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,128,1,64,128,1,float16,fp8,1,0.013866666704416275
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,128,1,64,0,1,float16,fp8,1,0.013679999858140945
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,128,1,64,128,1,float16,float16,3,0.013850666582584381
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,128,1,64,0,1,float16,float16,3,0.013584000368913015
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,128,1,64,128,1,float16,fp8,3,0.013754667093356451
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,128,1,64,0,1,float16,fp8,3,0.014106666048367819
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,128,1,64,128,1,float16,float16,7,0.014170666535695394
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,128,1,64,0,1,float16,float16,7,0.01422400027513504
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,128,1,64,128,1,float16,fp8,7,0.014058666924635569
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,128,1,64,0,1,float16,fp8,7,0.014303999642531076
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,128,1,64,128,1,float16,float16,15,0.014479999740918478
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,128,1,64,0,1,float16,float16,15,0.014661333213249842
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,128,1,64,128,1,float16,fp8,15,0.014917333920796713
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,128,1,64,0,1,float16,fp8,15,0.01463466634353002
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,128,1,64,128,1,float16,float16,31,0.01811733345190684
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,128,1,64,0,1,float16,float16,31,0.01806933308641116
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,128,1,64,128,1,float16,fp8,31,0.01829333355029424
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,128,1,64,0,1,float16,fp8,31,0.017952000101407368
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,128,1,64,128,1,float16,float16,63,0.0182239996890227
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,128,1,64,0,1,float16,float16,63,0.01848000039656957
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,128,1,64,128,1,float16,fp8,63,0.01836266616980235
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,128,1,64,0,1,float16,fp8,63,0.018288000176350277
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,128,1,64,128,1,float16,float16,127,0.018453333526849747
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,128,1,64,0,1,float16,float16,127,0.018565333137909572
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,128,1,64,0,1,float16,fp8,255,0.021664001047611237
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,128,1,64,128,1,float16,fp8,127,0.018272000054518383
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,128,1,64,0,1,float16,fp8,127,0.018426666657129925
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,128,1,64,128,1,float16,float16,255,0.019039999693632126
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,128,1,64,0,1,float16,float16,255,0.022122666239738464
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,128,1,64,128,1,float16,fp8,255,0.0184906671444575
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,128,1,64,128,1,float16,float16,511,0.018565333137909572
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,128,1,64,128,1,float16,fp8,1023,0.01889066646496455
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,128,1,64,0,1,float16,float16,511,0.03159466634194056
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,128,1,64,128,1,float16,fp8,511,0.01851733277241389
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,128,1,64,0,1,float16,fp8,511,0.03217600037654241
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,128,1,64,128,1,float16,float16,1023,0.0184906671444575
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,128,1,64,0,1,float16,float16,1023,0.052000001072883606
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,128,1,64,0,1,float16,fp8,1023,0.0528106689453125
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,128,1,64,128,1,float16,float16,2047,0.01850133389234543
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,128,1,64,0,1,float16,float16,2047,0.09134399890899658
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,128,1,64,128,1,float16,fp8,2047,0.018533332894245785
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,128,1,64,0,1,float16,fp8,2047,0.09382933378219604
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,128,1,64,128,1,float16,float16,4095,0.0186666672428449
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,128,1,64,0,1,float16,float16,4095,0.1692906618118286
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,128,1,64,128,1,float16,fp8,4095,0.01915733392039935
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,128,1,64,0,1,float16,fp8,8191,0.33956265449523926
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,128,1,64,0,1,float16,fp8,4095,0.1758240063985189
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,128,1,64,128,1,float16,float16,8191,0.018522666146357853
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,128,1,64,128,1,float16,fp8,16383,0.019215999792019527
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,128,1,64,0,1,float16,float16,8191,0.33161065975824994
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,128,1,64,128,1,float16,fp8,8191,0.01850133389234543
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,128,1,64,128,1,float16,float16,16383,0.018746666610240936
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,128,1,64,0,1,float16,float16,16383,0.7276533444722494
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,128,2,64,128,1,float16,float16,1,0.013834666460752487
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,128,2,64,0,1,float16,float16,1,0.013850666582584381
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,128,1,64,0,1,float16,fp8,16383,0.7782933712005615
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,128,2,64,128,1,float16,fp8,1,0.014501333236694336
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,128,2,64,0,1,float16,fp8,1,0.014218666901191076
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,128,2,64,128,1,float16,float16,3,0.013776000589132309
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,128,2,64,0,1,float16,float16,3,0.013850666582584381
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,128,2,64,128,1,float16,fp8,3,0.0143306665122509
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,128,2,64,0,1,float16,fp8,3,0.013855999956528345
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,128,2,64,128,1,float16,float16,7,0.014111999422311783
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,128,2,64,0,1,float16,float16,7,0.014127999544143677
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,128,2,64,128,1,float16,fp8,7,0.01482133318980535
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,128,2,64,0,1,float16,fp8,7,0.01469333345691363
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,128,2,64,128,1,float16,float16,15,0.014650666465361914
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,128,2,64,0,1,float16,float16,15,0.01488000030318896
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,128,2,64,128,1,float16,fp8,15,0.014442666123310724
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,128,2,64,0,1,float16,fp8,15,0.014848000059525171
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,128,2,64,128,1,float16,float16,31,0.018085333208243053
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,128,2,64,0,1,float16,float16,31,0.018053332964579265
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,128,2,64,128,1,float16,fp8,31,0.018805333723624546
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,128,2,64,0,1,float16,fp8,31,0.01852799952030182
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,128,2,64,128,1,float16,float16,63,0.018239999810854595
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,128,2,64,0,1,float16,float16,63,0.018511999398469925
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,128,2,64,128,1,float16,fp8,63,0.01828266680240631
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,128,2,64,0,1,float16,fp8,63,0.01833600054184596
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,128,2,64,128,1,float16,float16,127,0.018383999665578205
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,128,2,64,0,1,float16,float16,127,0.018298666924238205
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,128,2,64,128,1,float16,fp8,127,0.01905599981546402
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,128,2,64,0,1,float16,fp8,127,0.018906666586796444
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,128,2,64,128,1,float16,float16,255,0.018432000031073887
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,128,2,64,0,1,float16,float16,255,0.02179733415444692
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,128,2,64,128,1,float16,fp8,255,0.018266666680574417
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,128,2,64,0,1,float16,fp8,255,0.02162666618824005
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,128,2,64,128,1,float16,fp8,1023,0.018405333161354065
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,128,2,64,128,1,float16,float16,511,0.01852799952030182
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,128,2,64,0,1,float16,float16,511,0.031445334355036415
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,128,2,64,128,1,float16,fp8,511,0.019130667050679524
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,128,2,64,0,1,float16,fp8,511,0.03236266722281774
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,128,2,64,0,1,float16,float16,2047,0.09090133508046468
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,128,2,64,128,1,float16,float16,1023,0.018474667022625606
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,128,2,64,0,1,float16,float16,1023,0.05136533578236898
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,128,2,64,0,1,float16,fp8,1023,0.05264533559481303
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,128,2,64,128,1,float16,float16,2047,0.01844800015290578
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,128,2,64,0,1,float16,float16,4095,0.16948266824086508
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,128,2,64,128,1,float16,fp8,2047,0.01904533306757609
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,128,2,64,0,1,float16,fp8,2047,0.09428266684214275
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,128,2,64,128,1,float16,float16,4095,0.018511999398469925
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,128,2,64,128,1,float16,fp8,4095,0.01869333287080129
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,128,2,64,0,1,float16,fp8,4095,0.17521067460378012
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,128,2,64,128,1,float16,float16,8191,0.018458666900793713
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,128,2,64,0,1,float16,float16,8191,0.3309760093688965
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,128,2,64,128,1,float16,fp8,8191,0.01916266605257988
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,128,2,64,128,1,float16,float16,16383,0.018506667266289394
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,128,2,64,0,1,float16,fp8,8191,0.34013867378234863
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,128,2,64,128,1,float16,fp8,16383,0.01893866683046023
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,128,4,64,128,1,float16,fp8,1,0.007840000092983246
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,128,4,64,128,1,float16,float16,1,0.013733333597580591
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,128,2,64,0,1,float16,float16,16383,0.7948746681213379
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,128,4,64,0,1,float16,float16,1,0.014389333625634512
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,128,2,64,0,1,float16,fp8,16383,0.7796160380045573
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,128,4,64,0,1,float16,fp8,1,0.008277333031098047
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,128,4,64,128,1,float16,float16,3,0.014096000542243322
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,128,4,64,0,1,float16,float16,7,0.014175999909639359
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,128,4,64,0,1,float16,float16,3,0.013839999834696451
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,128,4,64,128,1,float16,fp8,3,0.007914666707317034
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,128,4,64,0,1,float16,fp8,3,0.008272000278035799
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,128,4,64,128,1,float16,float16,7,0.014794666320085526
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,128,4,64,128,1,float16,fp8,7,0.008325333396593729
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,128,4,64,0,1,float16,fp8,7,0.00873066671192646
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,128,4,64,128,1,float16,float16,15,0.01461333284775416
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,128,4,64,0,1,float16,float16,15,0.014639999717473984
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,128,4,64,128,1,float16,fp8,15,0.008410666758815447
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,128,4,64,0,1,float16,fp8,15,0.008341333518425623
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,128,4,64,128,1,float16,float16,31,0.018090666582187016
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,128,4,64,0,1,float16,float16,31,0.01874133323629697
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,128,4,64,128,1,float16,fp8,31,0.008303999900817871
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,128,4,64,0,1,float16,fp8,31,0.008661333471536636
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,128,4,64,0,1,float16,float16,127,0.018906666586796444
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,128,4,64,128,1,float16,float16,63,0.018181333939234417
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,128,4,64,0,1,float16,float16,63,0.01815466706951459
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,128,4,64,128,1,float16,fp8,63,0.008314666648705801
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,128,4,64,0,1,float16,fp8,63,0.008277333031098047
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,128,4,64,128,1,float16,float16,127,0.018357332795858383
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,128,4,64,128,1,float16,fp8,127,0.009434666484594345
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,128,4,64,128,1,float16,float16,511,0.013274667163689932
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,128,4,64,0,1,float16,fp8,127,0.009653333574533463
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,128,4,64,128,1,float16,float16,255,0.014138666292031607
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,128,4,64,0,1,float16,float16,255,0.013104000439246496
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,128,4,64,128,1,float16,fp8,255,0.00938666673998038
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,128,4,64,0,1,float16,fp8,255,0.010149333626031876
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,128,4,64,0,1,float16,float16,511,0.013557333499193192
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,128,4,64,128,1,float16,fp8,511,0.009712000067035357
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,128,4,64,0,1,float16,fp8,511,0.010474666953086853
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,128,4,64,128,1,float16,float16,1023,0.01923199991385142
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,128,4,64,0,1,float16,float16,1023,0.02070933332045873
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,128,4,64,128,1,float16,fp8,1023,0.00973866693675518
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,128,4,64,0,1,float16,fp8,1023,0.013167999684810638
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,128,4,64,128,1,float16,float16,2047,0.021781332790851593
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,128,4,64,0,1,float16,float16,2047,0.02759466568628947
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,128,4,64,128,1,float16,fp8,2047,0.011130666981140772
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,128,4,64,0,1,float16,fp8,2047,0.017258666455745697
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,128,4,64,128,1,float16,float16,4095,0.027424000203609467
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,128,4,64,0,1,float16,float16,4095,0.03830400109291077
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,128,4,64,128,1,float16,fp8,4095,0.010117333382368088
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,128,4,64,0,1,float16,fp8,4095,0.026933332284291584
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,128,4,64,128,1,float16,float16,8191,0.03193599979082743
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,128,4,64,0,1,float16,float16,8191,0.053898667295773826
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,128,4,64,128,1,float16,fp8,8191,0.010901333143313726
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,128,4,64,0,1,float16,fp8,8191,0.0359946663180987
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,128,4,64,128,1,float16,float16,16383,0.03967999915281931
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,128,4,64,0,1,float16,float16,16383,0.0823359986146291
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,128,4,64,128,1,float16,fp8,16383,0.010181333248813948
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,128,4,64,0,1,float16,fp8,16383,0.05937066674232483
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,128,8,64,128,1,float16,float16,1,0.01081066702802976
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,128,8,64,0,1,float16,float16,1,0.01033599985142549
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,128,8,64,128,1,float16,fp8,1,0.007376000285148621
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,128,8,64,0,1,float16,fp8,1,0.006464000170429547
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,128,8,64,128,1,float16,float16,3,0.011071999867757162
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,128,8,64,0,1,float16,float16,3,0.010949333508809408
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,128,8,64,128,1,float16,fp8,3,0.0064106664309899015
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,128,8,64,0,1,float16,fp8,3,0.006698666761318843
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,128,8,64,128,1,float16,float16,7,0.010714666297038397
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,128,8,64,0,1,float16,float16,7,0.010357333347201347
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,128,8,64,128,1,float16,fp8,7,0.007333333293596904
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,128,8,64,0,1,float16,fp8,7,0.006741333131988843
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,128,8,64,128,1,float16,float16,15,0.011381333072980246
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,128,8,64,0,1,float16,float16,15,0.01098666712641716
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,128,8,64,128,1,float16,fp8,15,0.006688000013430913
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,128,8,64,0,1,float16,fp8,15,0.006634666894872983
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,128,8,64,128,1,float16,float16,31,0.010784000158309937
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,128,8,64,0,1,float16,float16,31,0.010378666842977205
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,128,8,64,128,1,float16,fp8,31,0.007370666911204656
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,128,8,64,0,1,float16,fp8,31,0.006730666384100914
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,128,8,64,128,1,float16,float16,63,0.010965333630641302
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,128,8,64,0,1,float16,float16,63,0.010869332899649939
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,128,8,64,128,1,float16,fp8,63,0.00660800002515316
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,128,8,64,0,1,float16,fp8,63,0.006597333277265231
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,128,8,64,128,1,float16,float16,127,0.01080000028014183
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,128,8,64,0,1,float16,float16,127,0.010543999572594961
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,128,8,64,128,1,float16,fp8,127,0.007573333258430163
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,128,8,64,0,1,float16,fp8,127,0.007797333101431529
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,128,8,64,128,1,float16,float16,255,0.01302933320403099
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,128,8,64,0,1,float16,float16,255,0.012128000458081564
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,128,8,64,128,1,float16,fp8,255,0.008080000057816505
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,128,8,64,0,1,float16,fp8,255,0.008682666967312494
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,128,8,64,128,1,float16,float16,511,0.01714133347074191
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,128,8,64,0,1,float16,float16,511,0.01806933308641116
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,128,8,64,128,1,float16,fp8,511,0.008912000184257826
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,128,8,64,0,1,float16,fp8,1023,0.012778667112191519
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,128,8,64,0,1,float16,fp8,511,0.010304000228643417
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,128,8,64,128,1,float16,float16,2047,0.027914665639400482
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,128,8,64,128,1,float16,float16,1023,0.021717332303524017
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,128,8,64,0,1,float16,float16,1023,0.02515733242034912
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,128,8,64,128,1,float16,fp8,1023,0.008746666833758354
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,128,8,64,0,1,float16,float16,2047,0.036501333117485046
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,128,8,64,128,1,float16,fp8,2047,0.00922133338948091
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,128,8,64,128,1,float16,float16,4095,0.034671999514102936
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,128,8,64,0,1,float16,fp8,2047,0.020917333662509918
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,128,8,64,0,1,float16,float16,4095,0.05227200190226237
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,128,8,64,128,1,float16,fp8,4095,0.009296000003814697
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,128,8,64,0,1,float16,fp8,4095,0.026911998788515728
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,128,8,64,128,1,float16,float16,8191,0.034874667723973594
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,128,8,64,0,1,float16,float16,8191,0.06771199901898702
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,128,8,64,128,1,float16,fp8,8191,0.009706666693091393
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,128,8,64,0,1,float16,fp8,8191,0.036864000062147774
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,128,8,64,128,1,float16,float16,16383,0.03390933324893316
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,128,8,64,0,1,float16,float16,16383,0.08873599767684937
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,128,8,64,128,1,float16,fp8,16383,0.00943999985853831
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,128,8,64,0,1,float16,fp8,16383,0.05780800183614095
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,128,1,64,128,1,float16,float16,1,1.1560853322347004
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,128,1,64,128,1,float16,fp8,1,1.1686986287434895
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,128,1,64,128,1,float16,float16,3,1.1595573425292969
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,128,1,64,0,1,float16,float16,1,1.156160036722819
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,128,1,64,0,1,float16,float16,3,1.1610133647918701
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,128,1,64,0,1,float16,fp8,1,1.1708799997965496
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,128,1,64,128,1,float16,fp8,3,1.1691466967264812
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,128,1,64,0,1,float16,fp8,3,1.1692907015482585
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,128,1,64,128,1,float16,float16,7,1.2316266695658367
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,128,1,64,0,1,float16,float16,7,1.2314186890920003
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,128,1,64,128,1,float16,fp8,7,1.2382773558298747
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,128,1,64,128,1,float16,float16,15,1.28383469581604
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,128,1,64,0,1,float16,fp8,7,1.2387839953104656
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,128,1,64,128,1,float16,fp8,15,1.2884159882863362
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,128,1,64,0,1,float16,float16,15,1.2846773465474446
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,128,1,64,0,1,float16,fp8,15,1.2881706555684407
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,128,1,64,128,1,float16,float16,31,1.6483306884765625
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,128,1,64,0,1,float16,float16,31,1.6479093233744304
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,128,1,64,128,1,float16,fp8,31,1.6573333740234375
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,128,1,64,0,1,float16,fp8,31,1.6572267214457195
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,128,1,64,0,1,float16,float16,63,1.6718239784240723
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,128,1,64,128,1,float16,float16,63,1.670639991760254
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,128,2,64,128,1,float16,float16,1,1.184127966562907
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,128,1,64,128,1,float16,fp8,63,1.6712266604105632
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,128,2,64,0,1,float16,float16,1,1.1861813068389893
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,128,2,64,128,1,float16,fp8,1,1.192581335703532
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,128,1,64,0,1,float16,fp8,63,1.670698642730713
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,128,2,64,0,1,float16,fp8,1,1.1935359636942546
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,128,2,64,128,1,float16,float16,3,1.1868373552958171
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,128,2,64,128,1,float16,fp8,3,1.193679968516032
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,128,2,64,0,1,float16,float16,3,1.1876586278279622
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,128,2,64,128,1,float16,float16,7,1.238368034362793
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,128,2,64,0,1,float16,float16,7,1.23963197072347
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,128,2,64,0,1,float16,fp8,3,1.1964159806569417
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,128,2,64,0,1,float16,fp8,7,1.2422186533610027
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,128,2,64,0,1,float16,float16,15,1.2974507013956706
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,128,2,64,128,1,float16,float16,15,1.2968053023020427
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,128,2,64,128,1,float16,fp8,7,1.2423893610636394
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,128,2,64,0,1,float16,fp8,15,1.2965546449025471
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,128,2,64,128,1,float16,fp8,15,1.2963199615478516
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,128,2,64,128,1,float16,float16,31,1.6689119338989258
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,128,2,64,128,1,float16,fp8,31,1.6672320365905762
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,128,2,64,0,1,float16,fp8,31,1.6674346923828125
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,128,2,64,0,1,float16,float16,31,1.6702720324198406
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,128,4,64,128,1,float16,float16,1,0.1837600072224935
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,128,2,64,128,1,float16,float16,63,1.6967360178629558
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,128,2,64,128,1,float16,fp8,63,1.6934720675150554
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,128,4,64,0,1,float16,float16,1,0.18307733535766602
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,128,2,64,0,1,float16,float16,63,1.6966293652852376
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,128,4,64,128,1,float16,fp8,1,0.08518399794896443
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,128,2,64,0,1,float16,fp8,63,1.693050702412923
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,128,4,64,128,1,float16,float16,3,0.1834826668103536
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,128,4,64,0,1,float16,fp8,1,0.08547733227411906
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,128,4,64,0,1,float16,float16,3,0.184879998366038
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,128,4,64,128,1,float16,fp8,3,0.0851039985815684
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,128,4,64,0,1,float16,fp8,3,0.08514133095741272
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,128,4,64,0,1,float16,float16,7,0.1846933364868164
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,128,4,64,128,1,float16,float16,7,0.18498667081197104
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,128,4,64,0,1,float16,fp8,7,0.08514666557312012
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,128,4,64,128,1,float16,fp8,7,0.08584533135096233
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,128,4,64,0,1,float16,float16,15,0.185754656791687
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,128,4,64,128,1,float16,fp8,15,0.0855466624101003
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,128,4,64,128,1,float16,float16,15,0.1851146618525187
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,128,4,64,0,1,float16,fp8,15,0.08498133222262065
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,128,4,64,128,1,float16,float16,31,0.1859626571337382
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,128,4,64,128,1,float16,fp8,31,0.08525333801905315
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,128,4,64,0,1,float16,float16,31,0.1863306760787964
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,128,4,64,128,1,float16,float16,63,0.18593599398930868
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,128,4,64,0,1,float16,float16,63,0.18529599905014038
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,128,4,64,0,1,float16,fp8,31,0.08558932940165202
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,128,4,64,0,1,float16,fp8,63,0.08451732993125916
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,128,4,64,128,1,float16,fp8,63,0.0849120020866394
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,128,8,64,128,1,float16,float16,1,0.2778666615486145
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,128,8,64,128,1,float16,fp8,1,0.09038933118184407
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,128,8,64,0,1,float16,fp8,1,0.09034666419029236
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,128,8,64,0,1,float16,float16,1,0.2797653277715047
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,128,8,64,0,1,float16,float16,3,0.28045332431793213
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,128,8,64,0,1,float16,fp8,3,0.08986666798591614
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,128,8,64,128,1,float16,float16,3,0.28036266565322876
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,128,8,64,128,1,float16,float16,7,0.2829119960467021
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,128,8,64,128,1,float16,fp8,3,0.09078400333722432
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,128,8,64,0,1,float16,float16,7,0.2808159987131755
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,128,8,64,0,1,float16,fp8,7,0.0902453362941742
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,128,8,64,128,1,float16,fp8,7,0.09070400396982829
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,128,8,64,128,1,float16,float16,15,0.2814720074335734
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,128,8,64,128,1,float16,fp8,15,0.09040533502896626
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,128,8,64,0,1,float16,float16,15,0.28327999512354535
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,128,8,64,128,1,float16,float16,31,0.2858399947484334
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,128,8,64,0,1,float16,fp8,15,0.09087466200192769
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,128,8,64,0,1,float16,float16,31,0.2855466604232788
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,128,8,64,0,1,float16,fp8,31,0.08994666735331218
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,128,8,64,128,1,float16,float16,63,0.28360533714294434
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,128,8,64,128,1,float16,fp8,31,0.09119466940561931
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,128,8,64,128,1,float16,fp8,63,0.08956799904505412
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,128,8,64,0,1,float16,float16,63,0.2828320066134135
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,128,8,64,0,1,float16,fp8,63,0.08993066350618999
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,128,1,64,128,1,float16,float16,1,2.399845282236735
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,128,1,64,0,1,float16,float16,1,2.4058079719543457
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,128,1,64,128,1,float16,fp8,1,2.420469284057617
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,128,1,64,128,1,float16,float16,3,2.4043893814086914
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,128,1,64,0,1,float16,fp8,1,2.4207520484924316
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,128,1,64,0,1,float16,fp8,3,2.4248159726460776
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,128,1,64,0,1,float16,float16,3,2.404325326283773
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,128,1,64,128,1,float16,fp8,3,2.425978660583496
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,128,1,64,128,1,float16,float16,7,2.50657065709432
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,128,1,64,0,1,float16,float16,7,2.505887985229492
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,128,1,64,128,1,float16,fp8,7,2.517130692799886
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,128,1,64,0,1,float16,fp8,7,2.5162240664164224
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,128,1,64,0,1,float16,float16,15,2.6097493171691895
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,128,1,64,128,1,float16,float16,15,2.609648068745931
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,128,1,64,128,1,float16,fp8,15,2.627840042114258
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,128,1,64,0,1,float16,fp8,15,2.6287412643432617
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,128,1,64,128,1,float16,float16,31,3.3316052754720054
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,128,1,64,0,1,float16,float16,31,3.338464101155599
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,128,1,64,128,1,float16,fp8,31,3.3605225880940757
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,128,1,64,0,1,float16,fp8,31,3.362559954325358
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,128,2,64,128,1,float16,float16,1,2.448293368021647
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,128,2,64,0,1,float16,float16,1,2.446394602457682
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,128,2,64,128,1,float16,float16,3,2.45359468460083
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,128,2,64,0,1,float16,float16,3,2.4519786834716797
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,128,2,64,128,1,float16,fp8,1,2.4622133572896323
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,128,2,64,0,1,float16,fp8,1,2.462538719177246
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,128,2,64,128,1,float16,fp8,3,2.467024008433024
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,128,2,64,0,1,float16,fp8,3,2.4653120040893555
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,128,2,64,128,1,float16,float16,7,2.5150720278422036
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,128,2,64,128,1,float16,fp8,7,2.5314133961995444
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,128,2,64,0,1,float16,fp8,7,2.5331039428710938
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,128,2,64,0,1,float16,float16,7,2.513802687327067
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,128,2,64,128,1,float16,fp8,15,2.6524853706359863
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,128,2,64,0,1,float16,fp8,15,2.653557300567627
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,128,2,64,128,1,float16,float16,15,2.639125347137451
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,128,2,64,0,1,float16,float16,15,2.6397013664245605
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,128,2,64,128,1,float16,float16,31,3.355205217997233
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,128,4,64,128,1,float16,float16,1,0.3698720137278239
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,128,4,64,0,1,float16,float16,1,0.3683146635691325
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,128,2,64,0,1,float16,float16,31,3.3536478678385415
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,128,4,64,128,1,float16,fp8,1,0.16205333669980368
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,128,4,64,128,1,float16,float16,3,0.36822934945424396
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,128,2,64,128,1,float16,fp8,31,3.3808959325154624
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,128,4,64,0,1,float16,fp8,1,0.16216533382733664
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,128,4,64,0,1,float16,float16,3,0.36983466148376465
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,128,4,64,128,1,float16,fp8,3,0.1625333329041799
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,128,2,64,0,1,float16,fp8,31,3.3802614212036133
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,128,4,64,0,1,float16,fp8,3,0.16266666849454245
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,128,4,64,0,1,float16,float16,7,0.36536534627278644
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,128,4,64,128,1,float16,fp8,7,0.1620693306128184
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,128,4,64,128,1,float16,float16,15,0.36952535311381024
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,128,4,64,128,1,float16,float16,7,0.3672800064086914
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,128,4,64,0,1,float16,float16,15,0.36822934945424396
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,128,4,64,0,1,float16,fp8,7,0.16275200247764587
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,128,4,64,0,1,float16,fp8,15,0.16205333669980368
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,128,4,64,128,1,float16,float16,31,0.36658668518066406
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,128,4,64,128,1,float16,fp8,15,0.16267733772595724
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,128,4,64,0,1,float16,float16,31,0.3654773235321045
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,128,4,64,128,1,float16,fp8,31,0.16195199886957803
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,128,8,64,128,1,float16,float16,1,0.5649653275807699
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,128,8,64,128,1,float16,fp8,1,0.17302932341893515
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,128,8,64,0,1,float16,fp8,1,0.1718026598294576
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,128,4,64,0,1,float16,fp8,31,0.16243732968966165
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,128,8,64,128,1,float16,fp8,3,0.1729653278986613
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,128,8,64,0,1,float16,float16,3,0.5671519835789999
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,128,8,64,0,1,float16,float16,1,0.5633279879887899
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,128,8,64,0,1,float16,fp8,3,0.1732693314552307
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,128,8,64,128,1,float16,float16,3,0.5647253195444742
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,128,8,64,128,1,float16,float16,7,0.5632693370183309
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,128,8,64,0,1,float16,float16,7,0.5618720054626465
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,128,8,64,0,1,float16,fp8,7,0.1727466583251953
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,128,8,64,128,1,float16,float16,15,0.5605653524398804
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,128,8,64,128,1,float16,fp8,15,0.17281599839528403
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,128,8,64,128,1,float16,fp8,7,0.17234132687250772
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,128,1,64,128,1,float16,float16,1,0.023914667467276256
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,128,8,64,0,1,float16,fp8,15,0.17291200160980225
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,128,8,64,0,1,float16,float16,15,0.5635786851247152
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,128,1,64,0,1,float16,float16,1,0.02386666586001714
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,128,8,64,128,1,float16,float16,31,0.5637760162353516
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,128,8,64,0,1,float16,fp8,31,0.17264533042907715
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,128,1,64,128,1,float16,fp8,1,0.023775999744733173
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,128,8,64,0,1,float16,float16,31,0.5665866533915201
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,128,8,64,128,1,float16,fp8,31,0.17164800564448038
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,128,1,64,0,1,float16,fp8,1,0.02422933280467987
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,128,1,64,128,1,float16,float16,3,0.023978665471076965
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,128,1,64,0,1,float16,float16,3,0.023445333043734234
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,128,1,64,128,1,float16,fp8,3,0.0236160010099411
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,128,1,64,0,1,float16,fp8,3,0.023765332996845245
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,128,1,64,128,1,float16,float16,7,0.02455466737349828
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,128,1,64,0,1,float16,float16,7,0.024314666787783306
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,128,1,64,128,1,float16,fp8,7,0.025216000775496166
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,128,1,64,0,1,float16,fp8,7,0.02510400116443634
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,128,1,64,128,1,float16,float16,15,0.025248001019159954
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,128,1,64,0,1,float16,float16,15,0.025290665527184803
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,128,1,64,128,1,float16,fp8,15,0.02532266577084859
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,128,1,64,0,1,float16,fp8,15,0.02535466601451238
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,128,1,64,128,1,float16,float16,31,0.031178665657838184
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,128,1,64,0,1,float16,float16,31,0.03179199993610382
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,128,1,64,128,1,float16,fp8,31,0.03180799881617228
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,128,1,64,0,1,float16,fp8,31,0.03129599988460541
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,128,1,64,128,1,float16,float16,63,0.0316746657093366
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,128,1,64,0,1,float16,float16,127,0.0324799989660581
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,128,1,64,0,1,float16,float16,63,0.03169066707293192
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,128,1,64,128,1,float16,fp8,63,0.03182933231194814
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,128,1,64,0,1,float16,fp8,63,0.031701333820819855
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,128,1,64,128,1,float16,float16,127,0.03266666581233343
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,128,1,64,0,1,float16,fp8,127,0.03192000091075897
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,128,1,64,128,1,float16,fp8,127,0.032501332461833954
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,128,1,64,128,1,float16,float16,255,0.03216533362865448
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,128,1,64,0,1,float16,float16,255,0.03859733293453852
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,128,1,64,128,1,float16,fp8,255,0.03201066702604294
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,128,1,64,0,1,float16,fp8,255,0.038160001238187156
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,128,1,64,128,1,float16,float16,511,0.03229333211978277
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,128,1,64,0,1,float16,float16,511,0.057861333092053734
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,128,1,64,0,1,float16,fp8,1023,0.09752532839775085
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,128,1,64,128,1,float16,fp8,511,0.032058666149775185
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,128,1,64,0,1,float16,fp8,511,0.05853866537412008
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,128,1,64,128,1,float16,float16,1023,0.03226666649182638
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,128,1,64,0,1,float16,float16,1023,0.09616000453631084
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,128,1,64,128,1,float16,fp8,1023,0.03213333338499069
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,128,1,64,128,1,float16,float16,2047,0.03224000086386999
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,128,1,64,128,1,float16,fp8,4095,0.032655999064445496
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,128,1,64,0,1,float16,float16,2047,0.17294400930404663
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,128,1,64,128,1,float16,float16,8191,0.032261334359645844
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,128,1,64,0,1,float16,fp8,4095,0.3335786660512288
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,128,1,64,128,1,float16,fp8,2047,0.032314665615558624
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,128,1,64,0,1,float16,fp8,2047,0.17650665839513144
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,128,1,64,128,1,float16,float16,4095,0.03271466741959254
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,128,1,64,0,1,float16,float16,4095,0.32502933343251544
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,128,1,64,128,1,float16,fp8,8191,0.03279466678698858
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,128,2,64,0,1,float16,fp8,1,0.024186665813128155
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,128,1,64,0,1,float16,float16,8191,0.6375253200531006
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,128,2,64,0,1,float16,float16,3,0.024400000770886738
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,128,2,64,128,1,float16,fp8,3,0.0236160010099411
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,128,2,64,128,1,float16,float16,1,0.0235359991590182
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,128,2,64,0,1,float16,float16,1,0.02362666775782903
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,128,1,64,0,1,float16,fp8,8191,0.6499520142873129
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,128,2,64,128,1,float16,fp8,1,0.023711999257405598
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,128,2,64,128,1,float16,float16,3,0.023573334018389385
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,128,2,64,0,1,float16,fp8,3,0.024293333292007446
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,128,2,64,128,1,float16,float16,7,0.02516799916823705
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,128,2,64,0,1,float16,float16,7,0.024405332903067272
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,128,2,64,128,1,float16,fp8,7,0.02455466737349828
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,128,2,64,0,1,float16,fp8,7,0.02463999887307485
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,128,2,64,128,1,float16,float16,15,0.02517866591612498
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,128,2,64,0,1,float16,float16,15,0.025578667720158894
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,128,2,64,128,1,float16,fp8,15,0.025231999655564625
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,128,2,64,0,1,float16,fp8,15,0.025813333690166473
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,128,2,64,128,1,float16,float16,31,0.03217600037654241
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,128,2,64,0,1,float16,float16,31,0.03129599988460541
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,128,2,64,128,1,float16,fp8,31,0.031471999982992806
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,128,2,64,0,1,float16,fp8,31,0.031658666829268135
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,128,2,64,128,1,float16,float16,63,0.03150933235883713
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,128,2,64,0,1,float16,float16,63,0.03164266546567281
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,128,2,64,128,1,float16,fp8,63,0.03169599920511246
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,128,2,64,0,1,float16,fp8,63,0.032245332996050514
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,128,2,64,128,1,float16,float16,127,0.032831999162832894
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,128,2,64,0,1,float16,float16,127,0.03196266790231069
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,128,2,64,128,1,float16,fp8,127,0.032101333141326904
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,128,2,64,0,1,float16,fp8,127,0.032111999889214836
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,128,2,64,128,1,float16,float16,255,0.031888000667095184
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,128,2,64,0,1,float16,float16,255,0.03825599948565165
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,128,2,64,128,1,float16,fp8,255,0.03190933416287104
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,128,2,64,0,1,float16,fp8,255,0.038549333810806274
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,128,2,64,128,1,float16,float16,511,0.03305600086847941
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,128,2,64,0,1,float16,float16,511,0.05726933479309082
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,128,2,64,0,1,float16,fp8,1023,0.09822932879130046
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,128,2,64,128,1,float16,fp8,511,0.03226666649182638
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,128,2,64,0,1,float16,fp8,511,0.05801600217819214
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,128,2,64,128,1,float16,float16,1023,0.032127998769283295
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,128,2,64,0,1,float16,float16,1023,0.09583466251691182
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,128,2,64,128,1,float16,fp8,1023,0.03213333338499069
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,128,2,64,128,1,float16,float16,2047,0.03303466737270355
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,128,2,64,0,1,float16,float16,2047,0.17219199736913046
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,128,2,64,128,1,float16,fp8,2047,0.032442666590213776
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,128,2,64,0,1,float16,fp8,2047,0.17650665839513144
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,128,2,64,0,1,float16,fp8,4095,0.33480532964070636
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,128,2,64,128,1,float16,float16,4095,0.032933334509531655
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,128,2,64,0,1,float16,float16,4095,0.3254133264223735
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,128,2,64,128,1,float16,fp8,4095,0.03199466566244761
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,128,2,64,128,1,float16,float16,8191,0.032218667368094124
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,128,2,64,128,1,float16,fp8,8191,0.03233599911133448
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,128,2,64,0,1,float16,float16,8191,0.6448053518931071
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,128,4,64,128,1,float16,float16,1,0.011477333803971609
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,128,4,64,0,1,float16,float16,1,0.011802667131026586
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,128,2,64,0,1,float16,fp8,8191,0.6512213150660197
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,128,4,64,128,1,float16,fp8,1,0.00785600021481514
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,128,4,64,0,1,float16,fp8,1,0.007946666950980822
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,128,4,64,128,1,float16,float16,3,0.0120319997270902
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,128,4,64,0,1,float16,float16,7,0.011503999431928
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,128,4,64,0,1,float16,float16,3,0.01198400060335795
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,128,4,64,128,1,float16,fp8,3,0.00820266641676426
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,128,4,64,0,1,float16,fp8,3,0.008080000057816505
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,128,4,64,128,1,float16,float16,7,0.012432000289360682
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,128,4,64,128,1,float16,fp8,7,0.007946666950980822
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,128,4,64,0,1,float16,fp8,7,0.008522666369875273
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,128,4,64,128,1,float16,float16,15,0.011440000186363855
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,128,4,64,0,1,float16,float16,15,0.011973333855470022
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,128,4,64,128,1,float16,fp8,15,0.008223999912540117
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,128,4,64,0,1,float16,fp8,15,0.008058666562040647
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,128,4,64,128,1,float16,float16,31,0.01249066616098086
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,128,4,64,0,1,float16,float16,31,0.011754666765530905
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,128,4,64,128,1,float16,fp8,31,0.008383999889095625
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,128,4,64,0,1,float16,fp8,31,0.007807999849319458
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,128,4,64,128,1,float16,float16,63,0.011989332735538483
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,128,4,64,0,1,float16,float16,63,0.011594666788975397
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,128,4,64,128,1,float16,fp8,63,0.00810666692753633
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,128,4,64,0,1,float16,fp8,63,0.008474666625261307
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,128,4,64,128,1,float16,float16,127,0.012223999947309494
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,128,4,64,0,1,float16,float16,127,0.011578666667143503
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,128,4,64,128,1,float16,fp8,127,0.009226666763424873
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,128,4,64,0,1,float16,fp8,127,0.008613333106040955
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,128,4,64,128,1,float16,float16,255,0.014538666854302088
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,128,4,64,0,1,float16,float16,255,0.014064000298579534
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,128,4,64,128,1,float16,fp8,255,0.009589333087205887
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,128,4,64,0,1,float16,fp8,255,0.01044800008336703
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,128,4,64,128,1,float16,float16,511,0.020053333292404812
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,128,4,64,0,1,float16,float16,511,0.020346666375796
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,128,4,64,0,1,float16,fp8,1023,0.02075733368595441
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,128,4,64,128,1,float16,fp8,511,0.010186666622757912
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,128,4,64,0,1,float16,fp8,511,0.011749333391586939
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,128,4,64,128,1,float16,float16,1023,0.02571733295917511
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,128,4,64,0,1,float16,float16,1023,0.029264000554879505
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,128,4,64,128,1,float16,fp8,1023,0.010634666929642359
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,128,4,64,128,1,float16,float16,2047,0.0323840007185936
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,128,4,64,0,1,float16,float16,2047,0.042165334026018776
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,128,4,64,128,1,float16,fp8,2047,0.01090666651725769
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,128,4,64,0,1,float16,fp8,2047,0.024703999360402424
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,128,4,64,128,1,float16,float16,4095,0.03941866755485535
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,128,4,64,0,1,float16,float16,4095,0.06242666641871134
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,128,4,64,128,1,float16,fp8,4095,0.010938666760921478
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,128,4,64,0,1,float16,fp8,4095,0.039173332353432976
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,128,4,64,128,1,float16,float16,8191,0.04011200120051702
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,128,4,64,0,1,float16,float16,8191,0.08070399860541026
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,128,4,64,128,1,float16,fp8,8191,0.011173332730929056
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,128,4,64,0,1,float16,fp8,8191,0.056559999783833824
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,128,8,64,128,1,float16,float16,1,0.010543999572594961
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,128,8,64,0,1,float16,float16,1,0.011029332876205444
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,128,8,64,128,1,float16,fp8,1,0.006586666529377301
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,128,8,64,0,1,float16,fp8,1,0.00679466687142849
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,128,8,64,128,1,float16,float16,3,0.011215999722480774
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,128,8,64,0,1,float16,float16,3,0.010826667149861654
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,128,8,64,128,1,float16,fp8,3,0.0069919998447100324
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,128,8,64,0,1,float16,fp8,3,0.006634666894872983
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,128,8,64,128,1,float16,float16,7,0.010847999403874079
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,128,8,64,0,1,float16,float16,7,0.01099733387430509
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,128,8,64,128,1,float16,fp8,7,0.006805333619316419
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,128,8,64,0,1,float16,fp8,7,0.006821333120266597
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,128,8,64,128,1,float16,float16,15,0.011317333827416102
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,128,8,64,0,1,float16,float16,15,0.010661333799362183
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,128,8,64,128,1,float16,fp8,15,0.0069866664707660675
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,128,8,64,0,1,float16,fp8,15,0.006927999978264173
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,128,8,64,128,1,float16,float16,31,0.010816000401973724
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,128,8,64,0,1,float16,float16,31,0.011007999380429586
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,128,8,64,128,1,float16,fp8,31,0.006981333096822103
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,128,8,64,0,1,float16,fp8,31,0.006906666482488315
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,128,8,64,128,1,float16,float16,63,0.011429333438475927
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,128,8,64,0,1,float16,float16,63,0.010522666076819101
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,128,8,64,128,1,float16,fp8,63,0.007018666714429855
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,128,8,64,0,1,float16,fp8,63,0.006911999856432279
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,128,8,64,128,1,float16,float16,127,0.010773333410422007
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,128,8,64,0,1,float16,float16,127,0.010981333752473196
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,128,8,64,128,1,float16,fp8,127,0.00725333330531915
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,128,8,64,0,1,float16,fp8,127,0.007477333148320516
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,128,8,64,128,1,float16,float16,255,0.019173332800467808
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,128,8,64,0,1,float16,float16,255,0.018485333770513535
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,128,8,64,0,1,float16,fp8,511,0.011589333415031433
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,128,8,64,128,1,float16,fp8,255,0.008506666868925095
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,128,8,64,0,1,float16,fp8,255,0.009317333499590555
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,128,8,64,128,1,float16,float16,511,0.023402666052182514
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,128,8,64,0,1,float16,float16,511,0.025744001070658367
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,128,8,64,128,1,float16,fp8,511,0.009349333122372627
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,128,8,64,128,1,float16,float16,1023,0.034261333445707955
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,128,8,64,0,1,float16,float16,1023,0.04031999905904134
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,128,8,64,128,1,float16,fp8,1023,0.009941333283980688
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,128,8,64,0,1,float16,fp8,1023,0.0199946661790212
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,128,8,64,128,1,float16,float16,2047,0.035274667044480644
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,128,8,64,0,1,float16,float16,2047,0.05134933193524679
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,128,8,64,128,1,float16,fp8,2047,0.009610666582981745
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,128,8,64,0,1,float16,fp8,2047,0.024821333587169647
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,128,8,64,128,1,float16,float16,4095,0.035375999907652535
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,128,8,64,0,1,float16,float16,4095,0.06676266590754192
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,128,8,64,128,1,float16,fp8,4095,0.009765333185593287
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,128,8,64,0,1,float16,fp8,4095,0.034234667817751564
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,128,8,64,128,1,float16,float16,8191,0.03562666724125544
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,128,8,64,0,1,float16,float16,8191,0.0881226658821106
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,128,8,64,128,1,float16,fp8,8191,0.010405333091815313
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,128,8,64,0,1,float16,fp8,8191,0.05578133463859558
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,128,1,128,0,1,float16,float16,1,0.04164800047874451
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,128,1,128,0,1,float16,fp8,1,0.04264000058174133
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,128,1,128,0,1,float16,float16,3,0.043493335445721946
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,128,1,128,0,1,float16,fp8,3,0.044639999667803444
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,128,1,128,0,1,float16,float16,7,0.04489066700140635
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,128,1,128,0,1,float16,fp8,7,0.04609066744645437
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,128,1,128,0,1,float16,float16,63,0.05746666590372721
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,128,1,128,0,1,float16,fp8,63,0.05677333474159241
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,128,1,128,0,1,float16,float16,15,0.056501333912213646
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,128,1,128,0,1,float16,fp8,15,0.05650666852792104
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,128,1,128,0,1,float16,float16,31,0.05691733459631602
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,128,1,128,0,1,float16,fp8,31,0.05625600119431814
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,128,1,128,0,1,float16,float16,127,0.06956266860167186
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,128,1,128,0,1,float16,fp8,127,0.0691840002934138
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,128,1,128,0,1,float16,float16,255,0.10674132903416951
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,128,1,128,0,1,float16,fp8,255,0.106495996316274
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,128,1,128,0,1,float16,float16,511,0.18464533487955728
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,128,1,128,0,1,float16,fp8,511,0.1834933360417684
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,128,1,128,0,1,float16,float16,1023,0.3372533321380615
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,128,2,128,0,1,float16,float16,1,0.04194133480389913
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,128,1,128,0,1,float16,fp8,1023,0.3371359904607137
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,128,1,128,0,1,float16,float16,2047,0.6441226800282797
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,128,1,128,0,1,float16,fp8,4095,1.2589706579844158
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,128,2,128,0,1,float16,fp8,3,0.04423466821511587
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,128,1,128,0,1,float16,fp8,2047,0.645306666692098
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,128,2,128,0,1,float16,fp8,1,0.04288533329963684
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,128,2,128,0,1,float16,float16,3,0.043525333205858864
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,128,1,128,0,1,float16,float16,4095,1.2601760228474934
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,128,2,128,0,1,float16,float16,7,0.04515733321507772
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,128,2,128,0,1,float16,fp8,7,0.04677866895993551
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,128,2,128,0,1,float16,float16,15,0.057301332553227745
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,128,2,128,0,1,float16,fp8,15,0.05737066765626272
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,128,2,128,0,1,float16,float16,31,0.05815466741720835
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,128,2,128,0,1,float16,fp8,31,0.057349334160486855
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,128,2,128,0,1,float16,fp8,255,0.10848533113797505
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,128,2,128,0,1,float16,float16,63,0.057946667075157166
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,128,2,128,0,1,float16,fp8,63,0.058143998185793556
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,128,2,128,0,1,float16,float16,127,0.07011199990908305
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,128,2,128,0,1,float16,fp8,127,0.0708000014225642
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,128,2,128,0,1,float16,float16,255,0.1086133321126302
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,128,2,128,0,1,float16,float16,511,0.1856373349825541
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,128,2,128,0,1,float16,fp8,511,0.18623999754587808
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,128,2,128,0,1,float16,float16,1023,0.33796266714731854
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,128,2,128,0,1,float16,fp8,1023,0.33986131350199383
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,128,2,128,0,1,float16,float16,2047,0.6480106512705485
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,128,2,128,0,1,float16,fp8,2047,0.6467573245366415
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,128,2,128,0,1,float16,float16,4095,1.2630826632181804
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,128,2,128,0,1,float16,fp8,4095,1.2698400020599365
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,128,4,128,0,1,float16,fp8,1,0.009493333597977957
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,128,4,128,0,1,float16,fp8,3,0.009642666826645533
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,128,4,128,0,1,float16,fp8,31,0.009648000200589498
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,128,4,128,0,1,float16,fp8,7,0.009413333609700203
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,128,4,128,0,1,float16,fp8,15,0.009530666594703993
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,128,4,128,0,1,float16,fp8,63,0.009359999870260557
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,128,4,128,0,1,float16,fp8,127,0.010053333515922228
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,128,4,128,0,1,float16,float16,7,0.013130666067202887
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,128,4,128,0,1,float16,float16,3,0.013093333691358566
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,128,4,128,0,1,float16,float16,511,0.03497066597143809
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,128,4,128,0,1,float16,float16,31,0.013061333447694778
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,128,4,128,0,1,float16,float16,127,0.013104000439246496
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,128,4,128,0,1,float16,float16,63,0.01328533391157786
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,128,4,128,0,1,float16,fp8,1023,0.044138665000597634
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,128,4,128,0,1,float16,float16,1023,0.05815466741720835
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,128,4,128,0,1,float16,float16,2047,0.08413867155710857
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,128,4,128,0,1,float16,fp8,2047,0.060959999759991966
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,128,4,128,0,1,float16,float16,255,0.026202666262785595
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,128,4,128,0,1,float16,float16,4095,0.10733866691589355
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,128,4,128,0,1,float16,fp8,4095,0.08091733356316884
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,128,4,128,0,1,float16,float16,1,0.01368533323208491
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,128,4,128,0,1,float16,float16,15,0.013818666338920593
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,128,4,128,0,1,float16,fp8,255,0.013359999905029932
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,128,4,128,0,1,float16,fp8,511,0.024666666984558105
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,128,8,128,0,1,float16,fp8,1,0.008010666817426682
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,128,8,128,0,1,float16,fp8,7,0.0084906667470932
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,128,8,128,0,1,float16,fp8,31,0.00850133349498113
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,128,8,128,0,1,float16,fp8,3,0.008117333054542542
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,128,8,128,0,1,float16,fp8,63,0.008042666440208754
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,128,8,128,0,1,float16,fp8,15,0.00790933333337307
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,128,8,128,0,1,float16,fp8,127,0.009018666421373686
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,128,8,128,0,1,float16,float16,1,0.016399999459584553
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,128,8,128,0,1,float16,float16,3,0.01621333385507266
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,128,8,128,0,1,float16,float16,31,0.016336000214020412
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,128,8,128,0,1,float16,float16,511,0.0490880012512207
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,128,8,128,0,1,float16,float16,63,0.016143999993801117
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,128,8,128,0,1,float16,fp8,511,0.026213333010673523
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,128,8,128,0,1,float16,float16,1023,0.0626933326323827
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,128,8,128,0,1,float16,fp8,1023,0.03379733363787333
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,128,8,128,0,1,float16,float16,2047,0.08392533659934998
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,128,8,128,0,1,float16,fp8,2047,0.05021866659323374
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,128,8,128,0,1,float16,float16,4095,0.12079999844233195
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,128,8,128,0,1,float16,fp8,4095,0.07234666744867961
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,128,1,128,0,1,float16,float16,1,0.00919999989370505
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,128,8,128,0,1,float16,float16,127,0.016805333395799
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,128,1,128,0,1,float16,fp8,1,0.009029333169261614
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,128,1,128,0,1,float16,float16,3,0.009269333134094873
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,128,8,128,0,1,float16,float16,15,0.016224000602960587
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,128,1,128,0,1,float16,fp8,3,0.009237333511312803
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,128,1,128,0,1,float16,float16,7,0.008885333314538002
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,128,1,128,0,1,float16,fp8,7,0.00922133338948091
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,128,1,128,0,1,float16,float16,15,0.009109333157539368
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,128,1,128,0,1,float16,fp8,15,0.009429333110650381
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,128,8,128,0,1,float16,float16,255,0.03162666658560435
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,128,1,128,0,1,float16,float16,31,0.009749333063761393
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,128,1,128,0,1,float16,float16,63,0.011648000528415045
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,128,1,128,0,1,float16,fp8,31,0.00996800015370051
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,128,1,128,0,1,float16,fp8,63,0.011461333682139715
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,128,1,128,0,1,float16,float16,127,0.011642667154471079
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,128,8,128,0,1,float16,float16,7,0.01623999948302905
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,128,1,128,0,1,float16,fp8,127,0.011354666203260422
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,128,1,128,0,1,float16,float16,255,0.012015999605258306
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,128,1,128,0,1,float16,fp8,255,0.011711999773979187
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,128,1,128,0,1,float16,float16,511,0.026159999271233875
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,128,1,128,0,1,float16,fp8,511,0.02590399980545044
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,128,1,128,0,1,float16,float16,4095,0.061333333452542625
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,128,8,128,0,1,float16,fp8,255,0.018272000054518383
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,128,1,128,0,1,float16,float16,1023,0.030234667162100475
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,128,1,128,0,1,float16,fp8,1023,0.030202666918436687
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,128,1,128,0,1,float16,float16,2047,0.04071466624736786
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,128,1,128,0,1,float16,fp8,2047,0.040949332217375435
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,128,1,128,0,1,float16,fp8,4095,0.06062399844328562
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,128,1,128,0,1,float16,float16,8191,0.10157333811124165
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,128,1,128,0,1,float16,fp8,8191,0.10029333829879761
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,128,2,128,0,1,float16,fp8,3,0.009189333145817121
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,128,2,128,0,1,float16,float16,7,0.008879999940594038
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,128,1,128,0,1,float16,float16,16383,0.18250133593877158
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,128,2,128,0,1,float16,float16,1,0.008874666566650072
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,128,1,128,0,1,float16,fp8,16383,0.1795253356297811
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,128,2,128,0,1,float16,fp8,1,0.009226666763424873
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,128,2,128,0,1,float16,float16,3,0.009359999870260557
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,128,2,128,0,1,float16,fp8,7,0.00921066664159298
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,128,2,128,0,1,float16,float16,15,0.009125333279371262
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,128,2,128,0,1,float16,fp8,15,0.009301333377758661
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,128,2,128,0,1,float16,float16,31,0.010666667173306147
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,128,2,128,0,1,float16,fp8,31,0.009941333283980688
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,128,2,128,0,1,float16,fp8,255,0.011381333072980246
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,128,2,128,0,1,float16,float16,63,0.011434666812419891
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,128,2,128,0,1,float16,fp8,63,0.011685332904259363
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,128,2,128,0,1,float16,float16,127,0.011946666985750198
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,128,2,128,0,1,float16,fp8,127,0.011792000383138657
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,128,2,128,0,1,float16,float16,255,0.011706666400035223
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,128,2,128,0,1,float16,float16,511,0.02608533451954524
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,128,2,128,0,1,float16,fp8,511,0.025861332813898723
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,128,2,128,0,1,float16,float16,1023,0.030234667162100475
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,128,2,128,0,1,float16,fp8,1023,0.030202666918436687
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,128,2,128,0,1,float16,float16,2047,0.040709334115187325
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,128,2,128,0,1,float16,fp8,2047,0.04075733323891958
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,128,2,128,0,1,float16,float16,4095,0.06137600044409434
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,128,2,128,0,1,float16,fp8,4095,0.06055466830730438
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,128,2,128,0,1,float16,float16,8191,0.1018506685892741
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,128,2,128,0,1,float16,fp8,8191,0.10000000397364299
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,128,2,128,0,1,float16,float16,16383,0.18242132663726807
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,128,4,128,0,1,float16,float16,1,0.00898133342464765
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,128,2,128,0,1,float16,fp8,16383,0.17941333850224814
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,128,4,128,0,1,float16,fp8,1,0.009296000003814697
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,128,4,128,0,1,float16,float16,3,0.009141333401203156
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,128,4,128,0,1,float16,fp8,3,0.009008000294367472
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,128,4,128,0,1,float16,float16,7,0.009114666531483332
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,128,4,128,0,1,float16,float16,15,0.008986666798591614
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,128,4,128,0,1,float16,fp8,15,0.009423999736706415
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,128,4,128,0,1,float16,float16,31,0.009829333052039146
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,128,4,128,0,1,float16,fp8,31,0.009733333562811216
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,128,4,128,0,1,float16,float16,63,0.011616000284751257
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,128,4,128,0,1,float16,fp8,63,0.009418666362762451
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,128,4,128,0,1,float16,float16,127,0.011706666400035223
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,128,4,128,0,1,float16,fp8,127,0.010234666367371878
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,128,4,128,0,1,float16,float16,255,0.011733333269755045
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,128,4,128,0,1,float16,fp8,255,0.012522666404644648
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,128,4,128,0,1,float16,float16,511,0.026399999856948853
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,128,4,128,0,1,float16,float16,1023,0.03014400104681651
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,128,4,128,0,1,float16,fp8,1023,0.014101333916187286
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,128,4,128,0,1,float16,float16,2047,0.01735466718673706
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,128,4,128,0,1,float16,fp8,2047,0.01573866605758667
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,128,4,128,0,1,float16,fp8,8191,0.0220266655087471
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,128,4,128,0,1,float16,float16,4095,0.01860800012946129
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,128,4,128,0,1,float16,fp8,4095,0.01820266619324684
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,128,4,128,0,1,float16,float16,8191,0.022543999056021374
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,128,4,128,0,1,float16,float16,16383,0.025407999753952026
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,128,4,128,0,1,float16,fp8,16383,0.026367999613285065
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,128,8,128,0,1,float16,float16,1,0.009082666908701261
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,128,8,128,0,1,float16,fp8,1,0.007061333085099856
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,128,8,128,0,1,float16,float16,3,0.009189333145817121
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,128,8,128,0,1,float16,float16,7,0.008842666943868002
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,128,8,128,0,1,float16,fp8,7,0.00725333330531915
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,128,8,128,0,1,float16,float16,15,0.00922133338948091
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,128,8,128,0,1,float16,fp8,15,0.0074986666440963745
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,128,4,128,0,1,float16,fp8,7,0.00933333362142245
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,128,8,128,0,1,float16,float16,31,0.01020800011853377
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,128,8,128,0,1,float16,fp8,31,0.007642666498819987
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,128,8,128,0,1,float16,float16,63,0.011930666863918304
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,128,8,128,0,1,float16,fp8,63,0.0075040000180403394
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,128,8,128,0,1,float16,float16,127,0.011503999431928
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,128,8,128,0,1,float16,float16,255,0.012272000312805176
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,128,8,128,0,1,float16,fp8,255,0.010058666889866194
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,128,8,128,0,1,float16,float16,511,0.026341333985328674
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,128,8,128,0,1,float16,float16,1023,0.013893333574136099
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,128,4,128,0,1,float16,fp8,511,0.013290667285521826
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,128,8,128,0,1,float16,fp8,1023,0.010821333775917688
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,128,8,128,0,1,float16,float16,2047,0.015594666202863058
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,128,8,128,0,1,float16,fp8,2047,0.011813333878914515
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,128,8,128,0,1,float16,float16,4095,0.01613866661985715
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,128,8,128,0,1,float16,fp8,4095,0.013872000078360239
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,128,8,128,0,1,float16,float16,8191,0.025909334421157837
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,128,8,128,0,1,float16,fp8,8191,0.01850133389234543
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,128,8,128,0,1,float16,float16,16383,0.0305173322558403
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,128,1,128,0,1,float16,float16,1,0.008863999818762144
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,128,8,128,0,1,float16,fp8,16383,0.022319999833901722
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,128,1,128,0,1,float16,fp8,1,0.008442666381597519
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,128,1,128,0,1,float16,float16,3,0.008842666943868002
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,128,1,128,0,1,float16,fp8,3,0.009029333169261614
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,128,1,128,0,1,float16,float16,7,0.009125333279371262
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,128,1,128,0,1,float16,fp8,7,0.008672000219424566
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,128,1,128,0,1,float16,float16,15,0.009455999980370203
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,128,8,128,0,1,float16,fp8,3,0.006773333375652631
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,128,1,128,0,1,float16,fp8,15,0.009813333551088968
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,128,1,128,0,1,float16,float16,31,0.01146666705608368
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,128,1,128,0,1,float16,fp8,31,0.011029332876205444
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,128,1,128,0,1,float16,float16,63,0.010981333752473196
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,128,1,128,0,1,float16,fp8,63,0.01118933285276095
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,128,1,128,0,1,float16,float16,127,0.011146667102972666
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,128,1,128,0,1,float16,fp8,127,0.011674666156371435
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,128,1,128,0,1,float16,float16,255,0.013343999783198038
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,128,1,128,0,1,float16,fp8,255,0.012997332960367203
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,128,1,128,0,1,float16,float16,511,0.017845333864291508
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,128,1,128,0,1,float16,fp8,511,0.018250666558742523
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,128,1,128,0,1,float16,float16,1023,0.02784000088771184
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,128,8,128,0,1,float16,fp8,511,0.009503999724984169
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,128,1,128,0,1,float16,fp8,1023,0.02867199977238973
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,128,1,128,0,1,float16,float16,2047,0.04836800197760264
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,128,1,128,0,1,float16,fp8,2047,0.048528000712394714
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,128,1,128,0,1,float16,float16,4095,0.08740267157554626
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,128,1,128,0,1,float16,fp8,4095,0.08871466914812724
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,128,1,128,0,1,float16,float16,8191,0.16619199514389038
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,128,1,128,0,1,float16,fp8,8191,0.16815465688705444
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,128,1,128,0,1,float16,float16,16383,0.3255946636199951
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,128,2,128,0,1,float16,float16,1,0.008522666369875273
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,128,2,128,0,1,float16,fp8,1,0.008522666369875273
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,128,1,128,0,1,float16,fp8,16383,0.329093337059021
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,128,2,128,0,1,float16,float16,3,0.008645333349704742
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,128,8,128,0,1,float16,fp8,127,0.007754666730761528
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,128,2,128,0,1,float16,fp8,3,0.008602666358153025
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,128,2,128,0,1,float16,float16,7,0.008373333141207695
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,128,2,128,0,1,float16,fp8,7,0.009242666885256767
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,128,2,128,0,1,float16,float16,15,0.009397333487868309
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,128,2,128,0,1,float16,fp8,15,0.009413333609700203
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,128,2,128,0,1,float16,float16,31,0.011589333415031433
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,128,2,128,0,1,float16,fp8,31,0.011253333340088526
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,128,2,128,0,1,float16,float16,63,0.011445333560307821
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,128,2,128,0,1,float16,fp8,63,0.010970667004585266
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,128,2,128,0,1,float16,float16,127,0.011029332876205444
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,128,2,128,0,1,float16,fp8,127,0.011690666278203329
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,128,2,128,0,1,float16,float16,255,0.01292266696691513
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,128,2,128,0,1,float16,fp8,255,0.013023999830087027
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,128,2,128,0,1,float16,fp8,2047,0.048938666780789696
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,128,2,128,0,1,float16,float16,511,0.01844266677896182
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,128,2,128,0,1,float16,fp8,511,0.01811733345190684
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,128,2,128,0,1,float16,float16,1023,0.0277813325325648
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,128,2,128,0,1,float16,fp8,1023,0.02826133370399475
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,128,2,128,0,1,float16,float16,16383,0.3280106584231059
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,128,2,128,0,1,float16,float16,2047,0.0476746658484141
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,128,2,128,0,1,float16,float16,4095,0.08819199601809184
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,128,2,128,0,1,float16,fp8,4095,0.08867733677228291
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,128,2,128,0,1,float16,float16,8191,0.1676479975382487
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,128,2,128,0,1,float16,fp8,8191,0.16927466789881387
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,128,4,128,0,1,float16,float16,1,0.008517333616813024
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,128,2,128,0,1,float16,fp8,16383,0.32716800769170123
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,128,4,128,0,1,float16,fp8,1,0.009114666531483332
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,128,4,128,0,1,float16,float16,3,0.008389333263039589
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,128,4,128,0,1,float16,fp8,3,0.00938666673998038
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,128,4,128,0,1,float16,float16,7,0.008693333094318708
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,128,4,128,0,1,float16,float16,63,0.011050666371981302
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,128,4,128,0,1,float16,fp8,7,0.009861333295702934
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,128,4,128,0,1,float16,float16,15,0.009898666913310686
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,128,4,128,0,1,float16,fp8,15,0.008986666798591614
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,128,4,128,0,1,float16,float16,31,0.010832000523805618
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,128,4,128,0,1,float16,fp8,31,0.009674666449427605
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,128,4,128,0,1,float16,fp8,63,0.009557333464423815
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,128,4,128,0,1,float16,float16,127,0.011274666835864386
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,128,4,128,0,1,float16,fp8,127,0.010026666646202406
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,128,4,128,0,1,float16,float16,255,0.013893333574136099
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,128,4,128,0,1,float16,float16,2047,0.018474667022625606
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,128,4,128,0,1,float16,fp8,255,0.012944000462690989
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,128,4,128,0,1,float16,float16,511,0.017893332988023758
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,128,4,128,0,1,float16,fp8,511,0.012960000584522883
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,128,4,128,0,1,float16,float16,1023,0.0164533331990242
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,128,4,128,0,1,float16,fp8,1023,0.013712000101804733
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,128,4,128,0,1,float16,fp8,2047,0.015504000087579092
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,128,4,128,0,1,float16,float16,4095,0.019914666811625164
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,128,4,128,0,1,float16,fp8,4095,0.018629333625237148
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,128,4,128,0,1,float16,float16,8191,0.03152533372243246
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,128,4,128,0,1,float16,fp8,8191,0.024447999894618988
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,128,4,128,0,1,float16,float16,16383,0.038191998998324074
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,128,4,128,0,1,float16,fp8,16383,0.03068266560633977
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,128,8,128,0,1,float16,float16,7,0.008618666479984919
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,128,8,128,0,1,float16,float16,1,0.008282666405042013
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,128,8,128,0,1,float16,fp8,1,0.007296000296870868
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,128,8,128,0,1,float16,float16,3,0.009194666519761086
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,128,8,128,0,1,float16,fp8,3,0.00690133310854435
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,128,8,128,0,1,float16,fp8,7,0.007541333635648091
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,128,8,128,0,1,float16,float16,15,0.009322666873534521
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,128,8,128,0,1,float16,fp8,15,0.00720000018676122
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,128,8,128,0,1,float16,float16,31,0.010826667149861654
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,128,8,128,0,1,float16,fp8,31,0.007525333513816197
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,128,8,128,0,1,float16,float16,63,0.011685332904259363
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,128,8,128,0,1,float16,fp8,63,0.00679466687142849
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,128,8,128,0,1,float16,float16,127,0.011509332805871964
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,128,8,128,0,1,float16,fp8,127,0.00797333319981893
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,128,8,128,0,1,float16,float16,255,0.012938667088747025
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,128,8,128,0,1,float16,fp8,255,0.00922133338948091
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,128,8,128,0,1,float16,float16,511,0.013514666507641474
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,128,8,128,0,1,float16,fp8,511,0.01028266673286756
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,128,8,128,0,1,float16,float16,4095,0.023893333971500397
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,128,8,128,0,1,float16,float16,1023,0.013610667238632837
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,128,8,128,0,1,float16,fp8,1023,0.010725333044926325
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,128,8,128,0,1,float16,float16,2047,0.02059200033545494
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,128,8,128,0,1,float16,fp8,2047,0.01321600005030632
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,128,8,128,0,1,float16,fp8,4095,0.015775999675194424
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,128,8,128,0,1,float16,float16,8191,0.035573333501815796
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,128,8,128,0,1,float16,fp8,8191,0.021269333859284718
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,128,8,128,0,1,float16,float16,16383,0.05055999755859375
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,128,1,128,0,1,float16,float16,1,0.07849066456158955
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,128,8,128,0,1,float16,fp8,16383,0.03638399889071783
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,128,1,128,0,1,float16,fp8,1,0.08021333316961925
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,128,1,128,0,1,float16,float16,3,0.0816480020682017
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,128,1,128,0,1,float16,fp8,3,0.08352532982826233
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,128,1,128,0,1,float16,float16,7,0.08557867010434468
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,128,1,128,0,1,float16,fp8,7,0.08618666728337605
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,128,1,128,0,1,float16,float16,15,0.10831466317176819
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,128,1,128,0,1,float16,fp8,15,0.10822400450706482
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,128,1,128,0,1,float16,float16,31,0.10846400260925293
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,128,1,128,0,1,float16,fp8,31,0.10785067081451416
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,128,1,128,0,1,float16,float16,63,0.1090880036354065
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,128,1,128,0,1,float16,fp8,63,0.10873599847157796
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,128,1,128,0,1,float16,float16,127,0.13287466764450073
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,128,1,128,0,1,float16,fp8,511,0.3571999867757161
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,128,1,128,0,1,float16,fp8,127,0.1320319970448812
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,128,1,128,0,1,float16,float16,255,0.20720533529917398
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,128,1,128,0,1,float16,fp8,255,0.20693333943684897
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,128,1,128,0,1,float16,float16,511,0.3584853410720825
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,128,1,128,0,1,float16,float16,1023,0.6571466525395712
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,128,1,128,0,1,float16,fp8,1023,0.6566666762034098
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,128,2,128,0,1,float16,float16,1,0.07890666524569194
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,128,2,128,0,1,float16,fp8,1,0.08081600069999695
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,128,2,128,0,1,float16,float16,3,0.08213333288828532
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,128,1,128,0,1,float16,float16,2047,1.2616159915924072
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,128,1,128,0,1,float16,fp8,2047,1.257152001063029
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,128,2,128,0,1,float16,fp8,3,0.0835040012995402
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,128,2,128,0,1,float16,float16,7,0.08560533324877422
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,128,2,128,0,1,float16,fp8,7,0.08708266417185466
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,128,2,128,0,1,float16,float16,15,0.10912000139554341
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,128,2,128,0,1,float16,float16,127,0.13435733318328857
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,128,2,128,0,1,float16,fp8,15,0.10860266288121541
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,128,2,128,0,1,float16,float16,31,0.10957866907119751
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,128,2,128,0,1,float16,fp8,31,0.10943999886512756
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,128,2,128,0,1,float16,float16,63,0.10986666878064473
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,128,2,128,0,1,float16,fp8,63,0.10951466361681621
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,128,2,128,0,1,float16,fp8,127,0.13405332962671915
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,128,2,128,0,1,float16,float16,255,0.20906132459640503
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,128,2,128,0,1,float16,fp8,255,0.20985599358876547
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,128,2,128,0,1,float16,float16,511,0.35900266965230304
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,128,2,128,0,1,float16,fp8,511,0.3599146604537964
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,128,2,128,0,1,float16,float16,1023,0.6638559897740682
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,128,2,128,0,1,float16,fp8,1023,0.659114678700765
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,128,4,128,0,1,float16,float16,1,0.020373333245515823
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,128,4,128,0,1,float16,fp8,1,0.010304000228643417
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,128,2,128,0,1,float16,float16,2047,1.2638400395711262
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,128,4,128,0,1,float16,float16,3,0.02015999952952067
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,128,4,128,0,1,float16,fp8,3,0.010277333358923594
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,128,2,128,0,1,float16,fp8,2047,1.267573356628418
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,128,4,128,0,1,float16,float16,7,0.02067733307679494
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,128,4,128,0,1,float16,fp8,7,0.01028266673286756
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,128,4,128,0,1,float16,float16,15,0.02025066688656807
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,128,4,128,0,1,float16,fp8,15,0.01020800011853377
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,128,4,128,0,1,float16,float16,31,0.020389333367347717
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,128,4,128,0,1,float16,fp8,31,0.010122666756312052
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,128,4,128,0,1,float16,float16,63,0.02029866725206375
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,128,4,128,0,1,float16,fp8,63,0.010117333382368088
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,128,4,128,0,1,float16,float16,127,0.02067199970285098
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,128,4,128,0,1,float16,float16,1023,0.07876800000667572
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,128,4,128,0,1,float16,fp8,127,0.011445333560307821
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,128,4,128,0,1,float16,float16,255,0.038917332887649536
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,128,4,128,0,1,float16,fp8,255,0.0233599990606308
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,128,4,128,0,1,float16,float16,511,0.06070933242638906
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,128,4,128,0,1,float16,fp8,511,0.04333333174387614
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,128,4,128,0,1,float16,fp8,1023,0.06081599990526835
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,128,4,128,0,1,float16,fp8,2047,0.08084799846013387
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,128,4,128,0,1,float16,float16,2047,0.10825600226720174
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,128,8,128,0,1,float16,float16,1,0.02741333345572154
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,128,8,128,0,1,float16,fp8,1,0.012698666503032049
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,128,8,128,0,1,float16,float16,3,0.026517334083716076
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,128,8,128,0,1,float16,fp8,3,0.012650666137536367
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,128,8,128,0,1,float16,float16,7,0.026858667532602947
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,128,8,128,0,1,float16,fp8,7,0.012986666212479273
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,128,8,128,0,1,float16,float16,15,0.027130665878454845
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,128,8,128,0,1,float16,fp8,15,0.012863999853531519
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,128,8,128,0,1,float16,float16,31,0.026474667092164356
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,128,8,128,0,1,float16,fp8,31,0.012810666114091873
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,128,8,128,0,1,float16,float16,255,0.053317333261171974
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,128,8,128,0,1,float16,float16,63,0.02643733223279317
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,128,8,128,0,1,float16,fp8,63,0.012597333639860153
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,128,8,128,0,1,float16,float16,127,0.027002667387326557
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,128,8,128,0,1,float16,fp8,127,0.015541333705186844
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,128,8,128,0,1,float16,fp8,255,0.02571733295917511
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,128,8,128,0,1,float16,float16,511,0.06690133114655812
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,128,8,128,0,1,float16,fp8,511,0.034927998979886375
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,128,8,128,0,1,float16,float16,1023,0.08366933465003967
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,128,8,128,0,1,float16,fp8,1023,0.050842667619387306
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,128,8,128,0,1,float16,float16,2047,0.12276267011960347
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,128,1,128,0,1,float16,float16,1,0.013365333278973898
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,128,1,128,0,1,float16,float16,7,0.013621332744757334
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,128,8,128,0,1,float16,fp8,2047,0.07397866745789845
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,128,1,128,0,1,float16,fp8,1,0.013728000223636627
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,128,1,128,0,1,float16,float16,3,0.013866666704416275
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,128,1,128,0,1,float16,fp8,3,0.013973332941532135
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,128,1,128,0,1,float16,fp8,7,0.014277332772811254
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,128,1,128,0,1,float16,float16,15,0.014837333311637243
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,128,1,128,0,1,float16,fp8,15,0.014864000181357065
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,128,1,128,0,1,float16,float16,31,0.018906666586796444
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,128,1,128,0,1,float16,fp8,31,0.01829333355029424
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,128,1,128,0,1,float16,float16,63,0.0185759998857975
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,128,1,128,0,1,float16,fp8,63,0.018426666657129925
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,128,1,128,0,1,float16,float16,127,0.018933333456516266
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,128,1,128,0,1,float16,fp8,127,0.018464000274737675
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,128,1,128,0,1,float16,float16,255,0.02181333303451538
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,128,1,128,0,1,float16,fp8,255,0.0220266655087471
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,128,1,128,0,1,float16,float16,511,0.03253333270549774
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,128,1,128,0,1,float16,fp8,511,0.03202133377393087
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,128,1,128,0,1,float16,float16,1023,0.051551997661590576
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,128,1,128,0,1,float16,fp8,1023,0.0517493337392807
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,128,1,128,0,1,float16,float16,2047,0.09061333537101746
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,128,1,128,0,1,float16,fp8,2047,0.09107200304667155
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,128,1,128,0,1,float16,float16,4095,0.16781334082285562
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,128,1,128,0,1,float16,fp8,4095,0.16921067237854004
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,128,1,128,0,1,float16,float16,8191,0.32417066891988117
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,128,1,128,0,1,float16,fp8,8191,0.325055996576945
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,128,2,128,0,1,float16,float16,1,0.013744000345468521
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,128,2,128,0,1,float16,fp8,1,0.01360000049074491
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,128,1,128,0,1,float16,float16,16383,0.6352853377660116
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,128,1,128,0,1,float16,fp8,16383,0.6362133423487345
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,128,2,128,0,1,float16,float16,3,0.013594667116800943
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,128,2,128,0,1,float16,fp8,3,0.013850666582584381
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,128,2,128,0,1,float16,float16,7,0.014266667266686758
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,128,2,128,0,1,float16,fp8,7,0.014101333916187286
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,128,2,128,0,1,float16,float16,15,0.01516266663869222
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,128,2,128,0,1,float16,float16,127,0.01926400015751521
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,128,2,128,0,1,float16,fp8,15,0.014917333920796713
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,128,2,128,0,1,float16,float16,31,0.018506667266289394
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,128,2,128,0,1,float16,fp8,255,0.02274666726589203
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,128,2,128,0,1,float16,fp8,31,0.0183146670460701
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,128,2,128,0,1,float16,float16,63,0.018330667167901993
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,128,2,128,0,1,float16,fp8,63,0.018394666413466137
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,128,2,128,0,1,float16,fp8,127,0.018687999496857326
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,128,2,128,0,1,float16,float16,255,0.022175999979178112
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,128,2,128,0,1,float16,float16,511,0.032042667269706726
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,128,2,128,0,1,float16,fp8,511,0.03214933226505915
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,128,2,128,0,1,float16,float16,1023,0.052005335688591
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,128,2,128,0,1,float16,fp8,1023,0.05173333485921224
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,128,2,128,0,1,float16,float16,2047,0.09021866321563721
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,128,2,128,0,1,float16,fp8,2047,0.09106666843096416
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,128,2,128,0,1,float16,float16,4095,0.16856000820795694
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,128,2,128,0,1,float16,fp8,4095,0.16953599452972412
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,128,2,128,0,1,float16,float16,8191,0.32445865869522095
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,128,2,128,0,1,float16,fp8,8191,0.3250826597213745
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,128,4,128,0,1,float16,float16,1,0.013471999516089758
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,128,2,128,0,1,float16,float16,16383,0.6510346730550131
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,128,4,128,0,1,float16,fp8,1,0.008938666433095932
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,128,2,128,0,1,float16,fp8,16383,0.6361920038859049
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,128,4,128,0,1,float16,float16,3,0.013866666704416275
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,128,4,128,0,1,float16,fp8,3,0.00916800027092298
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,128,4,128,0,1,float16,float16,7,0.01369599997997284
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,128,4,128,0,1,float16,fp8,7,0.009648000200589498
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,128,4,128,0,1,float16,float16,15,0.014752000570297241
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,128,4,128,0,1,float16,fp8,15,0.009786666681369146
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,128,4,128,0,1,float16,float16,31,0.018320000420014065
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,128,4,128,0,1,float16,fp8,31,0.009008000294367472
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,128,4,128,0,1,float16,float16,63,0.018618666877349217
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,128,4,128,0,1,float16,fp8,63,0.009183999771873156
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,128,4,128,0,1,float16,float16,127,0.018613333503405254
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,128,4,128,0,1,float16,fp8,127,0.010629333555698395
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,128,4,128,0,1,float16,float16,255,0.021770666042963665
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,128,4,128,0,1,float16,fp8,255,0.012565333396196365
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,128,4,128,0,1,float16,float16,511,0.015504000087579092
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,128,4,128,0,1,float16,fp8,511,0.012650666137536367
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,128,4,128,0,1,float16,float16,1023,0.016176000237464905
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,128,4,128,0,1,float16,fp8,1023,0.014175999909639359
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,128,4,128,0,1,float16,float16,8191,0.045125335454940796
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,128,4,128,0,1,float16,float16,2047,0.024661332368850708
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,128,4,128,0,1,float16,fp8,2047,0.017535999417304993
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,128,4,128,0,1,float16,float16,4095,0.028901333610216778
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,128,4,128,0,1,float16,fp8,4095,0.02070933332045873
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,128,4,128,0,1,float16,fp8,8191,0.03575466573238373
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,128,4,128,0,1,float16,float16,16383,0.06519466638565063
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,128,4,128,0,1,float16,fp8,16383,0.04479999840259552
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,128,8,128,0,1,float16,float16,1,0.013418667018413544
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,128,8,128,0,1,float16,fp8,1,0.007055999711155891
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,128,8,128,0,1,float16,float16,3,0.013663999736309052
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,128,8,128,0,1,float16,fp8,3,0.00696000022192796
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,128,8,128,0,1,float16,float16,7,0.013936000565687815
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,128,8,128,0,1,float16,fp8,7,0.00720000018676122
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,128,8,128,0,1,float16,float16,15,0.014912000546852747
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,128,8,128,0,1,float16,fp8,15,0.007311999797821045
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,128,8,128,0,1,float16,float16,31,0.018325333793958027
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,128,8,128,0,1,float16,fp8,31,0.007221333061655362
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,128,8,128,0,1,float16,float16,63,0.018559999763965607
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,128,8,128,0,1,float16,fp8,63,0.007338666667540868
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,128,8,128,0,1,float16,float16,127,0.018863999595244724
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,128,8,128,0,1,float16,fp8,127,0.007850666840871176
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,128,8,128,0,1,float16,float16,255,0.013343999783198038
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,128,8,128,0,1,float16,fp8,255,0.009381333366036415
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,128,8,128,0,1,float16,float16,511,0.013770667215188345
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,128,8,128,0,1,float16,fp8,511,0.009850666547815004
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,128,8,128,0,1,float16,float16,1023,0.020047999918460846
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,128,8,128,0,1,float16,fp8,1023,0.012138667205969492
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,128,8,128,0,1,float16,float16,2047,0.02828266719977061
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,128,8,128,0,1,float16,fp8,2047,0.015050667027632395
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,128,8,128,0,1,float16,float16,4095,0.04015466570854187
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,128,8,128,0,1,float16,fp8,4095,0.02476266771554947
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,128,8,128,0,1,float16,float16,8191,0.058592001597086586
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,128,8,128,0,1,float16,fp8,8191,0.031386665999889374
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,128,8,128,0,1,float16,float16,16383,0.08665600419044495
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,128,8,128,0,1,float16,fp8,16383,0.05492799977461497
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,128,1,128,0,1,float16,float16,1,0.15078933040301004
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,128,1,128,0,1,float16,fp8,1,0.1548426647981008
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,128,1,128,0,1,float16,float16,3,0.15782933433850607
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,128,1,128,0,1,float16,fp8,3,0.16075199842453003
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,128,1,128,0,1,float16,float16,7,0.16460266709327698
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,128,1,128,0,1,float16,fp8,7,0.16757333278656006
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,128,1,128,0,1,float16,float16,15,0.21166932582855225
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,128,1,128,0,1,float16,fp8,15,0.21035732825597128
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,128,1,128,0,1,float16,float16,127,0.25859200954437256
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,128,1,128,0,1,float16,float16,31,0.21171732743581137
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,128,1,128,0,1,float16,fp8,31,0.21071465810139975
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,128,1,128,0,1,float16,float16,63,0.21296000480651855
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,128,1,128,0,1,float16,fp8,63,0.21176532904307047
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,128,1,128,0,1,float16,fp8,127,0.2587733268737793
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,128,1,128,0,1,float16,float16,255,0.40811200936635333
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,128,1,128,0,1,float16,fp8,255,0.40698667367299396
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,128,2,128,0,1,float16,float16,1,0.15196800231933594
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,128,1,128,0,1,float16,float16,511,0.7061866919199625
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,128,1,128,0,1,float16,float16,1023,1.3024853070576985
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,128,1,128,0,1,float16,fp8,511,0.7056319713592529
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,128,2,128,0,1,float16,fp8,1,0.1562346617380778
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,128,2,128,0,1,float16,float16,3,0.15865600109100342
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,128,2,128,0,1,float16,float16,15,0.21292267243067423
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,128,2,128,0,1,float16,fp8,3,0.16127467155456543
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,128,1,128,0,1,float16,fp8,1023,1.2970773379007976
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,128,2,128,0,1,float16,fp8,31,0.21210134029388428
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,128,2,128,0,1,float16,float16,7,0.16556800405184427
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,128,2,128,0,1,float16,fp8,7,0.16849599281946817
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,128,2,128,0,1,float16,fp8,15,0.21195733547210693
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,128,2,128,0,1,float16,float16,31,0.21296000480651855
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,128,2,128,0,1,float16,float16,63,0.21401600042978922
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,128,2,128,0,1,float16,fp8,63,0.21319466829299927
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,128,2,128,0,1,float16,float16,127,0.26182933648427326
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,128,2,128,0,1,float16,fp8,127,0.2616159915924072
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,128,2,128,0,1,float16,float16,255,0.4097653230031331
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,128,2,128,0,1,float16,fp8,255,0.41039466857910156
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,128,2,128,0,1,float16,float16,511,0.7142186959584554
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,128,2,128,0,1,float16,fp8,511,0.7075093587239584
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,128,4,128,0,1,float16,float16,1,0.03513599932193756
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,128,4,128,0,1,float16,float16,7,0.03461333364248276
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,128,4,128,0,1,float16,fp8,1,0.01738133281469345
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,128,4,128,0,1,float16,float16,3,0.035045333206653595
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,128,2,128,0,1,float16,float16,1023,1.3056853612263997
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,128,4,128,0,1,float16,fp8,3,0.017242666333913803
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,128,2,128,0,1,float16,fp8,1023,1.3079360326131184
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,128,4,128,0,1,float16,fp8,7,0.017269333203633625
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,128,4,128,0,1,float16,float16,15,0.035162667433420815
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,128,4,128,0,1,float16,fp8,15,0.017136000096797943
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,128,4,128,0,1,float16,float16,31,0.03498666733503342
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,128,4,128,0,1,float16,fp8,31,0.01720000058412552
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,128,4,128,0,1,float16,float16,63,0.03513066718975703
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,128,4,128,0,1,float16,fp8,63,0.01727466657757759
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,128,4,128,0,1,float16,float16,127,0.03429866582155228
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,128,4,128,0,1,float16,fp8,127,0.01956266661485036
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,128,4,128,0,1,float16,float16,255,0.06760533154010773
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,128,4,128,0,1,float16,fp8,255,0.04453866680463155
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,128,4,128,0,1,float16,float16,511,0.08330133557319641
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,128,4,128,0,1,float16,fp8,511,0.06525866687297821
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,128,4,128,0,1,float16,float16,1023,0.10889066259066264
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,128,4,128,0,1,float16,fp8,1023,0.0821919987599055
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,128,8,128,0,1,float16,float16,1,0.04699199895064036
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,128,8,128,0,1,float16,fp8,1,0.01830400029818217
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,128,8,128,0,1,float16,float16,3,0.04695466657479604
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,128,8,128,0,1,float16,fp8,15,0.018245333184798557
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,128,8,128,0,1,float16,float16,31,0.047050664822260536
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,128,8,128,0,1,float16,fp8,3,0.018207999567190807
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,128,8,128,0,1,float16,float16,7,0.04734933376312256
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,128,8,128,0,1,float16,fp8,7,0.018245333184798557
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,128,8,128,0,1,float16,float16,15,0.04814399778842926
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,128,8,128,0,1,float16,fp8,31,0.018197332819302876
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,128,8,128,0,1,float16,float16,63,0.04650133351484934
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,128,8,128,0,1,float16,fp8,63,0.018207999567190807
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,128,8,128,0,1,float16,float16,127,0.0479360024134318
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,128,8,128,0,1,float16,fp8,127,0.022757334013779957
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,128,8,128,0,1,float16,float16,255,0.056703999638557434
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,128,8,128,0,1,float16,fp8,255,0.031930667658646904
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,128,8,128,0,1,float16,float16,511,0.07361066838105519
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,128,8,128,0,1,float16,fp8,511,0.04710933566093445
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,128,8,128,0,1,float16,float16,1023,0.10923199852307637
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,128,8,128,0,1,float16,fp8,1023,0.06870933373769124
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,128,1,128,0,1,float16,float16,1,0.2946346600850423
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,128,1,128,0,1,float16,fp8,1,0.3027199904123942
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,128,1,128,0,1,float16,float16,3,0.30983465909957886
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,128,1,128,0,1,float16,fp8,3,0.3158133427302043
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,128,1,128,0,1,float16,float16,7,0.32389867305755615
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,128,1,128,0,1,float16,fp8,7,0.3296106656392415
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,128,1,128,0,1,float16,float16,15,0.4188479979832967
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,128,1,128,0,1,float16,fp8,15,0.41625599066416424
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,128,1,128,0,1,float16,fp8,31,0.41654400030771893
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,128,1,128,0,1,float16,float16,31,0.41821332772572833
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,128,1,128,0,1,float16,float16,63,0.420634667078654
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,128,1,128,0,1,float16,fp8,63,0.41839468479156494
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,128,1,128,0,1,float16,float16,127,0.511082649230957
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,128,1,128,0,1,float16,fp8,127,0.5120693445205688
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,128,2,128,0,1,float16,float16,1,0.2983520030975342
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,128,1,128,0,1,float16,float16,255,0.8095466295878092
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,128,1,128,0,1,float16,fp8,255,0.8080906867980957
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,128,2,128,0,1,float16,fp8,1,0.3076853354771932
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,128,2,128,0,1,float16,float16,3,0.3105066617329915
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,128,2,128,0,1,float16,fp8,3,0.3168960014979045
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,128,2,128,0,1,float16,float16,7,0.32574933767318726
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,128,2,128,0,1,float16,fp8,7,0.3317226568857829
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,128,2,128,0,1,float16,float16,15,0.42041067282358807
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,128,2,128,0,1,float16,fp8,15,0.4187519947687785
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,128,2,128,0,1,float16,float16,31,0.420474648475647
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,128,2,128,0,1,float16,fp8,31,0.4188586473464966
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,128,2,128,0,1,float16,float16,63,0.42262399196624756
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,128,2,128,0,1,float16,fp8,63,0.42133867740631104
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,128,2,128,0,1,float16,float16,127,0.522874673207601
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,128,2,128,0,1,float16,fp8,127,0.5174880027770996
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,128,4,128,0,1,float16,float16,1,0.06376533210277557
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,128,2,128,0,1,float16,float16,255,0.8228853543599447
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,128,2,128,0,1,float16,fp8,255,0.8143359820048014
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,128,4,128,0,1,float16,fp8,1,0.031194667021433514
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,128,4,128,0,1,float16,float16,3,0.06313600142796834
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,128,4,128,0,1,float16,fp8,3,0.03107733279466629
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,128,4,128,0,1,float16,float16,7,0.06332266827424367
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,128,4,128,0,1,float16,fp8,7,0.03166933357715607
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,128,4,128,0,1,float16,float16,15,0.06398400167624156
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,128,4,128,0,1,float16,fp8,15,0.03181333343187968
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,128,4,128,0,1,float16,float16,31,0.06306666632493337
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,128,4,128,0,1,float16,fp8,31,0.03086400032043457
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,128,4,128,0,1,float16,float16,63,0.06331733365853627
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,128,4,128,0,1,float16,fp8,63,0.03105599929889043
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,128,4,128,0,1,float16,float16,127,0.06523199876149495
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,128,4,128,0,1,float16,fp8,127,0.03752533346414566
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,128,4,128,0,1,float16,float16,255,0.07566399872303009
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,128,4,128,0,1,float16,fp8,255,0.05830933153629303
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,128,8,128,0,1,float16,float16,1,0.08649067083994548
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,128,8,128,0,1,float16,fp8,1,0.033946665624777474
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,128,8,128,0,1,float16,float16,3,0.0864586631457011
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,128,8,128,0,1,float16,fp8,3,0.034448000291983284
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,128,8,128,0,1,float16,float16,7,0.08736000458399455
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,128,8,128,0,1,float16,fp8,7,0.033770665526390076
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,128,8,128,0,1,float16,float16,15,0.08708799878756206
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,128,8,128,0,1,float16,fp8,15,0.0342399999499321
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,128,8,128,0,1,float16,float16,31,0.08691733082135518
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,128,8,128,0,1,float16,fp8,31,0.03390933324893316
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,128,8,128,0,1,float16,float16,63,0.0876533289750417
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,128,8,128,0,1,float16,fp8,63,0.03419200082619985
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,128,8,128,0,1,float16,float16,127,0.08983467022577922
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,128,8,128,0,1,float16,fp8,127,0.05034666756788889
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,128,8,128,0,1,float16,float16,255,0.10272000233332317
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,128,8,128,0,1,float16,fp8,255,0.06111466884613037
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,128,1,128,0,1,float16,float16,1,0.5820693174997965
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,128,1,128,0,1,float16,float16,3,0.6143840154012045
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,128,1,128,0,1,float16,float16,7,0.6446346839269003
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,128,1,128,0,1,float16,fp8,1,0.5984319845835367
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,128,1,128,0,1,float16,fp8,3,0.6259359916051229
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,128,1,128,0,1,float16,fp8,7,0.6551733414332072
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,128,1,128,0,1,float16,float16,15,0.8312959671020508
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,128,1,128,0,1,float16,fp8,15,0.8276800314585367
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,128,1,128,0,1,float16,float16,31,0.8331413269042969
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,128,1,128,0,1,float16,fp8,31,0.8285386562347412
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,128,1,128,0,1,float16,float16,63,0.8435359795888265
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,128,1,128,0,1,float16,fp8,63,0.836085319519043
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,128,2,128,0,1,float16,fp8,1,0.6081386804580688
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,128,1,128,0,1,float16,float16,127,1.030293305714925
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,128,2,128,0,1,float16,float16,1,0.5927679936091105
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,128,2,128,0,1,float16,fp8,3,0.6298506657282511
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,128,1,128,0,1,float16,fp8,127,1.025312026341756
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,128,2,128,0,1,float16,float16,3,0.616922656695048
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,128,2,128,0,1,float16,float16,7,0.6499520142873129
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,128,2,128,0,1,float16,fp8,7,0.6619786818822225
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,128,2,128,0,1,float16,float16,15,0.8382133642832438
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,128,2,128,0,1,float16,fp8,15,0.8334986368815104
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,128,2,128,0,1,float16,float16,31,0.8471199671427408
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,128,2,128,0,1,float16,fp8,31,0.8365866343180338
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,128,2,128,0,1,float16,float16,63,0.8597919940948486
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,128,2,128,0,1,float16,fp8,63,0.8496426741282145
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,128,4,128,0,1,float16,fp8,1,0.08643733461697896
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,128,4,128,0,1,float16,float16,1,0.12075199683507283
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,128,2,128,0,1,float16,float16,127,1.050170660018921
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,128,4,128,0,1,float16,float16,3,0.12019733587900798
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,128,4,128,0,1,float16,fp8,7,0.08648533622423808
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,128,2,128,0,1,float16,fp8,127,1.046181360880534
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,128,4,128,0,1,float16,fp8,3,0.08707732955614726
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,128,4,128,0,1,float16,float16,15,0.12225066622098286
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,128,4,128,0,1,float16,float16,7,0.12133866548538208
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,128,4,128,0,1,float16,float16,31,0.1218346655368805
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,128,4,128,0,1,float16,fp8,15,0.08750933408737183
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,128,4,128,0,1,float16,fp8,31,0.08653333783149719
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,128,4,128,0,1,float16,float16,63,0.12277332941691081
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,128,4,128,0,1,float16,fp8,63,0.08709866801897685
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,128,4,128,0,1,float16,float16,127,0.1229759951432546
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,128,8,128,0,1,float16,float16,1,0.16581333676973978
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,128,4,128,0,1,float16,fp8,127,0.09804800152778625
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,128,8,128,0,1,float16,fp8,1,0.07911466558774312
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,128,8,128,0,1,float16,float16,3,0.1646986703077952
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,128,8,128,0,1,float16,fp8,3,0.07928533355395
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,128,8,128,0,1,float16,float16,7,0.16547733545303345
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,128,8,128,0,1,float16,float16,15,0.1665013333161672
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,128,8,128,0,1,float16,fp8,7,0.07949866851170857
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,128,8,128,0,1,float16,fp8,15,0.08004799981911977
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,128,8,128,0,1,float16,fp8,31,0.07872533301512401
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,128,8,128,0,1,float16,fp8,127,0.09161600470542908
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,128,8,128,0,1,float16,fp8,63,0.07878399888674419
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,128,8,128,0,1,float16,float16,31,0.16858132680257162
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,128,8,128,0,1,float16,float16,63,0.16641066471735635
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,128,1,128,0,1,float16,float16,7,0.014442666123310724
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,128,8,128,0,1,float16,float16,127,0.16769067446390787
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,128,1,128,0,1,float16,float16,1,0.013994666437307993
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,128,1,128,0,1,float16,fp8,1,0.014175999909639359
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,128,1,128,0,1,float16,float16,3,0.013978666315476099
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,128,1,128,0,1,float16,fp8,3,0.014015999933083853
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,128,1,128,0,1,float16,fp8,7,0.014885333677132925
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,128,1,128,0,1,float16,float16,15,0.017952000101407368
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,128,1,128,0,1,float16,fp8,15,0.0179626668492953
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,128,1,128,0,1,float16,fp8,127,0.021888000269730885
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,128,1,128,0,1,float16,float16,31,0.01791999985774358
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,128,1,128,0,1,float16,fp8,31,0.01836266616980235
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,128,1,128,0,1,float16,float16,63,0.018464000274737675
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,128,1,128,0,1,float16,fp8,63,0.01800000046690305
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,128,1,128,0,1,float16,float16,127,0.021333334346612293
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,128,1,128,0,1,float16,float16,255,0.03178133318821589
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,128,1,128,0,1,float16,fp8,255,0.03170666595300039
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,128,1,128,0,1,float16,float16,511,0.05201066533724467
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,128,1,128,0,1,float16,fp8,511,0.052101333936055504
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,128,1,128,0,1,float16,float16,1023,0.09173333644866943
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,128,1,128,0,1,float16,fp8,1023,0.09418666362762451
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,128,1,128,0,1,float16,float16,2047,0.17069866259892783
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,128,1,128,0,1,float16,fp8,2047,0.17467200756072998
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,128,1,128,0,1,float16,float16,4095,0.3293439944585164
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,128,1,128,0,1,float16,fp8,4095,0.3372693459192912
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,128,1,128,0,1,float16,float16,8191,0.655402660369873
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,128,1,128,0,1,float16,fp8,8191,0.6652799844741821
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,128,2,128,0,1,float16,float16,1,0.013674666484196981
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,128,2,128,0,1,float16,fp8,1,0.013733333597580591
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,128,2,128,0,1,float16,float16,3,0.014165333161751429
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,128,1,128,0,1,float16,float16,16383,1.531050682067871
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,128,2,128,0,1,float16,fp8,3,0.014256000518798828
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,128,1,128,0,1,float16,fp8,16383,1.6059892972310383
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,128,2,128,0,1,float16,float16,7,0.01492799942692121
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,128,2,128,0,1,float16,fp8,7,0.01441066712141037
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,128,2,128,0,1,float16,float16,15,0.0179626668492953
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,128,2,128,0,1,float16,fp8,15,0.017845333864291508
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,128,2,128,0,1,float16,float16,31,0.018218666315078735
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,128,2,128,0,1,float16,fp8,31,0.01838933303952217
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,128,2,128,0,1,float16,float16,63,0.018085333208243053
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,128,2,128,0,1,float16,fp8,63,0.01855466639002164
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,128,2,128,0,1,float16,float16,127,0.02186666677395503
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,128,2,128,0,1,float16,float16,1023,0.09119466940561931
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,128,2,128,0,1,float16,fp8,127,0.021349333226680756
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,128,2,128,0,1,float16,float16,255,0.031583999594052635
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,128,2,128,0,1,float16,fp8,255,0.03147733211517334
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,128,2,128,0,1,float16,float16,511,0.05177066723505656
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,128,2,128,0,1,float16,fp8,511,0.05277866621812185
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,128,2,128,0,1,float16,fp8,1023,0.09411733349164327
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,128,2,128,0,1,float16,float16,2047,0.17208532492319742
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,128,2,128,0,1,float16,fp8,2047,0.1746079921722412
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,128,2,128,0,1,float16,float16,4095,0.3309386571248372
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,128,2,128,0,1,float16,fp8,4095,0.3371573289235433
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,128,2,128,0,1,float16,float16,8191,0.6632906595865885
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,128,2,128,0,1,float16,fp8,8191,0.6655040184656779
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,128,4,128,0,1,float16,float16,1,0.01393066719174385
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,128,4,128,0,1,float16,fp8,1,0.009290666629870733
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,128,4,128,0,1,float16,float16,3,0.013845333208640417
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,128,4,128,0,1,float16,fp8,3,0.009674666449427605
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,128,4,128,0,1,float16,float16,7,0.014202666779359182
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,128,2,128,0,1,float16,float16,16383,1.6932533582051594
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,128,4,128,0,1,float16,fp8,7,0.009546666716535887
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,128,4,128,0,1,float16,float16,63,0.017877332866191864
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,128,2,128,0,1,float16,fp8,16383,1.7484532992045085
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,128,4,128,0,1,float16,float16,15,0.01844266677896182
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,128,4,128,0,1,float16,fp8,15,0.009690666571259499
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,128,4,128,0,1,float16,float16,31,0.01802666609485944
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,128,4,128,0,1,float16,fp8,31,0.009717333440979322
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,128,4,128,0,1,float16,fp8,63,0.009557333464423815
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,128,4,128,0,1,float16,float16,127,0.021370666722456615
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,128,4,128,0,1,float16,float16,255,0.015477333217859268
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,128,4,128,0,1,float16,fp8,127,0.010591999938090643
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,128,4,128,0,1,float16,fp8,255,0.011989332735538483
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,128,4,128,0,1,float16,float16,511,0.01587733378012975
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,128,4,128,0,1,float16,fp8,511,0.012794667234023413
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,128,4,128,0,1,float16,float16,1023,0.023887999355793
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,128,4,128,0,1,float16,fp8,1023,0.01540800059835116
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,128,4,128,0,1,float16,float16,2047,0.03462400039037069
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,128,4,128,0,1,float16,fp8,2047,0.0269813338915507
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,128,4,128,0,1,float16,float16,4095,0.050848002235094704
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,128,4,128,0,1,float16,float16,16383,0.109333336353302
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,128,4,128,0,1,float16,fp8,4095,0.03133333226044973
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,128,4,128,0,1,float16,float16,8191,0.07347733279069264
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,128,4,128,0,1,float16,fp8,8191,0.05105599761009216
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,128,8,128,0,1,float16,float16,1,0.011285333583752314
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,128,4,128,0,1,float16,fp8,16383,0.08546666304270427
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,128,8,128,0,1,float16,fp8,1,0.007114666824539502
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,128,8,128,0,1,float16,float16,3,0.010858666151762009
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,128,8,128,0,1,float16,fp8,3,0.007071999832987785
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,128,8,128,0,1,float16,float16,7,0.01071999967098236
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,128,8,128,0,1,float16,fp8,7,0.00731733317176501
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,128,8,128,0,1,float16,float16,15,0.011285333583752314
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,128,8,128,0,1,float16,fp8,15,0.0074346667776505155
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,128,8,128,0,1,float16,float16,31,0.010970667004585266
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,128,8,128,0,1,float16,fp8,127,0.008314666648705801
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,128,8,128,0,1,float16,fp8,31,0.007125333572427432
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,128,8,128,0,1,float16,float16,63,0.010485333700974783
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,128,8,128,0,1,float16,fp8,63,0.007418666655818622
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,128,8,128,0,1,float16,float16,127,0.01129066695769628
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,128,8,128,0,1,float16,float16,255,0.013770667215188345
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,128,8,128,0,1,float16,fp8,255,0.009493333597977957
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,128,8,128,0,1,float16,fp8,511,0.011589333415031433
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,128,8,128,0,1,float16,float16,511,0.020661332954963047
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,128,8,128,0,1,float16,fp8,4095,0.035818666219711304
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,128,8,128,0,1,float16,float16,1023,0.028815999627113342
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,128,8,128,0,1,float16,float16,8191,0.0867786705493927
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,128,8,128,0,1,float16,fp8,1023,0.0191040001809597
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,128,8,128,0,1,float16,float16,2047,0.04330666859944662
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,128,8,128,0,1,float16,fp8,2047,0.023130667706330616
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,128,8,128,0,1,float16,float16,4095,0.06749333441257477
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,128,8,128,0,1,float16,fp8,8191,0.051818668842315674
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,128,8,128,0,1,float16,float16,16383,0.12120532989501953
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,128,8,128,0,1,float16,fp8,16383,0.07392533123493195
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,128,1,128,0,1,float16,fp8,1,1.237119992574056
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,128,1,128,0,1,float16,float16,3,1.256208022435506
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,128,1,128,0,1,float16,fp8,3,1.2811573346455891
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,128,1,128,0,1,float16,float16,1,1.212165355682373
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,128,1,128,0,1,float16,float16,7,1.315557320912679
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,128,1,128,0,1,float16,float16,15,1.6719573338826497
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,128,1,128,0,1,float16,fp8,7,1.339413324991862
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,128,1,128,0,1,float16,fp8,15,1.669157346089681
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,128,1,128,0,1,float16,float16,31,1.6758987108866374
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,128,1,128,0,1,float16,fp8,31,1.6679840087890625
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,128,1,128,0,1,float16,float16,63,1.689733346303304
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,128,2,128,0,1,float16,float16,1,1.231280008951823
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,128,2,128,0,1,float16,fp8,1,1.2610186735788982
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,128,1,128,0,1,float16,fp8,63,1.6805226008097331
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,128,2,128,0,1,float16,fp8,3,1.2860533396402996
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,128,2,128,0,1,float16,float16,3,1.261786699295044
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,128,2,128,0,1,float16,float16,7,1.3279039859771729
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,128,2,128,0,1,float16,fp8,7,1.3524959882100422
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,128,2,128,0,1,float16,float16,15,1.6896799405415852
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,128,2,128,0,1,float16,fp8,15,1.6836053530375164
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,128,4,128,0,1,float16,float16,1,0.23292267322540283
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,128,2,128,0,1,float16,float16,31,1.6998186111450195
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,128,4,128,0,1,float16,fp8,1,0.17106133699417114
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,128,2,128,0,1,float16,float16,63,1.7159679730733235
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,128,4,128,0,1,float16,fp8,3,0.17137600978215536
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,128,4,128,0,1,float16,float16,3,0.23310933510462442
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,128,2,128,0,1,float16,fp8,31,1.6873920758565266
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,128,4,128,0,1,float16,float16,7,0.2329066594441732
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,128,2,128,0,1,float16,fp8,63,1.7026559511820476
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,128,4,128,0,1,float16,fp8,7,0.17189333836237589
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,128,4,128,0,1,float16,float16,15,0.23377599318822226
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,128,4,128,0,1,float16,fp8,15,0.17098132769266763
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,128,4,128,0,1,float16,float16,31,0.23508266607920328
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,128,4,128,0,1,float16,fp8,31,0.17164266109466553
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,128,4,128,0,1,float16,fp8,63,0.17051732540130615
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,128,8,128,0,1,float16,fp8,1,0.14755200346310934
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,128,4,128,0,1,float16,float16,63,0.23377066850662231
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,128,8,128,0,1,float16,float16,3,0.32494399944941205
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,128,8,128,0,1,float16,fp8,3,0.14780267079671225
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,128,8,128,0,1,float16,float16,7,0.32730666796366376
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,128,8,128,0,1,float16,float16,1,0.3277440071105957
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,128,8,128,0,1,float16,fp8,7,0.1478613317012787
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,128,8,128,0,1,float16,float16,15,0.32467732826868695
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,128,8,128,0,1,float16,fp8,15,0.14800000190734863
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,128,8,128,0,1,float16,fp8,31,0.1478506624698639
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,128,8,128,0,1,float16,fp8,63,0.1476746698220571
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,128,8,128,0,1,float16,float16,63,0.3234773278236389
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,128,8,128,0,1,float16,float16,31,0.3284800052642822
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,128,1,128,0,1,float16,fp8,1,2.4684054056803384
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,128,1,128,0,1,float16,float16,3,2.5039893786112466
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,128,1,128,0,1,float16,fp8,3,2.5551199913024902
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,128,1,128,0,1,float16,float16,7,2.625216007232666
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,128,1,128,0,1,float16,float16,1,2.4152587254842124
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,128,1,128,0,1,float16,fp8,7,2.671541213989258
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,128,1,128,0,1,float16,float16,15,3.338399887084961
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,128,1,128,0,1,float16,fp8,15,3.3262879053751626
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,128,2,128,0,1,float16,float16,1,2.4564053217569985
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,128,1,128,0,1,float16,float16,31,3.3454294204711914
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,128,1,128,0,1,float16,fp8,31,3.328890800476074
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,128,2,128,0,1,float16,fp8,1,2.514570713043213
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,128,2,128,0,1,float16,float16,7,2.6510292689005532
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,128,2,128,0,1,float16,fp8,7,2.698293368021647
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,128,2,128,0,1,float16,float16,15,3.3724692662556968
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,128,2,128,0,1,float16,fp8,15,3.3615147272745767
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,128,4,128,0,1,float16,fp8,1,0.3349386850992839
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,128,2,128,0,1,float16,float16,31,3.3915786743164062
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,128,4,128,0,1,float16,float16,1,0.4565226634343465
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,128,4,128,0,1,float16,float16,3,0.4534826676050822
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,128,4,128,0,1,float16,fp8,3,0.3364853461583455
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,128,2,128,0,1,float16,fp8,31,3.368170738220215
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,128,4,128,0,1,float16,float16,7,0.4553440014521281
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,128,4,128,0,1,float16,fp8,15,0.33510398864746094
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,128,2,128,0,1,float16,float16,3,2.5151039759318032
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,128,4,128,0,1,float16,float16,31,0.4594133297602336
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,128,4,128,0,1,float16,fp8,7,0.33555201689402264
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,128,2,128,0,1,float16,fp8,3,2.5647360483805337
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,128,8,128,0,1,float16,float16,1,0.6398026545842489
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,128,4,128,0,1,float16,fp8,31,0.33639999230702716
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,128,8,128,0,1,float16,fp8,1,0.2851253350575765
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,128,8,128,0,1,float16,fp8,3,0.28617600599924725
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,128,8,128,0,1,float16,float16,3,0.6450453201929728
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,128,8,128,0,1,float16,fp8,7,0.2855466604232788
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,128,1,128,0,1,float16,float16,1,0.023120000958442688
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,128,8,128,0,1,float16,float16,15,0.640501340230306
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,128,1,128,0,1,float16,fp8,1,0.02348800003528595
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,128,1,128,0,1,float16,float16,3,0.023797333240509033
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,128,8,128,0,1,float16,float16,7,0.6433546543121338
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,128,8,128,0,1,float16,float16,31,0.6427306731541952
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,128,8,128,0,1,float16,fp8,31,0.2868266701698303
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,128,1,128,0,1,float16,fp8,3,0.02457600086927414
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,128,1,128,0,1,float16,float16,7,0.024821333587169647
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,128,1,128,0,1,float16,float16,31,0.03163733333349228
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,128,1,128,0,1,float16,fp8,7,0.025055999557177227
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,128,8,128,0,1,float16,fp8,15,0.28677332401275635
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,128,1,128,0,1,float16,float16,15,0.031680000325044
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,128,1,128,0,1,float16,fp8,15,0.03128000100453695
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,128,1,128,0,1,float16,fp8,31,0.03136000037193298
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,128,1,128,0,1,float16,float16,63,0.03155199935038885
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,128,1,128,0,1,float16,fp8,63,0.03161066770553589
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,128,1,128,0,1,float16,float16,127,0.037834666669368744
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,128,1,128,0,1,float16,fp8,127,0.038373333712418876
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,128,1,128,0,1,float16,float16,255,0.057664001981417336
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,128,1,128,0,1,float16,fp8,255,0.05734399954477946
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,128,1,128,0,1,float16,float16,511,0.09637866417566936
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,128,1,128,0,1,float16,fp8,511,0.09679999947547913
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,128,1,128,0,1,float16,fp8,2047,0.3327839970588684
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,128,1,128,0,1,float16,float16,1023,0.17383466164271036
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,128,1,128,0,1,float16,fp8,1023,0.1754186749458313
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,128,4,128,0,1,float16,float16,15,0.4545493523279826
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,128,1,128,0,1,float16,float16,2047,0.32866666714350384
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,128,1,128,0,1,float16,float16,4095,0.6369440158208212
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,128,1,128,0,1,float16,fp8,4095,0.6459039847056071
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,128,2,128,0,1,float16,float16,1,0.023258666197458904
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,128,2,128,0,1,float16,fp8,1,0.023546665906906128
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,128,1,128,0,1,float16,float16,8191,1.2755626837412517
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,128,2,128,0,1,float16,float16,3,0.024197332561016083
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,128,2,128,0,1,float16,fp8,3,0.0249493345618248
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,128,1,128,0,1,float16,fp8,8191,1.27838929494222
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,128,2,128,0,1,float16,float16,7,0.02495466669400533
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,128,2,128,0,1,float16,fp8,7,0.025477332373460133
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,128,2,128,0,1,float16,float16,15,0.031370667119820915
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,128,2,128,0,1,float16,fp8,15,0.031146667897701263
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,128,2,128,0,1,float16,float16,31,0.03150933235883713
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,128,2,128,0,1,float16,fp8,31,0.03133866687615713
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,128,2,128,0,1,float16,float16,63,0.03218133250872294
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,128,2,128,0,1,float16,fp8,63,0.03196266790231069
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,128,2,128,0,1,float16,float16,127,0.037861332297325134
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,128,2,128,0,1,float16,fp8,127,0.03792533278465271
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,128,2,128,0,1,float16,float16,255,0.05740800003210703
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,128,2,128,0,1,float16,fp8,255,0.05728533367315928
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,128,2,128,0,1,float16,float16,511,0.09611733754475911
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,128,2,128,0,1,float16,fp8,511,0.0974720021088918
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,128,2,128,0,1,float16,float16,1023,0.174127995967865
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,128,2,128,0,1,float16,fp8,1023,0.17627733945846558
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,128,2,128,0,1,float16,float16,2047,0.32945066690444946
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,128,2,128,0,1,float16,fp8,2047,0.33284799257914227
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,128,2,128,0,1,float16,float16,4095,0.650821328163147
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,128,2,128,0,1,float16,fp8,4095,0.6458453337351481
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,128,4,128,0,1,float16,float16,1,0.012879999975363413
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,128,4,128,0,1,float16,fp8,1,0.009717333440979322
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,128,2,128,0,1,float16,float16,8191,1.2871733506520588
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,128,4,128,0,1,float16,float16,3,0.012576000144084295
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,128,4,128,0,1,float16,fp8,3,0.009237333511312803
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,128,2,128,0,1,float16,fp8,8191,1.296122630437215
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,128,4,128,0,1,float16,float16,7,0.013023999830087027
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,128,4,128,0,1,float16,float16,63,0.012335999558369318
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,128,4,128,0,1,float16,fp8,7,0.009237333511312803
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,128,4,128,0,1,float16,float16,15,0.012997332960367203
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,128,4,128,0,1,float16,fp8,15,0.009690666571259499
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,128,4,128,0,1,float16,float16,31,0.012341332932313284
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,128,4,128,0,1,float16,fp8,31,0.009258666386206945
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,128,4,128,0,1,float16,fp8,63,0.009514666472872099
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,128,4,128,0,1,float16,float16,127,0.012682666381200155
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,128,4,128,0,1,float16,fp8,127,0.010543999572594961
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,128,4,128,0,1,float16,float16,255,0.01616000011563301
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,128,4,128,0,1,float16,fp8,255,0.012576000144084295
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,128,4,128,0,1,float16,float16,511,0.023797333240509033
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,128,4,128,0,1,float16,fp8,511,0.014474666366974512
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,128,4,128,0,1,float16,float16,1023,0.034832000732421875
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,128,4,128,0,1,float16,fp8,1023,0.02628266563018163
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,128,4,128,0,1,float16,float16,2047,0.054698665936787925
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,128,4,128,0,1,float16,fp8,2047,0.036746665835380554
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,128,4,128,0,1,float16,float16,4095,0.08616532882054646
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,128,4,128,0,1,float16,fp8,4095,0.06242666641871134
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,128,4,128,0,1,float16,float16,8191,0.10732266306877136
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,128,4,128,0,1,float16,fp8,8191,0.0821973333756129
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,128,8,128,0,1,float16,float16,1,0.011071999867757162
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,128,8,128,0,1,float16,fp8,1,0.007797333101431529
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,128,8,128,0,1,float16,float16,3,0.010965333630641302
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,128,8,128,0,1,float16,fp8,3,0.007903999959429106
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,128,8,128,0,1,float16,float16,7,0.011039999624093374
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,128,8,128,0,1,float16,fp8,7,0.007178666690985362
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,128,8,128,0,1,float16,float16,15,0.011338666081428528
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,128,8,128,0,1,float16,fp8,15,0.007247999931375186
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,128,8,128,0,1,float16,float16,31,0.01163200040658315
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,128,8,128,0,1,float16,fp8,31,0.007141333073377609
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,128,8,128,0,1,float16,float16,63,0.0116799995303154
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,128,8,128,0,1,float16,fp8,63,0.007322666545708974
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,128,8,128,0,1,float16,float16,127,0.010970667004585266
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,128,8,128,0,1,float16,fp8,127,0.00816000004609426
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,128,8,128,0,1,float16,float16,255,0.022143999735514324
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,128,8,128,0,1,float16,fp8,255,0.011338666081428528
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,128,8,128,0,1,float16,float16,511,0.029146666328112285
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,128,8,128,0,1,float16,fp8,511,0.018746666610240936
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,128,8,128,0,1,float16,float16,1023,0.04740799963474274
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,128,8,128,0,1,float16,fp8,1023,0.026560001075267792
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,128,8,128,0,1,float16,float16,2047,0.06574933230876923
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,128,8,128,0,1,float16,fp8,2047,0.03468266626199087
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,128,8,128,0,1,float16,float16,4095,0.08481066425641377
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,128,8,128,0,1,float16,fp8,4095,0.0506933331489563
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,96,1,64,128,1,float16,float16,1,0.03311999887228012
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,128,8,128,0,1,float16,float16,8191,0.12141333023707072
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,96,1,64,0,1,float16,float16,1,0.03295466552178065
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,128,8,128,0,1,float16,fp8,8191,0.07613333563009898
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,96,1,64,128,1,float16,fp8,1,0.03340800106525421
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,96,1,64,128,1,float16,float16,7,0.034143999218940735
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,96,1,64,0,1,float16,fp8,1,0.03328000009059906
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,96,1,64,128,1,float16,float16,3,0.03366933266321818
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,96,1,64,0,1,float16,float16,3,0.033071999748547874
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,96,1,64,128,1,float16,fp8,3,0.03324266771475474
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,96,1,64,0,1,float16,fp8,3,0.03369066615899404
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,96,1,64,0,1,float16,float16,7,0.03502399971087774
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,96,1,64,128,1,float16,fp8,7,0.034714666505654655
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,96,1,64,0,1,float16,fp8,7,0.03429866582155228
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,96,1,64,128,1,float16,float16,15,0.035573333501815796
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,96,1,64,0,1,float16,float16,15,0.03612799942493439
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,96,1,64,128,1,float16,fp8,15,0.035743998984495796
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,96,1,64,0,1,float16,fp8,15,0.03575466573238373
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,96,1,64,128,1,float16,float16,31,0.04483733574549357
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,96,1,64,0,1,float16,float16,31,0.044810667634010315
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,96,1,64,128,1,float16,fp8,31,0.044810667634010315
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,96,1,64,0,1,float16,fp8,31,0.044250667095184326
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,96,1,64,128,1,float16,float16,63,0.0447626660267512
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,96,1,64,0,1,float16,float16,63,0.04531733194986979
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,96,1,64,128,1,float16,fp8,63,0.04487466812133789
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,96,1,64,0,1,float16,fp8,63,0.04499199986457825
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,96,1,64,0,1,float16,float16,255,0.05502399802207947
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,96,1,64,128,1,float16,float16,127,0.045850664377212524
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,96,1,64,0,1,float16,float16,127,0.04582933088143667
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,96,1,64,128,1,float16,fp8,127,0.04558399816354116
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,96,1,64,0,1,float16,fp8,127,0.04518933097521464
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,96,1,64,128,1,float16,float16,255,0.045519997676213585
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,96,1,64,128,1,float16,fp8,255,0.04533866544564565
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,96,1,64,128,1,float16,float16,1023,0.04545066754023234
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,96,1,64,0,1,float16,fp8,255,0.05446400245030721
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,96,1,64,128,1,float16,float16,511,0.04633066554864248
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,96,1,64,0,1,float16,fp8,1023,0.14282666643460593
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,96,1,64,0,1,float16,float16,511,0.08330666522185008
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,96,1,64,128,1,float16,fp8,511,0.04604800045490265
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,96,1,64,0,1,float16,fp8,511,0.08454400300979614
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,96,1,64,0,1,float16,float16,1023,0.1410719950993856
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,96,1,64,128,1,float16,fp8,1023,0.04542933404445648
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,96,1,64,128,1,float16,float16,2047,0.04580266773700714
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,96,1,64,0,1,float16,float16,2047,0.25433599948883057
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,96,1,64,128,1,float16,fp8,2047,0.04545066754023234
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,96,1,64,0,1,float16,fp8,2047,0.2600906689961751
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,96,1,64,128,1,float16,float16,4095,0.04620266457398733
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,96,1,64,128,1,float16,fp8,4095,0.045647998650868736
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,96,1,64,0,1,float16,float16,4095,0.4812479813893636
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,96,2,64,128,1,float16,float16,1,0.03311466674009959
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,96,2,64,0,1,float16,float16,1,0.03379199902216593
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,96,2,64,128,1,float16,fp8,1,0.03315199911594391
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,96,1,64,0,1,float16,fp8,4095,0.4928160111109416
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,96,2,64,0,1,float16,fp8,1,0.03375466664632162
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,96,2,64,128,1,float16,float16,3,0.033904001116752625
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,96,2,64,0,1,float16,float16,3,0.03330666571855545
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,96,2,64,128,1,float16,fp8,3,0.03350399931271871
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,96,2,64,0,1,float16,fp8,3,0.03341866781314214
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,96,2,64,128,1,float16,float16,7,0.034874667723973594
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,96,2,64,0,1,float16,float16,7,0.0342399999499321
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,96,2,64,128,1,float16,fp8,7,0.03451200077931086
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,96,2,64,0,1,float16,fp8,7,0.03495999922355016
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,96,2,64,128,1,float16,float16,15,0.03615466753641764
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,96,2,64,0,1,float16,float16,15,0.035589332381884255
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,96,2,64,128,1,float16,fp8,15,0.03603200117746989
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,96,2,64,0,1,float16,fp8,15,0.036176001032193504
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,96,2,64,128,1,float16,float16,31,0.045007998744646706
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,96,2,64,0,1,float16,float16,63,0.04507733384768168
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,96,2,64,0,1,float16,float16,31,0.04436799883842468
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,96,2,64,128,1,float16,fp8,31,0.04497066636880239
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,96,2,64,0,1,float16,fp8,31,0.045184001326560974
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,96,2,64,128,1,float16,float16,63,0.04497066636880239
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,96,2,64,128,1,float16,fp8,63,0.04558933277924856
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,96,2,64,0,1,float16,fp8,63,0.04513599971930186
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,96,2,64,128,1,float16,float16,127,0.04561600089073181
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,96,2,64,0,1,float16,float16,127,0.045381332437197365
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,96,2,64,128,1,float16,fp8,127,0.04589866598447164
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,96,2,64,0,1,float16,fp8,127,0.046165332198143005
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,96,2,64,128,1,float16,float16,255,0.045653333266576133
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,96,2,64,0,1,float16,float16,255,0.05522133409976959
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,96,2,64,128,1,float16,fp8,255,0.045466666420300804
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,96,2,64,0,1,float16,fp8,255,0.054570664962132774
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,96,2,64,128,1,float16,float16,511,0.04569066564242045
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,96,2,64,0,1,float16,float16,511,0.08356266220410664
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,96,2,64,128,1,float16,fp8,511,0.0461706668138504
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,96,2,64,0,1,float16,fp8,511,0.08448533217112224
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,96,2,64,128,1,float16,float16,1023,0.04632533093293508
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,96,2,64,0,1,float16,float16,1023,0.13991467157999674
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,96,2,64,128,1,float16,fp8,1023,0.045738667249679565
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,96,2,64,0,1,float16,fp8,1023,0.1431893308957418
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,96,2,64,128,1,float16,float16,2047,0.04621333380540212
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,96,2,64,0,1,float16,float16,2047,0.25437333186467487
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,96,2,64,128,1,float16,fp8,2047,0.04560000201066335
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,96,2,64,0,1,float16,fp8,2047,0.2595679958661397
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,96,2,64,128,1,float16,float16,4095,0.04628799855709076
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,96,2,64,128,1,float16,fp8,4095,0.04577599962552389
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,96,2,64,0,1,float16,float16,4095,0.485919992129008
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,96,2,64,0,1,float16,fp8,4095,0.49273065725962323
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,96,4,64,0,1,float16,float16,3,0.01232533281048139
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,96,4,64,128,1,float16,float16,1,0.011823999385039011
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,96,4,64,128,1,float16,float16,7,0.012240000069141388
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,96,4,64,0,1,float16,float16,7,0.012240000069141388
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,96,4,64,0,1,float16,float16,1,0.011920000116030375
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,96,4,64,128,1,float16,float16,15,0.011594666788975397
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,96,4,64,0,1,float16,float16,15,0.011600000162919363
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,96,4,64,128,1,float16,float16,3,0.012282667060693106
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,96,4,64,128,1,float16,float16,31,0.011535999675591787
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,96,4,64,0,1,float16,float16,31,0.017925333231687546
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,96,4,64,128,1,float16,float16,63,0.011429333438475927
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,96,4,64,0,1,float16,float16,63,0.01146666705608368
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,96,4,64,0,1,float16,float16,127,0.011472000430027643
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,96,4,64,128,1,float16,float16,127,0.018725333114465077
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,96,4,64,128,1,float16,float16,255,0.022986667851607006
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,96,4,64,0,1,float16,float16,255,0.02102400114138921
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,96,4,64,128,1,float16,float16,511,0.026687999566396076
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,96,4,64,0,1,float16,float16,511,0.031445334355036415
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,96,4,64,128,1,float16,float16,1023,0.037989333271980286
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,96,4,64,0,1,float16,float16,1023,0.04655466477076212
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,96,4,64,0,1,float16,float16,2047,0.0613013356924057
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,96,4,64,128,1,float16,float16,2047,0.04036800066630045
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,96,4,64,128,1,float16,float16,4095,0.04062933226426443
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,96,4,64,0,1,float16,float16,4095,0.07806399961312611
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,96,8,64,128,1,float16,float16,1,0.014773332824309668
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,96,8,64,0,1,float16,float16,1,0.015008000036080679
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,96,8,64,128,1,float16,float16,3,0.014778666198253632
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,96,8,64,0,1,float16,float16,3,0.014560000350077948
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,96,8,64,128,1,float16,fp8,1,0.008469333251317343
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,96,8,64,0,1,float16,fp8,1,0.006826666494210561
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,96,8,64,128,1,float16,fp8,3,0.011285333583752314
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,96,8,64,0,1,float16,fp8,3,0.006720000257094701
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,96,8,64,128,1,float16,float16,7,0.015562667200962702
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,96,8,64,0,1,float16,float16,7,0.014746667196353277
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,96,8,64,128,1,float16,fp8,7,0.0068853336075941724
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,96,8,64,0,1,float16,fp8,7,0.00690133310854435
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,96,8,64,128,1,float16,float16,15,0.014959999670584997
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,96,8,64,0,1,float16,float16,15,0.014789332946141561
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,96,8,64,128,1,float16,fp8,15,0.006954666847983996
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,96,8,64,0,1,float16,fp8,15,0.006895999734600385
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,96,8,64,128,1,float16,float16,31,0.01505600040157636
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,96,8,64,0,1,float16,float16,31,0.015263999501864115
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,96,8,64,128,1,float16,fp8,31,0.008229333286484083
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,96,8,64,0,1,float16,fp8,31,0.007167999943097432
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,96,8,64,128,1,float16,float16,63,0.014933332800865173
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,96,8,64,0,1,float16,float16,63,0.014842666685581207
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,96,8,64,128,1,float16,fp8,63,0.0069973332186539965
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,96,8,64,0,1,float16,fp8,63,0.00701333334048589
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,96,8,64,128,1,float16,float16,127,0.014991999914248785
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,96,8,64,0,1,float16,float16,127,0.014837333311637243
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,96,8,64,128,1,float16,fp8,127,0.007925333455204964
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,96,8,64,0,1,float16,fp8,127,0.008000000069538752
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,96,8,64,0,1,float16,float16,255,0.02683199942111969
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,96,8,64,128,1,float16,float16,255,0.02718399961789449
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,96,8,64,128,1,float16,fp8,255,0.009114666531483332
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,96,8,64,0,1,float16,fp8,255,0.010458666831254959
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,96,8,64,128,1,float16,float16,511,0.036687999963760376
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,96,8,64,0,1,float16,float16,511,0.04098666707674662
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,96,8,64,128,1,float16,fp8,511,0.010677333921194077
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,96,8,64,0,1,float16,fp8,511,0.018021332720915478
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,96,8,64,128,1,float16,float16,1023,0.036330667634805046
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,96,8,64,0,1,float16,float16,1023,0.049584001302719116
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,96,8,64,128,1,float16,fp8,1023,0.010602666685978571
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,96,8,64,0,1,float16,fp8,1023,0.022944000860055287
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,96,8,64,128,1,float16,float16,2047,0.038848000268141426
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,96,8,64,0,1,float16,float16,2047,0.06757333377997081
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,96,8,64,128,1,float16,fp8,2047,0.010527999450763067
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,96,8,64,0,1,float16,fp8,2047,0.03306133300065994
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,96,8,64,128,1,float16,float16,4095,0.03937066594759623
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,96,8,64,128,1,float16,fp8,4095,0.010496000448862711
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,96,8,64,0,1,float16,float16,4095,0.08847999572753906
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,96,8,64,0,1,float16,fp8,4095,0.05428266525268555
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,96,1,64,128,1,float16,float16,1,0.007754666730761528
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,96,1,64,0,1,float16,float16,1,0.00761600024998188
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,96,1,64,0,1,float16,fp8,1,0.008965333302815756
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,96,1,64,128,1,float16,fp8,1,0.009008000294367472
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,96,1,64,0,1,float16,float16,3,0.007680000116427739
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,96,1,64,128,1,float16,fp8,3,0.008901333436369896
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,96,1,64,0,1,float16,fp8,3,0.009082666908701261
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,96,1,64,128,1,float16,float16,7,0.007674666742483775
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,96,1,64,0,1,float16,float16,7,0.00784533346692721
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,96,1,64,128,1,float16,fp8,7,0.00901333304742972
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,96,1,64,0,1,float16,fp8,7,0.008885333314538002
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,96,1,64,128,1,float16,float16,15,0.007626666376988093
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,96,1,64,0,1,float16,float16,15,0.007802666475375493
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,96,1,64,128,1,float16,fp8,15,0.008922666932145754
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,96,1,64,0,1,float16,fp8,15,0.008885333314538002
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,96,1,64,128,1,float16,float16,31,0.008325333396593729
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,96,1,64,0,1,float16,float16,31,0.007925333455204964
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,96,1,64,128,1,float16,float16,3,0.007573333258430163
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,96,1,64,128,1,float16,fp8,31,0.00901333304742972
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,96,1,64,0,1,float16,fp8,31,0.009237333511312803
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,96,1,64,128,1,float16,float16,63,0.008869333192706108
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,96,1,64,0,1,float16,float16,63,0.008581333483258883
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,96,1,64,128,1,float16,fp8,63,0.009946666657924652
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,96,1,64,0,1,float16,fp8,63,0.009952000031868616
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,96,1,64,128,1,float16,float16,127,0.008832000195980072
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,96,1,64,0,1,float16,float16,127,0.008570666735370954
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,96,1,64,128,1,float16,fp8,127,0.011226666470368704
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,96,1,64,0,1,float16,fp8,127,0.011349332829316458
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,96,1,64,128,1,float16,float16,255,0.00879466657837232
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,96,1,64,0,1,float16,float16,255,0.008736000085870424
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,96,1,64,128,1,float16,fp8,255,0.011183999478816986
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,96,1,64,0,1,float16,fp8,255,0.011519999553759893
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,96,1,64,128,1,float16,float16,511,0.00884799969693025
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,96,1,64,0,1,float16,float16,511,0.009759999811649323
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,96,1,64,128,1,float16,fp8,511,0.01119999960064888
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,96,1,64,0,1,float16,fp8,511,0.011488000551859537
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,96,1,64,128,1,float16,float16,1023,0.02170666555563609
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,96,1,64,0,1,float16,float16,1023,0.025807999074459076
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,96,1,64,128,1,float16,fp8,1023,0.021669333179791767
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,96,1,64,0,1,float16,fp8,1023,0.02590399980545044
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,96,1,64,128,1,float16,float16,2047,0.021733333667119343
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,96,1,64,0,1,float16,float16,2047,0.030031998952229817
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,96,1,64,128,1,float16,fp8,2047,0.02160533269246419
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,96,1,64,0,1,float16,fp8,2047,0.030074665943781536
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,96,1,64,128,1,float16,float16,4095,0.02184533327817917
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,96,1,64,0,1,float16,float16,4095,0.04012266546487808
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,96,1,64,128,1,float16,fp8,4095,0.021754667162895203
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,96,1,64,0,1,float16,fp8,4095,0.04008000095685323
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,96,1,64,128,1,float16,float16,8191,0.02178666740655899
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,96,1,64,0,1,float16,float16,8191,0.0603359987338384
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,96,1,64,128,1,float16,fp8,8191,0.021664001047611237
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,96,1,64,0,1,float16,fp8,8191,0.060191998879114784
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,96,1,64,128,1,float16,float16,16383,0.021770666042963665
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,96,1,64,0,1,float16,float16,16383,0.10028800368309021
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,96,1,64,128,1,float16,fp8,16383,0.021701333423455555
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,96,1,64,0,1,float16,fp8,16383,0.09980266292889912
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,96,2,64,128,1,float16,float16,1,0.007711999739209811
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,96,2,64,0,1,float16,float16,1,0.007711999739209811
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,96,2,64,128,1,float16,fp8,1,0.008863999818762144
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,96,2,64,0,1,float16,fp8,1,0.00884799969693025
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,96,2,64,0,1,float16,float16,3,0.007717333113153775
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,96,2,64,128,1,float16,fp8,3,0.008933333059151968
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,96,2,64,128,1,float16,float16,3,0.007765333478649457
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,96,2,64,0,1,float16,fp8,3,0.00890666681031386
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,96,2,64,128,1,float16,float16,7,0.008063999935984612
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,96,2,64,0,1,float16,float16,7,0.007786666974425316
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,96,2,64,128,1,float16,fp8,7,0.008901333436369896
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,96,2,64,0,1,float16,fp8,7,0.008890666688481966
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,96,2,64,128,1,float16,float16,15,0.007733333234985669
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,96,2,64,0,1,float16,float16,15,0.007727999861041705
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,96,2,64,128,1,float16,fp8,15,0.00895999992887179
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,96,2,64,0,1,float16,fp8,15,0.00891733355820179
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,96,2,64,128,1,float16,float16,31,0.00795199970404307
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,96,2,64,0,1,float16,float16,31,0.008042666440208754
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,96,2,64,128,1,float16,fp8,31,0.00914666677514712
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,96,2,64,0,1,float16,fp8,31,0.00915733352303505
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,96,2,64,128,1,float16,float16,63,0.00873066671192646
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,96,2,64,0,1,float16,float16,63,0.008661333471536636
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,96,2,64,128,1,float16,fp8,63,0.009701333319147428
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,96,2,64,0,1,float16,fp8,63,0.00984533317387104
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,96,2,64,128,1,float16,float16,127,0.008885333314538002
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,96,2,64,0,1,float16,float16,127,0.008762666955590248
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,96,2,64,128,1,float16,fp8,127,0.011333333949247995
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,96,2,64,0,1,float16,fp8,127,0.01156266654531161
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,96,2,64,128,1,float16,float16,255,0.008832000195980072
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,96,2,64,0,1,float16,float16,255,0.00903466654320558
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,96,2,64,128,1,float16,fp8,255,0.011482667177915573
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,96,2,64,0,1,float16,fp8,255,0.011365332951148352
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,96,2,64,128,1,float16,float16,511,0.008853333070874214
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,96,2,64,0,1,float16,float16,511,0.009930666536092758
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,96,2,64,128,1,float16,fp8,511,0.011338666081428528
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,96,2,64,0,1,float16,fp8,511,0.011391999820868174
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,96,2,64,0,1,float16,float16,1023,0.02584533393383026
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,96,2,64,128,1,float16,float16,1023,0.0216799999276797
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,96,2,64,128,1,float16,fp8,1023,0.021568000316619873
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,96,2,64,0,1,float16,fp8,1023,0.02573866645495097
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,96,2,64,0,1,float16,float16,2047,0.0301706666747729
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,96,2,64,128,1,float16,float16,2047,0.022618666291236877
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,96,2,64,128,1,float16,fp8,2047,0.021920000513394673
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,96,2,64,0,1,float16,fp8,2047,0.030042665700117748
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,96,2,64,128,1,float16,fp8,4095,0.021712000171343487
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,96,2,64,128,1,float16,float16,4095,0.021738665799299877
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,96,2,64,0,1,float16,float16,4095,0.04025600105524063
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,96,2,64,0,1,float16,fp8,4095,0.03997866561015447
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,96,2,64,128,1,float16,float16,8191,0.021717332303524017
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,96,2,64,0,1,float16,float16,8191,0.060229331254959106
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,96,2,64,128,1,float16,fp8,8191,0.023711999257405598
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,96,2,64,0,1,float16,fp8,8191,0.06035199761390686
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,96,2,64,128,1,float16,float16,16383,0.02161066730817159
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,96,2,64,0,1,float16,float16,16383,0.10025599598884583
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,96,2,64,0,1,float16,fp8,16383,0.09962667028109233
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,96,2,64,128,1,float16,fp8,16383,0.021551998953024547
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,96,4,64,128,1,float16,float16,1,0.007760000104705493
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,96,4,64,0,1,float16,float16,1,0.007823999971151352
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,96,4,64,128,1,float16,float16,3,0.007605333502093951
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,96,4,64,0,1,float16,float16,3,0.007658666620651881
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,96,4,64,0,1,float16,float16,7,0.008021333565314611
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,96,4,64,128,1,float16,float16,7,0.009098666409651438
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,96,4,64,128,1,float16,float16,15,0.008597333605090777
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,96,4,64,0,1,float16,float16,15,0.008223999912540117
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,96,4,64,128,1,float16,float16,31,0.008042666440208754
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,96,4,64,0,1,float16,float16,31,0.007978666573762894
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,96,4,64,128,1,float16,float16,63,0.00915733352303505
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,96,4,64,0,1,float16,float16,63,0.008858666444818178
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,96,4,64,128,1,float16,float16,127,0.008943999807039896
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,96,4,64,0,1,float16,float16,127,0.016810666769742966
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,96,4,64,128,1,float16,float16,255,0.010575999816258749
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,96,4,64,0,1,float16,float16,255,0.009306666751702627
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,96,4,64,128,1,float16,float16,511,0.008746666833758354
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,96,4,64,0,1,float16,float16,511,0.009893333539366722
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,96,4,64,128,1,float16,float16,1023,0.021488000949223835
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,96,4,64,0,1,float16,float16,1023,0.02573866645495097
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,96,4,64,128,1,float16,float16,2047,0.013061333447694778
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,96,4,64,0,1,float16,float16,2047,0.013728000223636627
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,96,4,64,128,1,float16,float16,4095,0.013807999591032663
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,96,4,64,0,1,float16,float16,4095,0.014661333213249842
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,96,4,64,128,1,float16,float16,8191,0.017322666943073273
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,96,4,64,0,1,float16,float16,8191,0.016890666137139004
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,96,4,64,128,1,float16,float16,16383,0.017866666118303936
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,96,4,64,0,1,float16,float16,16383,0.018944000204404194
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,96,8,64,0,1,float16,float16,1,0.007701333612203598
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,96,8,64,128,1,float16,float16,1,0.007765333478649457
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,96,8,64,128,1,float16,fp8,1,0.006010666489601135
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,96,8,64,0,1,float16,fp8,1,0.0061493335912625
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,96,8,64,128,1,float16,float16,3,0.007461333026488622
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,96,8,64,0,1,float16,float16,3,0.007690666864315669
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,96,8,64,128,1,float16,fp8,3,0.006090666477878888
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,96,8,64,0,1,float16,fp8,3,0.006309333567818006
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,96,8,64,128,1,float16,float16,7,0.007685333490371704
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,96,8,64,0,1,float16,float16,7,0.007797333101431529
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,96,8,64,128,1,float16,fp8,7,0.006037333359320958
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,96,8,64,0,1,float16,fp8,7,0.008725333337982496
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,96,8,64,128,1,float16,float16,15,0.008047999814152718
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,96,8,64,0,1,float16,float16,15,0.007743999982873599
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,96,8,64,128,1,float16,fp8,15,0.006165333092212677
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,96,8,64,0,1,float16,fp8,15,0.006362666686375936
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,96,8,64,128,1,float16,float16,31,0.007989333321650824
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,96,8,64,0,1,float16,float16,31,0.008101333553592363
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,96,8,64,128,1,float16,fp8,31,0.006181333214044571
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,96,8,64,0,1,float16,fp8,31,0.006533333410819371
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,96,8,64,128,1,float16,float16,63,0.008661333471536636
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,96,8,64,0,1,float16,float16,63,0.008789333204428354
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,96,8,64,128,1,float16,fp8,63,0.006213333457708359
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,96,8,64,0,1,float16,fp8,63,0.006480000292261441
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,96,8,64,128,1,float16,float16,127,0.00878399983048439
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,96,8,64,0,1,float16,float16,127,0.00898133342464765
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,96,8,64,128,1,float16,fp8,127,0.00697066696981589
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,96,8,64,0,1,float16,fp8,127,0.007216000308593114
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,96,8,64,128,1,float16,float16,255,0.008805333326260248
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,96,8,64,0,1,float16,float16,255,0.009018666421373686
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,96,8,64,128,1,float16,fp8,255,0.007653333246707916
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,96,8,64,0,1,float16,fp8,255,0.008336000144481659
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,96,8,64,128,1,float16,float16,511,0.009269333134094873
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,96,8,64,0,1,float16,float16,511,0.010015999898314476
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,96,8,64,128,1,float16,fp8,511,0.007621333623925845
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,96,8,64,0,1,float16,fp8,511,0.008837333569924036
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,96,8,64,128,1,float16,float16,1023,0.012165332833925882
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,96,8,64,0,1,float16,float16,1023,0.012256000190973282
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,96,8,64,128,1,float16,fp8,1023,0.007610666876037915
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,96,8,64,0,1,float16,fp8,1023,0.009423999736706415
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,96,8,64,128,1,float16,float16,2047,0.01251199965675672
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,96,8,64,0,1,float16,float16,2047,0.01301866645614306
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,96,8,64,128,1,float16,fp8,2047,0.007418666655818622
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,96,8,64,0,1,float16,fp8,2047,0.010661333799362183
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,96,8,64,128,1,float16,float16,4095,0.012383999923865
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,96,8,64,0,1,float16,float16,4095,0.013546666751305262
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,96,8,64,128,1,float16,fp8,4095,0.007482666522264481
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,96,8,64,0,1,float16,fp8,4095,0.012469333906968435
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,96,8,64,128,1,float16,float16,8191,0.015311999867359797
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,96,8,64,0,1,float16,float16,8191,0.020143999407688778
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,96,8,64,128,1,float16,fp8,8191,0.007530666887760162
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,96,8,64,0,1,float16,fp8,8191,0.015589332828919092
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,96,8,64,128,1,float16,float16,16383,0.016480000068744022
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,96,8,64,0,1,float16,float16,16383,0.02372266600529353
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,96,8,64,128,1,float16,fp8,16383,0.007600000128149986
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,96,8,64,0,1,float16,fp8,16383,0.019071999937295914
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,96,1,64,128,1,float16,float16,1,0.008512000242869059
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,96,1,64,0,1,float16,float16,1,0.00854399986565113
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,96,1,64,128,1,float16,fp8,1,0.008421333506703377
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,96,1,64,0,1,float16,fp8,1,0.008506666868925095
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,96,1,64,128,1,float16,float16,3,0.00847999999920527
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,96,1,64,0,1,float16,float16,3,0.00855466661353906
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,96,1,64,128,1,float16,fp8,3,0.008463999877373377
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,96,1,64,0,1,float16,fp8,3,0.008474666625261307
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,96,1,64,128,1,float16,float16,7,0.00874133345981439
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,96,1,64,0,1,float16,float16,7,0.00874133345981439
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,96,1,64,128,1,float16,fp8,7,0.008565333361426989
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,96,1,64,0,1,float16,fp8,7,0.008746666833758354
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,96,1,64,128,1,float16,float16,15,0.008576000109314919
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,96,1,64,0,1,float16,float16,15,0.008762666955590248
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,96,1,64,128,1,float16,fp8,15,0.008752000207702318
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,96,1,64,0,1,float16,fp8,15,0.008821333448092142
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,96,1,64,128,1,float16,float16,31,0.009594666461149851
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,96,1,64,0,1,float16,float16,31,0.009472000102202097
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,96,1,64,128,1,float16,fp8,31,0.00933333362142245
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,96,1,64,0,1,float16,fp8,31,0.009445333232482275
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,96,1,64,128,1,float16,float16,63,0.012122667084137598
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,96,1,64,0,1,float16,float16,63,0.011247999966144562
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,96,1,64,128,1,float16,fp8,63,0.011029332876205444
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,96,1,64,0,1,float16,fp8,63,0.011183999478816986
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,96,1,64,128,1,float16,float16,127,0.01108266661564509
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,96,1,64,0,1,float16,float16,127,0.0116799995303154
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,96,1,64,128,1,float16,fp8,127,0.011146667102972666
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,96,1,64,0,1,float16,fp8,127,0.011338666081428528
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,96,1,64,128,1,float16,float16,255,0.011242666592200598
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,96,1,64,0,1,float16,float16,255,0.011488000551859537
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,96,1,64,128,1,float16,fp8,255,0.01116266722480456
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,96,1,64,0,1,float16,fp8,255,0.01146666705608368
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,96,1,64,128,1,float16,float16,511,0.011264000087976456
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,96,1,64,0,1,float16,float16,511,0.013077333569526672
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,96,1,64,128,1,float16,fp8,511,0.011066666493813196
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,96,1,64,0,1,float16,fp8,511,0.012960000584522883
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,96,1,64,128,1,float16,float16,1023,0.01129066695769628
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,96,1,64,0,1,float16,float16,1023,0.02027733375628789
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,96,1,64,128,1,float16,fp8,1023,0.011098666737476984
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,96,1,64,0,1,float16,fp8,1023,0.018357332795858383
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,96,1,64,128,1,float16,float16,2047,0.01121066634853681
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,96,1,64,0,1,float16,float16,2047,0.028016000986099243
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,96,1,64,128,1,float16,fp8,2047,0.01166933278242747
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,96,1,64,128,1,float16,float16,4095,0.011610666910807291
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,96,1,64,0,1,float16,float16,4095,0.04837866624196371
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,96,1,64,128,1,float16,fp8,4095,0.011578666667143503
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,96,1,64,0,1,float16,fp8,4095,0.05046933392683665
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,96,1,64,128,1,float16,float16,8191,0.011711999773979187
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,96,1,64,0,1,float16,fp8,2047,0.028768000503381092
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,96,1,64,0,1,float16,float16,8191,0.08801066875457764
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,96,1,64,128,1,float16,fp8,8191,0.01137599969903628
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,96,1,64,0,1,float16,fp8,8191,0.09172266721725464
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,96,1,64,128,1,float16,float16,16383,0.01090666651725769
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,96,1,64,0,1,float16,float16,16383,0.16694400707880655
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,96,1,64,128,1,float16,fp8,16383,0.011007999380429586
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,96,1,64,0,1,float16,fp8,16383,0.17492800951004028
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,96,2,64,128,1,float16,float16,1,0.008442666381597519
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,96,2,64,0,1,float16,float16,1,0.008570666735370954
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,96,2,64,128,1,float16,fp8,1,0.008389333263039589
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,96,2,64,0,1,float16,fp8,1,0.008512000242869059
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,96,2,64,128,1,float16,float16,3,0.008447999755541483
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,96,2,64,0,1,float16,float16,3,0.008613333106040955
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,96,2,64,128,1,float16,fp8,3,0.008389333263039589
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,96,2,64,0,1,float16,fp8,3,0.008698666468262672
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,96,2,64,128,1,float16,float16,7,0.00873066671192646
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,96,2,64,0,1,float16,float16,7,0.008703999842206636
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,96,2,64,128,1,float16,fp8,7,0.008869333192706108
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,96,2,64,0,1,float16,fp8,7,0.008698666468262672
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,96,2,64,128,1,float16,float16,15,0.00877333308259646
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,96,2,64,0,1,float16,float16,15,0.01568000018596649
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,96,2,64,0,1,float16,fp8,15,0.008709333216150602
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,96,2,64,128,1,float16,fp8,15,0.008698666468262672
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,96,2,64,128,1,float16,float16,31,0.009898666913310686
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,96,2,64,128,1,float16,fp8,31,0.009338666374484697
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,96,2,64,0,1,float16,float16,31,0.009455999980370203
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,96,2,64,0,1,float16,fp8,31,0.009455999980370203
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,96,2,64,128,1,float16,float16,63,0.010949333508809408
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,96,2,64,0,1,float16,float16,63,0.011066666493813196
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,96,2,64,128,1,float16,fp8,63,0.011136000355084738
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,96,2,64,0,1,float16,fp8,63,0.01102399950226148
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,96,2,64,128,1,float16,float16,127,0.011450666934251785
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,96,2,64,0,1,float16,float16,127,0.011152000476916632
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,96,2,64,128,1,float16,fp8,127,0.01097600037852923
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,96,2,64,128,1,float16,float16,255,0.011237333218256632
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,96,2,64,0,1,float16,float16,255,0.011407999942700068
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,96,2,64,128,1,float16,fp8,255,0.011045332998037338
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,96,2,64,0,1,float16,fp8,255,0.011168000598748526
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,96,2,64,128,1,float16,float16,511,0.011231999844312668
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,96,2,64,0,1,float16,float16,511,0.012874666601419449
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,96,2,64,128,1,float16,fp8,511,0.011130666981140772
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,96,2,64,0,1,float16,fp8,511,0.012896000097195307
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,96,2,64,128,1,float16,float16,1023,0.011066666493813196
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,96,2,64,0,1,float16,fp8,127,0.011087999989589056
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,96,2,64,0,1,float16,float16,1023,0.017968000223239262
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,96,2,64,128,1,float16,fp8,1023,0.01119999960064888
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,96,2,64,0,1,float16,fp8,1023,0.017871999492247898
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,96,2,64,128,1,float16,float16,2047,0.011264000087976456
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,96,2,64,0,1,float16,float16,2047,0.028064000109831493
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,96,2,64,128,1,float16,fp8,2047,0.01126933346192042
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,96,2,64,0,1,float16,fp8,2047,0.028783999383449554
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,96,2,64,128,1,float16,float16,4095,0.011514666179815928
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,96,2,64,0,1,float16,float16,4095,0.04795733094215393
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,96,2,64,128,1,float16,fp8,4095,0.01129066695769628
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,96,2,64,0,1,float16,fp8,4095,0.049973333875338234
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,96,2,64,128,1,float16,float16,8191,0.011727999895811081
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,96,2,64,0,1,float16,float16,8191,0.08777067065238953
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,96,2,64,128,1,float16,fp8,8191,0.01126933346192042
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,96,2,64,0,1,float16,fp8,8191,0.09159466624259949
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,96,2,64,128,1,float16,float16,16383,0.011178666104873022
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,96,2,64,0,1,float16,float16,16383,0.16729599237442017
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,96,2,64,128,1,float16,fp8,16383,0.011296000331640244
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,96,2,64,0,1,float16,fp8,16383,0.1746506690979004
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,96,4,64,0,1,float16,float16,1,0.019695999721686046
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,96,4,64,128,1,float16,float16,1,0.010431999961535135
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,96,4,64,128,1,float16,float16,3,0.016085332880417507
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,96,4,64,0,1,float16,float16,3,0.008954666554927826
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,96,4,64,0,1,float16,float16,7,0.008698666468262672
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,96,4,64,128,1,float16,float16,7,0.008837333569924036
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,96,4,64,128,1,float16,float16,15,0.01777600000301997
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,96,4,64,0,1,float16,float16,15,0.008714666590094566
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,96,4,64,128,1,float16,float16,31,0.009525333220760027
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,96,4,64,0,1,float16,float16,31,0.009359999870260557
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,96,4,64,0,1,float16,float16,63,0.021370666722456615
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,96,4,64,128,1,float16,float16,63,0.018266666680574417
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,96,4,64,128,1,float16,float16,127,0.011045332998037338
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,96,4,64,0,1,float16,float16,127,0.01116266722480456
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,96,4,64,0,1,float16,float16,255,0.011370666325092316
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,96,4,64,128,1,float16,float16,255,0.017893332988023758
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,96,4,64,128,1,float16,float16,511,0.017653333644072216
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,96,4,64,0,1,float16,float16,511,0.012837332983811697
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,96,4,64,128,1,float16,float16,1023,0.013045333325862885
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,96,4,64,0,1,float16,float16,1023,0.013183999806642532
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,96,4,64,128,1,float16,float16,2047,0.01381333296497663
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,96,4,64,128,1,float16,float16,4095,0.01370666672786077
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,96,4,64,0,1,float16,float16,4095,0.014943999548753103
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,96,4,64,0,1,float16,float16,2047,0.01687466725707054
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,96,4,64,0,1,float16,float16,8191,0.023077333966890972
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,96,4,64,128,1,float16,float16,16383,0.018816000471512478
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,96,4,64,128,1,float16,float16,8191,0.017909333109855652
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,96,4,64,0,1,float16,float16,16383,0.027317332724730175
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,96,8,64,0,1,float16,float16,1,0.008512000242869059
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,96,8,64,0,1,float16,fp8,1,0.006239999706546466
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,96,8,64,128,1,float16,fp8,1,0.006042666733264923
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,96,8,64,128,1,float16,float16,1,0.00873066671192646
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,96,8,64,128,1,float16,float16,3,0.008474666625261307
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,96,8,64,128,1,float16,fp8,3,0.006021333237489064
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,96,8,64,128,1,float16,float16,7,0.008778666456540426
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,96,8,64,0,1,float16,float16,3,0.008576000109314919
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,96,8,64,0,1,float16,fp8,7,0.005999999741713206
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,96,8,64,128,1,float16,float16,15,0.008682666967312494
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,96,8,64,0,1,float16,float16,7,0.008602666358153025
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,96,8,64,128,1,float16,fp8,7,0.006384000182151794
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,96,8,64,0,1,float16,float16,15,0.008661333471536636
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,96,8,64,128,1,float16,fp8,15,0.006261333202322324
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,96,8,64,0,1,float16,fp8,15,0.006330666442712148
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,96,8,64,128,1,float16,float16,31,0.009450666606426239
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,96,8,64,0,1,float16,float16,31,0.009248000259200731
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,96,8,64,128,1,float16,fp8,31,0.0064213331788778305
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,96,8,64,0,1,float16,fp8,31,0.006351999938488007
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,96,8,64,128,1,float16,float16,63,0.010949333508809408
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,96,8,64,0,1,float16,float16,63,0.01101333275437355
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,96,8,64,128,1,float16,fp8,63,0.00726400005320708
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,96,8,64,0,1,float16,fp8,63,0.00625599982837836
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,96,8,64,128,1,float16,float16,127,0.011120000233252844
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,96,8,64,0,1,float16,float16,127,0.011071999867757162
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,96,8,64,128,1,float16,fp8,127,0.007205333560705185
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,96,8,64,0,1,float16,fp8,127,0.007258666679263115
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,96,8,64,128,1,float16,float16,255,0.011205332974592844
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,96,8,64,0,1,float16,float16,255,0.011328000575304031
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,96,8,64,128,1,float16,fp8,255,0.007818666597207388
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,96,8,64,0,1,float16,fp8,255,0.008453333129485449
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,96,8,64,128,1,float16,fp8,511,0.007946666950980822
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,96,8,64,0,1,float16,fp8,511,0.009029333169261614
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,96,8,64,128,1,float16,float16,511,0.011760000139474869
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,96,8,64,0,1,float16,float16,511,0.012245333443085352
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,96,8,64,0,1,float16,float16,1023,0.012128000458081564
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,96,8,64,0,1,float16,fp8,3,0.008778666456540426
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,96,8,64,128,1,float16,fp8,1023,0.007983999947706858
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,96,8,64,0,1,float16,fp8,1023,0.009397333487868309
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,96,8,64,0,1,float16,float16,2047,0.0184906671444575
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,96,8,64,128,1,float16,float16,1023,0.011711999773979187
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,96,8,64,128,1,float16,fp8,2047,0.007813333223263422
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,96,8,64,0,1,float16,fp8,2047,0.011242666592200598
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,96,8,64,128,1,float16,float16,4095,0.015882667154073715
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,96,8,64,0,1,float16,float16,4095,0.019658666104078293
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,96,8,64,128,1,float16,fp8,4095,0.007914666707317034
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,96,8,64,0,1,float16,fp8,4095,0.013077333569526672
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,96,8,64,128,1,float16,float16,2047,0.015493333339691162
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,96,8,64,0,1,float16,float16,8191,0.02808533360560735
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,96,8,64,128,1,float16,float16,8191,0.019039999693632126
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,96,8,64,128,1,float16,fp8,8191,0.007994666695594788
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,96,8,64,128,1,float16,float16,16383,0.02237333357334137
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,96,8,64,0,1,float16,fp8,8191,0.018565333137909572
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,96,8,64,0,1,float16,float16,16383,0.03758399933576584
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,96,8,64,128,1,float16,fp8,16383,0.008346666892369589
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,96,8,64,0,1,float16,fp8,16383,0.02548266698916753
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,96,1,64,128,1,float16,float16,1,0.06131199995676676
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,96,1,64,0,1,float16,float16,1,0.060720001657803856
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,96,1,64,128,1,float16,fp8,1,0.06160533428192139
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,96,1,64,0,1,float16,fp8,1,0.06124266485373179
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,96,1,64,128,1,float16,float16,3,0.06133866806825002
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,96,1,64,0,1,float16,float16,3,0.06079466640949249
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,96,1,64,128,1,float16,fp8,3,0.06171200176080068
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,96,1,64,0,1,float16,fp8,3,0.06132799883683523
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,96,1,64,128,1,float16,float16,7,0.06359466910362244
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,96,1,64,0,1,float16,float16,7,0.06309866905212402
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,96,1,64,128,1,float16,fp8,7,0.06357333560784657
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,96,1,64,0,1,float16,fp8,7,0.06339733302593231
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,96,1,64,128,1,float16,float16,15,0.0660159985224406
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,96,1,64,128,1,float16,fp8,15,0.0661599983771642
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,96,1,64,0,1,float16,float16,15,0.06519466638565063
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,96,1,64,0,1,float16,fp8,15,0.06564266482988994
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,96,1,64,128,1,float16,float16,31,0.08396266897519429
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,96,1,64,0,1,float16,float16,31,0.08229333162307739
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,96,1,64,128,1,float16,fp8,31,0.08269333342711131
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,96,1,64,0,1,float16,fp8,31,0.08292266726493835
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,96,1,64,128,1,float16,float16,63,0.08355200290679932
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,96,1,64,0,1,float16,float16,63,0.08291199803352356
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,96,1,64,128,1,float16,fp8,63,0.08368000388145447
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,96,1,64,0,1,float16,fp8,63,0.08404266834259033
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,96,1,64,128,1,float16,float16,127,0.08402666449546814
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,96,1,64,0,1,float16,float16,127,0.0842026670773824
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,96,1,64,128,1,float16,fp8,127,0.08531733353932698
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,96,1,64,0,1,float16,fp8,127,0.08428800106048584
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,96,1,64,128,1,float16,float16,255,0.08452266454696655
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,96,1,64,0,1,float16,float16,255,0.10332799951235454
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,96,1,64,128,1,float16,fp8,255,0.08426133791605632
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,96,1,64,0,1,float16,fp8,255,0.10202133655548096
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,96,1,64,128,1,float16,float16,511,0.08640000224113464
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,96,1,64,0,1,float16,float16,511,0.15773866573969522
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,96,1,64,128,1,float16,fp8,511,0.08451199531555176
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,96,1,64,0,1,float16,fp8,511,0.16106133659680685
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,96,1,64,128,1,float16,float16,1023,0.08488000432650249
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,96,1,64,0,1,float16,float16,1023,0.2682666579882304
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,96,1,64,128,1,float16,fp8,1023,0.08551466464996338
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,96,1,64,0,1,float16,fp8,1023,0.2734079957008362
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,96,1,64,128,1,float16,float16,2047,0.08516266942024231
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,96,1,64,0,1,float16,float16,2047,0.49117334683736164
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,96,1,64,128,1,float16,fp8,2047,0.08448533217112224
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,96,2,64,128,1,float16,float16,1,0.06156266729036967
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,96,2,64,0,1,float16,float16,1,0.060864001512527466
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,96,1,64,0,1,float16,fp8,2047,0.49827734629313153
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,96,2,64,128,1,float16,fp8,1,0.061679999033610024
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,96,2,64,0,1,float16,fp8,1,0.06113066772619883
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,96,2,64,128,1,float16,float16,3,0.06100266675154368
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,96,2,64,0,1,float16,float16,3,0.0614879975716273
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,96,2,64,128,1,float16,fp8,3,0.0613919993241628
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,96,2,64,0,1,float16,fp8,3,0.06117866436640421
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,96,2,64,128,1,float16,float16,7,0.06318933268388112
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,96,2,64,0,1,float16,float16,7,0.06317866841952006
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,96,2,64,0,1,float16,fp8,7,0.06346133351325989
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,96,2,64,128,1,float16,fp8,7,0.06341866652170818
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,96,2,64,128,1,float16,float16,15,0.06550933420658112
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,96,2,64,0,1,float16,float16,15,0.0654666672150294
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,96,2,64,128,1,float16,fp8,15,0.06571733454863231
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,96,2,64,0,1,float16,fp8,15,0.06572266419728597
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,96,2,64,128,1,float16,float16,31,0.08249600231647491
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,96,2,64,0,1,float16,float16,31,0.0824480007092158
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,96,2,64,0,1,float16,fp8,31,0.08357333143552144
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,96,2,64,128,1,float16,fp8,31,0.08493866523106892
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,96,2,64,0,1,float16,float16,63,0.08335999647776286
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,96,2,64,128,1,float16,float16,63,0.08328000207742055
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,96,2,64,0,1,float16,fp8,63,0.08381866415341695
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,96,2,64,128,1,float16,fp8,63,0.08380267024040222
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,96,2,64,128,1,float16,float16,127,0.08449066678682964
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,96,2,64,0,1,float16,float16,127,0.08434666196505229
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,96,2,64,128,1,float16,fp8,127,0.08455466230710347
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,96,2,64,0,1,float16,fp8,127,0.08503466844558716
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,96,2,64,0,1,float16,float16,255,0.10276800394058228
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,96,2,64,128,1,float16,float16,255,0.08481066425641377
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,96,2,64,128,1,float16,fp8,255,0.08457066615422566
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,96,2,64,0,1,float16,fp8,255,0.10212799906730652
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,96,2,64,128,1,float16,float16,511,0.08517866333325703
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,96,2,64,0,1,float16,float16,511,0.15826132893562317
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,96,2,64,128,1,float16,fp8,511,0.08589866757392883
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,96,2,64,0,1,float16,fp8,511,0.16154133280118307
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,96,2,64,128,1,float16,float16,1023,0.08565866947174072
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,96,2,64,0,1,float16,float16,1023,0.2683680057525635
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,96,2,64,128,1,float16,fp8,1023,0.084714670976003
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,96,2,64,0,1,float16,fp8,1023,0.27500800291697186
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,96,2,64,128,1,float16,float16,2047,0.08547733227411906
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,96,2,64,0,1,float16,float16,2047,0.496234655380249
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,96,2,64,128,1,float16,fp8,2047,0.08462933699289958
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,96,2,64,0,1,float16,fp8,2047,0.500767985979716
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,96,4,64,0,1,float16,float16,1,0.017914666483799618
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,96,4,64,128,1,float16,float16,1,0.01958400011062622
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,96,4,64,128,1,float16,float16,3,0.01844266677896182
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,96,4,64,0,1,float16,float16,3,0.017727999637524288
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,96,4,64,128,1,float16,float16,7,0.017322666943073273
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,96,4,64,0,1,float16,float16,7,0.019530666371186573
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,96,4,64,128,1,float16,float16,15,0.017157333592573803
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,96,4,64,0,1,float16,float16,15,0.0180479995906353
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,96,4,64,128,1,float16,float16,31,0.017418666432301205
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,96,4,64,0,1,float16,float16,31,0.017258666455745697
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,96,4,64,128,1,float16,float16,63,0.017375999440749485
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,96,4,64,0,1,float16,float16,63,0.01695999999841054
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,96,4,64,128,1,float16,float16,127,0.017909333109855652
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,96,4,64,0,1,float16,float16,127,0.016917333006858826
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,96,4,64,0,1,float16,float16,255,0.03141866624355316
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,96,4,64,128,1,float16,float16,255,0.03178133318821589
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,96,4,64,128,1,float16,float16,511,0.042250668009122215
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,96,4,64,0,1,float16,float16,511,0.048058668772379555
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,96,4,64,128,1,float16,float16,1023,0.043237333496411644
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,96,4,64,0,1,float16,float16,1023,0.05975466469923655
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,96,4,64,128,1,float16,float16,2047,0.04468800127506256
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,96,4,64,0,1,float16,float16,2047,0.07925866544246674
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,96,8,64,128,1,float16,float16,1,0.025759999950726826
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,96,8,64,0,1,float16,float16,1,0.02593066543340683
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,96,8,64,128,1,float16,fp8,1,0.007920000081261
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,96,8,64,128,1,float16,float16,3,0.02409599969784419
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,96,8,64,0,1,float16,float16,3,0.02478933334350586
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,96,8,64,128,1,float16,fp8,3,0.007914666707317034
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,96,8,64,0,1,float16,fp8,3,0.007829333345095316
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,96,8,64,0,1,float16,fp8,1,0.007482666522264481
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,96,8,64,128,1,float16,float16,7,0.02420799930890401
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,96,8,64,0,1,float16,float16,7,0.02385066697994868
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,96,8,64,128,1,float16,fp8,7,0.007797333101431529
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,96,8,64,0,1,float16,fp8,7,0.0075040000180403394
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,96,8,64,128,1,float16,float16,15,0.024266667664051056
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,96,8,64,0,1,float16,float16,15,0.02422400067249934
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,96,8,64,128,1,float16,fp8,15,0.007749333356817563
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,96,8,64,0,1,float16,fp8,15,0.00766933336853981
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,96,8,64,128,1,float16,float16,31,0.024314666787783306
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,96,8,64,0,1,float16,float16,31,0.024170666933059692
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,96,8,64,128,1,float16,fp8,31,0.007776000226537387
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,96,8,64,0,1,float16,fp8,31,0.007605333502093951
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,96,8,64,128,1,float16,float16,63,0.023898666103680927
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,96,8,64,0,1,float16,float16,63,0.023792001108328503
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,96,8,64,128,1,float16,fp8,63,0.007802666475375493
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,96,8,64,0,1,float16,fp8,63,0.007573333258430163
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,96,8,64,128,1,float16,float16,127,0.023797333240509033
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,96,8,64,0,1,float16,float16,127,0.02370133250951767
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,96,8,64,128,1,float16,fp8,127,0.009109333157539368
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,96,8,64,0,1,float16,fp8,127,0.009125333279371262
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,96,8,64,128,1,float16,float16,255,0.044138665000597634
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,96,8,64,0,1,float16,float16,255,0.04424533247947693
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,96,8,64,128,1,float16,fp8,255,0.015168000012636185
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,96,8,64,0,1,float16,fp8,255,0.01777600000301997
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,96,8,64,128,1,float16,float16,511,0.044154668847719826
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,96,8,64,0,1,float16,float16,511,0.052986666560173035
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,96,8,64,128,1,float16,fp8,511,0.015135999768972397
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,96,8,64,0,1,float16,fp8,511,0.022730665902296703
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,96,8,64,128,1,float16,float16,1023,0.04427200059096018
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,96,8,64,0,1,float16,float16,1023,0.06635199983914693
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,96,8,64,128,1,float16,fp8,1023,0.015109332899252573
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,96,8,64,0,1,float16,fp8,1023,0.03283733377854029
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,96,8,64,128,1,float16,float16,2047,0.04679466784000397
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,96,8,64,0,1,float16,float16,2047,0.0912000040213267
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,96,8,64,128,1,float16,fp8,2047,0.015029333531856537
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,96,8,64,0,1,float16,fp8,2047,0.05427733560403188
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,96,1,64,128,1,float16,float16,1,0.013541333377361298
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,96,1,64,0,1,float16,float16,1,0.013365333278973898
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,96,1,64,128,1,float16,fp8,1,0.013487999637921652
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,96,1,64,0,1,float16,fp8,1,0.013365333278973898
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,96,1,64,128,1,float16,float16,3,0.013712000101804733
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,96,1,64,0,1,float16,float16,3,0.013376000026861826
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,96,1,64,128,1,float16,fp8,3,0.01370666672786077
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,96,1,64,0,1,float16,fp8,3,0.01333333303531011
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,96,1,64,128,1,float16,float16,7,0.0138026662170887
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,96,1,64,0,1,float16,float16,7,0.01357866699496905
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,96,1,64,128,1,float16,fp8,7,0.013882666826248169
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,96,1,64,0,1,float16,fp8,7,0.013637332866589228
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,96,1,64,128,1,float16,float16,15,0.013845333208640417
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,96,1,64,0,1,float16,float16,15,0.013621332744757334
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,96,1,64,128,1,float16,fp8,15,0.013823999712864557
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,96,1,64,0,1,float16,fp8,15,0.013605333864688873
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,96,1,64,128,1,float16,float16,31,0.013861333330472311
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,96,1,64,0,1,float16,float16,31,0.013493333011865616
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,96,1,64,128,1,float16,fp8,31,0.013754667093356451
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,96,1,64,0,1,float16,fp8,31,0.01350933313369751
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,96,1,64,128,1,float16,float16,63,0.015450666348139444
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,96,1,64,0,1,float16,float16,63,0.015279999623696009
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,96,1,64,128,1,float16,fp8,63,0.015413332730531693
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,96,1,64,0,1,float16,fp8,63,0.015237333873907724
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,96,1,64,128,1,float16,float16,127,0.015728000551462173
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,96,1,64,0,1,float16,float16,127,0.01562133307258288
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,96,1,64,128,1,float16,fp8,127,0.015594666202863058
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,96,1,64,0,1,float16,fp8,127,0.01551466683546702
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,96,1,64,128,1,float16,float16,255,0.016010666886965435
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,96,1,64,0,1,float16,float16,255,0.015834666788578033
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,96,1,64,128,1,float16,fp8,255,0.015674666812022526
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,96,1,64,0,1,float16,fp8,255,0.015520000209410986
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,96,1,64,128,1,float16,float16,511,0.016410666207472484
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,96,1,64,0,1,float16,float16,511,0.018405333161354065
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,96,1,64,128,1,float16,fp8,511,0.015674666812022526
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,96,1,64,0,1,float16,fp8,511,0.018101333330074947
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,96,1,64,128,1,float16,float16,1023,0.015834666788578033
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,96,1,64,0,1,float16,float16,1023,0.02589333305756251
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,96,1,64,128,1,float16,fp8,1023,0.015546667079130808
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,96,1,64,0,1,float16,fp8,1023,0.025418666501839954
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,96,1,64,128,1,float16,float16,2047,0.01575999955336253
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,96,1,64,0,1,float16,float16,2047,0.0403466671705246
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,96,1,64,128,1,float16,fp8,2047,0.015642666568358738
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,96,1,64,0,1,float16,fp8,2047,0.0407679999868075
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,96,1,64,128,1,float16,float16,4095,0.015770666301250458
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,96,1,64,0,1,float16,float16,4095,0.07008533179759979
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,96,1,64,128,1,float16,fp8,4095,0.015706667055686314
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,96,1,64,0,1,float16,fp8,4095,0.07165333131949107
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,96,1,64,128,1,float16,float16,8191,0.015834666788578033
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,96,1,64,0,1,float16,float16,8191,0.1292586624622345
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,96,1,64,128,1,float16,fp8,8191,0.01562133307258288
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,96,1,64,0,1,float16,fp8,8191,0.1320693294207255
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,96,1,64,128,1,float16,float16,16383,0.015856000284353893
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,96,1,64,0,1,float16,float16,16383,0.24878400564193726
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,96,1,64,128,1,float16,fp8,16383,0.015583999454975128
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,96,1,64,0,1,float16,fp8,16383,0.2531999945640564
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,96,2,64,128,1,float16,float16,1,0.013653332988421122
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,96,2,64,0,1,float16,float16,1,0.013658666362365087
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,96,2,64,128,1,float16,fp8,1,0.013637332866589228
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,96,2,64,0,1,float16,fp8,1,0.013562666873137156
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,96,2,64,128,1,float16,float16,3,0.013647999614477158
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,96,2,64,0,1,float16,float16,3,0.013701333353916803
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,96,2,64,128,1,float16,fp8,3,0.013674666484196981
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,96,2,64,0,1,float16,fp8,3,0.013541333377361298
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,96,2,64,128,1,float16,float16,7,0.013882666826248169
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,96,2,64,0,1,float16,float16,7,0.013653332988421122
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,96,2,64,128,1,float16,fp8,7,0.0138026662170887
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,96,2,64,0,1,float16,fp8,7,0.013663999736309052
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,96,2,64,128,1,float16,float16,15,0.013765333841244379
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,96,2,64,0,1,float16,float16,15,0.013610667238632837
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,96,2,64,128,1,float16,fp8,15,0.013877333452304205
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,96,2,64,0,1,float16,fp8,15,0.013658666362365087
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,96,2,64,128,1,float16,float16,31,0.013679999858140945
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,96,2,64,0,1,float16,float16,31,0.013424000392357508
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,96,2,64,128,1,float16,fp8,31,0.013461332768201828
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,96,2,64,0,1,float16,fp8,31,0.01328533391157786
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,96,2,64,128,1,float16,float16,63,0.015381333728631338
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,96,2,64,0,1,float16,float16,63,0.015274666249752045
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,96,2,64,128,1,float16,fp8,63,0.015370666980743408
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,96,2,64,0,1,float16,fp8,63,0.01526933287580808
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,96,2,64,128,1,float16,float16,127,0.01570133368174235
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,96,2,64,0,1,float16,float16,127,0.015610666324694952
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,96,2,64,128,1,float16,fp8,127,0.01754666616519292
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,96,2,64,0,1,float16,fp8,127,0.01724799970785777
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,96,2,64,128,1,float16,float16,255,0.01589866727590561
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,96,2,64,0,1,float16,float16,255,0.015775999675194424
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,96,2,64,128,1,float16,fp8,255,0.015754666179418564
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,96,2,64,0,1,float16,fp8,255,0.01563199982047081
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,96,2,64,128,1,float16,float16,511,0.015813333292802174
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,96,2,64,0,1,float16,float16,511,0.01841066653529803
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,96,2,64,128,1,float16,fp8,511,0.015696000307798386
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,96,2,64,0,1,float16,fp8,511,0.0185759998857975
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,96,2,64,128,1,float16,float16,1023,0.015872000406185787
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,96,2,64,0,1,float16,float16,1023,0.02593066543340683
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,96,2,64,128,1,float16,fp8,1023,0.015642666568358738
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,96,2,64,0,1,float16,fp8,1023,0.02550400048494339
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,96,2,64,128,1,float16,float16,2047,0.015909332782030106
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,96,2,64,0,1,float16,float16,2047,0.04032533367474874
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,96,2,64,128,1,float16,fp8,2047,0.015615999698638916
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,96,2,64,0,1,float16,fp8,2047,0.040752001106739044
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,96,2,64,128,1,float16,float16,4095,0.015893333901961643
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,96,2,64,0,1,float16,float16,4095,0.07016533116499583
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,96,2,64,128,1,float16,fp8,4095,0.015674666812022526
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,96,2,64,0,1,float16,fp8,4095,0.07164266705513
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,96,2,64,128,1,float16,float16,8191,0.01589866727590561
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,96,2,64,0,1,float16,float16,8191,0.12928533554077148
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,96,2,64,128,1,float16,fp8,8191,0.015658666690190632
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,96,2,64,0,1,float16,fp8,8191,0.1321333348751068
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,96,2,64,128,1,float16,float16,16383,0.016000000139077503
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,96,2,64,0,1,float16,float16,16383,0.24775999784469604
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,96,2,64,128,1,float16,fp8,16383,0.015781333049138386
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,96,2,64,0,1,float16,fp8,16383,0.25334932406743366
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,96,4,64,128,1,float16,float16,1,0.013760000467300415
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,96,4,64,0,1,float16,float16,1,0.013487999637921652
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,96,4,64,0,1,float16,float16,3,0.013653332988421122
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,96,4,64,128,1,float16,float16,3,0.013621332744757334
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,96,4,64,128,1,float16,float16,7,0.017045332739750545
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,96,4,64,0,1,float16,float16,7,0.013669333110253016
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,96,4,64,128,1,float16,float16,15,0.013855999956528345
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,96,4,64,0,1,float16,float16,15,0.013717333475748697
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,96,4,64,0,1,float16,float16,31,0.013482666263977686
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,96,4,64,128,1,float16,float16,31,0.01736533393462499
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,96,4,64,128,1,float16,float16,63,0.015290666371583939
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,96,4,64,0,1,float16,float16,63,0.015168000012636185
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,96,4,64,128,1,float16,float16,127,0.01859733338157336
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,96,4,64,0,1,float16,float16,127,0.01602666700879733
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,96,4,64,128,1,float16,float16,255,0.01568000018596649
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,96,4,64,0,1,float16,float16,255,0.01594666639963786
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,96,4,64,128,1,float16,float16,511,0.012842666357755661
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,96,4,64,0,1,float16,float16,511,0.012778667112191519
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,96,4,64,128,1,float16,float16,1023,0.012661332885424295
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,96,4,64,0,1,float16,float16,1023,0.013653332988421122
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,96,4,64,128,1,float16,float16,2047,0.01788266624013583
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,96,4,64,0,1,float16,float16,2047,0.020501332978407543
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,96,4,64,128,1,float16,float16,4095,0.018677332748969395
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,96,4,64,0,1,float16,float16,4095,0.023189333577950794
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,96,4,64,128,1,float16,float16,8191,0.022255999346574146
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,96,4,64,0,1,float16,float16,8191,0.032469332218170166
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,96,4,64,128,1,float16,float16,16383,0.025861332813898723
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,96,4,64,0,1,float16,float16,16383,0.04475200176239014
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,96,8,64,128,1,float16,float16,1,0.013584000368913015
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,96,8,64,0,1,float16,float16,1,0.013594667116800943
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,96,8,64,128,1,float16,fp8,1,0.006277333324154218
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,96,8,64,128,1,float16,float16,3,0.013365333278973898
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,96,8,64,0,1,float16,float16,3,0.01351999988158544
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,96,8,64,128,1,float16,fp8,3,0.006341333190600078
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,96,8,64,0,1,float16,fp8,3,0.006282666698098183
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,96,8,64,128,1,float16,float16,7,0.013712000101804733
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,96,8,64,0,1,float16,float16,7,0.013722666849692663
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,96,8,64,128,1,float16,fp8,7,0.006501333167155583
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,96,8,64,0,1,float16,fp8,7,0.006384000182151794
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,96,8,64,128,1,float16,float16,15,0.013749333719412485
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,96,8,64,0,1,float16,float16,15,0.013839999834696451
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,96,8,64,128,1,float16,fp8,15,0.006490666419267654
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,96,8,64,0,1,float16,fp8,15,0.006330666442712148
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,96,8,64,128,1,float16,float16,31,0.01351999988158544
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,96,8,64,0,1,float16,fp8,1,0.006341333190600078
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,96,8,64,0,1,float16,float16,31,0.013621332744757334
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,96,8,64,128,1,float16,fp8,31,0.006469333544373512
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,96,8,64,0,1,float16,fp8,31,0.0064319999267657595
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,96,8,64,128,1,float16,float16,63,0.01543466622630755
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,96,8,64,0,1,float16,float16,63,0.015354666858911514
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,96,8,64,128,1,float16,fp8,63,0.006464000170429547
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,96,8,64,0,1,float16,fp8,63,0.006341333190600078
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,96,8,64,128,1,float16,float16,127,0.017797333498795826
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,96,8,64,0,1,float16,float16,127,0.016517333686351776
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,96,8,64,128,1,float16,fp8,127,0.007221333061655362
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,96,8,64,0,1,float16,fp8,127,0.00725333330531915
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,96,8,64,128,1,float16,float16,255,0.011829332758982977
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,96,8,64,0,1,float16,float16,255,0.011839999506870905
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,96,8,64,128,1,float16,fp8,255,0.007861333588759104
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,96,8,64,0,1,float16,fp8,255,0.008298666526873907
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,96,8,64,128,1,float16,float16,511,0.012085333466529846
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,96,8,64,0,1,float16,float16,511,0.011946666985750198
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,96,8,64,128,1,float16,fp8,511,0.007823999971151352
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,96,8,64,0,1,float16,fp8,511,0.008474666625261307
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,96,8,64,128,1,float16,float16,1023,0.016735999534527462
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,96,8,64,0,1,float16,float16,1023,0.01718933383623759
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,96,8,64,128,1,float16,fp8,1023,0.008218666538596153
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,96,8,64,0,1,float16,fp8,1023,0.009866666669646898
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,96,8,64,0,1,float16,float16,2047,0.024112001061439514
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,96,8,64,128,1,float16,float16,2047,0.019941333681344986
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,96,8,64,128,1,float16,fp8,2047,0.008218666538596153
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,96,8,64,0,1,float16,fp8,2047,0.012970666090647379
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,96,8,64,128,1,float16,float16,4095,0.023557332654794056
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,96,8,64,0,1,float16,float16,4095,0.03346133232116699
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,96,8,64,128,1,float16,fp8,4095,0.007871999715765318
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,96,8,64,0,1,float16,fp8,4095,0.017466666797796886
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,96,8,64,0,1,float16,float16,8191,0.04409066836039225
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,96,8,64,128,1,float16,fp8,8191,0.011178666104873022
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,96,8,64,0,1,float16,fp8,8191,0.026949333647886913
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,96,8,64,128,1,float16,float16,16383,0.0341333324710528
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,96,8,64,0,1,float16,float16,16383,0.06783999999364217
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,96,8,64,0,1,float16,fp8,16383,0.03757333258787791
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,96,8,64,128,1,float16,fp8,16383,0.009130666653315226
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,96,1,64,128,1,float16,float16,1,0.11534399787584941
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,96,1,64,0,1,float16,float16,1,0.11591466267903645
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,96,1,64,128,1,float16,fp8,1,0.1163146694501241
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,96,1,64,0,1,float16,fp8,1,0.11678399642308553
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,96,1,64,128,1,float16,float16,3,0.11543466647466023
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,96,1,64,128,1,float16,fp8,3,0.11652266979217529
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,96,1,64,0,1,float16,float16,3,0.11602666974067688
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,96,1,64,0,1,float16,fp8,3,0.11644267042477925
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,96,1,64,128,1,float16,float16,7,0.1204159955183665
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,96,1,64,0,1,float16,float16,7,0.12098133563995361
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,96,1,64,128,1,float16,fp8,7,0.12411733468373616
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,96,1,64,0,1,float16,fp8,7,0.12181867162386577
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,96,1,64,128,1,float16,float16,15,0.12788800398508707
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,96,1,64,0,1,float16,float16,15,0.12577600280443826
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,96,1,64,128,1,float16,fp8,15,0.12897599736849466
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,96,1,64,128,1,float16,float16,31,0.16013866662979126
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,96,1,64,0,1,float16,float16,31,0.1611840029557546
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,96,1,64,128,1,float16,fp8,31,0.1606613298257192
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,96,1,64,0,1,float16,fp8,31,0.160480002562205
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,96,1,64,128,1,float16,float16,63,0.16074132919311523
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,96,1,64,0,1,float16,float16,63,0.16076800227165222
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,96,1,64,0,1,float16,fp8,15,0.12575999895731607
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,96,1,64,128,1,float16,fp8,63,0.1621226668357849
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,96,1,64,0,1,float16,fp8,63,0.16182933251063028
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,96,1,64,128,1,float16,float16,127,0.16301866372426352
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,96,8,64,128,1,float16,float16,8191,0.027888000011444092
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,96,1,64,0,1,float16,float16,127,0.1628426710764567
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,96,1,64,128,1,float16,fp8,127,0.1634986698627472
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,96,1,64,128,1,float16,float16,255,0.16395200292269388
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,96,1,64,0,1,float16,float16,255,0.19946134090423584
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,96,1,64,128,1,float16,fp8,255,0.16370667020479837
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,96,1,64,0,1,float16,fp8,127,0.1653600037097931
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,96,1,64,0,1,float16,fp8,255,0.19796266158421835
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,96,1,64,128,1,float16,float16,511,0.1651520033677419
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,96,1,64,0,1,float16,float16,511,0.3104906678199768
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,96,1,64,128,1,float16,fp8,511,0.16345066825548807
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,96,1,64,0,1,float16,fp8,511,0.3118720054626465
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,96,1,64,128,1,float16,float16,1023,0.16670932372411093
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,96,1,64,128,1,float16,fp8,1023,0.1634880006313324
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,96,1,64,0,1,float16,float16,1023,0.5291839838027954
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,96,2,64,128,1,float16,float16,1,0.1159946620464325
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,96,1,64,0,1,float16,fp8,1023,0.5406026840209961
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,96,2,64,0,1,float16,float16,1,0.11585066715876262
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,96,2,64,128,1,float16,fp8,1,0.11684800187746684
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,96,2,64,0,1,float16,fp8,1,0.11664533615112305
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,96,2,64,128,1,float16,float16,3,0.1159946620464325
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,96,2,64,0,1,float16,float16,3,0.11615999539693196
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,96,2,64,0,1,float16,fp8,3,0.11672533551851909
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,96,2,64,128,1,float16,fp8,3,0.1167733371257782
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,96,2,64,128,1,float16,float16,7,0.12044266859690349
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,96,2,64,0,1,float16,float16,7,0.12040533622105916
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,96,2,64,128,1,float16,fp8,7,0.12110400199890137
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,96,2,64,0,1,float16,fp8,7,0.12127466996510823
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,96,2,64,128,1,float16,float16,15,0.12517333030700684
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,96,2,64,128,1,float16,fp8,15,0.12589866916338602
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,96,2,64,0,1,float16,fp8,15,0.12589333454767862
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,96,2,64,128,1,float16,float16,31,0.15963733196258545
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,96,2,64,0,1,float16,float16,31,0.15985600153605142
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,96,2,64,128,1,float16,fp8,31,0.1607146660486857
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,96,2,64,0,1,float16,fp8,31,0.16060266892115274
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,96,2,64,128,1,float16,float16,63,0.16100266575813293
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,96,2,64,0,1,float16,float16,63,0.1609760026137034
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,96,2,64,128,1,float16,fp8,63,0.1623146633307139
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,96,2,64,0,1,float16,fp8,63,0.16221333543459573
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,96,2,64,0,1,float16,float16,15,0.1251413325468699
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,96,2,64,128,1,float16,float16,127,0.16380266348520914
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,96,2,64,0,1,float16,float16,127,0.16338133811950684
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,96,2,64,128,1,float16,fp8,127,0.16377600034077963
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,96,2,64,0,1,float16,fp8,127,0.16378133495648703
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,96,2,64,128,1,float16,float16,255,0.16377066572507223
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,96,2,64,0,1,float16,float16,255,0.20167466004689535
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,96,2,64,128,1,float16,fp8,255,0.16456533471743265
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,96,2,64,0,1,float16,fp8,255,0.19852266709009805
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,96,2,64,128,1,float16,float16,511,0.16475199659665427
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,96,2,64,0,1,float16,float16,511,0.3089546759923299
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,96,2,64,128,1,float16,fp8,511,0.16476266582806906
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,96,2,64,0,1,float16,fp8,511,0.3131893277168274
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,96,2,64,128,1,float16,float16,1023,0.16476266582806906
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,96,2,64,0,1,float16,float16,1023,0.5370453198750814
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,96,2,64,128,1,float16,fp8,1023,0.16389866669972739
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,96,2,64,0,1,float16,fp8,1023,0.5419253508249918
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,96,4,64,0,1,float16,float16,1,0.028965334097544353
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,96,4,64,128,1,float16,float16,1,0.03001066545645396
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,96,4,64,0,1,float16,float16,3,0.030879999200503033
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,96,4,64,128,1,float16,float16,3,0.03090133269627889
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,96,4,64,128,1,float16,float16,7,0.02882666637500127
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,96,4,64,0,1,float16,float16,7,0.030954666435718536
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,96,4,64,128,1,float16,float16,15,0.028543998797734577
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,96,4,64,0,1,float16,float16,15,0.028368001182874043
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,96,4,64,0,1,float16,float16,31,0.030634666482607525
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,96,4,64,128,1,float16,float16,31,0.030975999931494396
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,96,4,64,128,1,float16,float16,63,0.030207999050617218
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,96,4,64,0,1,float16,float16,63,0.028399998943010967
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,96,4,64,0,1,float16,float16,127,0.02790933350721995
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,96,4,64,128,1,float16,float16,127,0.030346666773160298
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,96,4,64,128,1,float16,float16,255,0.05287466446558634
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,96,4,64,0,1,float16,float16,255,0.05216533442338308
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,96,4,64,128,1,float16,float16,511,0.052928000688552856
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,96,4,64,0,1,float16,float16,511,0.06343466540177663
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,96,4,64,128,1,float16,float16,1023,0.05484800040721893
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,96,4,64,0,1,float16,float16,1023,0.08056533336639404
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,96,8,64,128,1,float16,float16,1,0.04146666576464971
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,96,8,64,0,1,float16,float16,1,0.04409599800904592
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,96,8,64,128,1,float16,fp8,1,0.012159999459981918
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,96,8,64,128,1,float16,float16,3,0.04095466683308283
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,96,8,64,0,1,float16,float16,3,0.041237334410349526
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,96,8,64,128,1,float16,fp8,3,0.01232533281048139
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,96,8,64,0,1,float16,fp8,3,0.012357333054145178
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,96,8,64,128,1,float16,float16,7,0.04153066625197729
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,96,8,64,0,1,float16,float16,7,0.041482667128245033
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,96,8,64,128,1,float16,fp8,7,0.012351999680201212
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,96,8,64,0,1,float16,fp8,7,0.012293333808581034
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,96,8,64,128,1,float16,float16,15,0.041493333876132965
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,96,8,64,0,1,float16,float16,15,0.04163199911514918
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,96,8,64,128,1,float16,fp8,15,0.012351999680201212
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,96,8,64,0,1,float16,fp8,15,0.012389333297808966
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,96,8,64,128,1,float16,float16,31,0.041536000867684685
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,96,8,64,0,1,float16,fp8,1,0.012319999436537424
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,96,8,64,0,1,float16,float16,31,0.041402667760849
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,96,8,64,128,1,float16,fp8,31,0.012335999558369318
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,96,8,64,0,1,float16,fp8,31,0.012213333199421564
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,96,8,64,128,1,float16,float16,63,0.04118400067090988
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,96,8,64,0,1,float16,float16,63,0.04051200052102407
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,96,8,64,128,1,float16,fp8,63,0.012448000411192576
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,96,8,64,0,1,float16,fp8,63,0.01211200033624967
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,96,8,64,128,1,float16,float16,127,0.04164800047874451
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,96,8,64,0,1,float16,float16,127,0.04063999901215235
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,96,8,64,128,1,float16,fp8,127,0.015072000523408255
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,96,8,64,0,1,float16,fp8,127,0.014725333700577417
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,96,8,64,128,1,float16,float16,255,0.04372799893220266
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,96,8,64,0,1,float16,float16,255,0.042250668009122215
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,96,8,64,128,1,float16,fp8,255,0.016186666985352833
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,96,8,64,0,1,float16,fp8,255,0.019866666446129482
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,96,8,64,128,1,float16,float16,511,0.04341333111127218
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,96,8,64,0,1,float16,float16,511,0.05463466544946035
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,96,8,64,128,1,float16,fp8,511,0.014933332800865173
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,96,8,64,0,1,float16,fp8,511,0.029872000217437744
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,96,8,64,128,1,float16,float16,1023,0.04387199878692627
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,96,8,64,0,1,float16,float16,1023,0.07707199951012929
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,96,8,64,128,1,float16,fp8,1023,0.014938666174809137
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,96,8,64,0,1,float16,fp8,1023,0.051072001457214355
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,96,1,64,128,1,float16,float16,1,0.224400003751119
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,96,1,64,0,1,float16,float16,1,0.22429867585500082
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,96,1,64,128,1,float16,fp8,1,0.2262506683667501
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,96,1,64,0,1,float16,fp8,1,0.22632533311843872
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,96,1,64,128,1,float16,float16,3,0.22431999444961548
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,96,1,64,0,1,float16,float16,3,0.22444266080856323
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,96,1,64,128,1,float16,fp8,3,0.22619734207789102
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,96,1,64,0,1,float16,fp8,3,0.22650132576624551
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,96,1,64,128,1,float16,float16,7,0.2347573240598043
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,96,1,64,0,1,float16,float16,7,0.23497066895167032
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,96,1,64,128,1,float16,fp8,7,0.23684799671173096
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,96,1,64,0,1,float16,fp8,7,0.23701866467793783
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,96,1,64,128,1,float16,float16,15,0.24441067377726236
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,96,1,64,0,1,float16,float16,15,0.24453866481781006
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,96,1,64,128,1,float16,fp8,15,0.2460106611251831
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,96,1,64,0,1,float16,fp8,15,0.24606933196385702
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,96,1,64,128,1,float16,float16,31,0.3130026658376058
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,96,1,64,0,1,float16,float16,31,0.3130613366762797
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,96,1,64,128,1,float16,fp8,31,0.3150026599566142
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,96,1,64,0,1,float16,fp8,31,0.31519466638565063
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,96,1,64,128,1,float16,float16,63,0.31550933917363483
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,96,1,64,0,1,float16,float16,63,0.3155733346939087
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,96,1,64,128,1,float16,fp8,63,0.31835732857386273
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,96,1,64,0,1,float16,fp8,63,0.31838399171829224
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,96,1,64,128,1,float16,float16,127,0.31998932361602783
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,96,1,64,0,1,float16,float16,127,0.32017600536346436
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,96,1,64,128,1,float16,fp8,127,0.32132800420125324
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,96,1,64,0,1,float16,fp8,127,0.3214346567789714
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,96,1,64,128,1,float16,float16,255,0.32180800040562946
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,96,1,64,0,1,float16,float16,255,0.3933759927749634
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,96,1,64,128,1,float16,fp8,255,0.32154132922490436
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,96,1,64,0,1,float16,fp8,255,0.38995734850565594
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,96,2,64,128,1,float16,float16,1,0.22565333048502603
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,96,2,64,0,1,float16,float16,1,0.22591465711593628
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,96,2,64,128,1,float16,fp8,1,0.22730666399002075
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,96,2,64,0,1,float16,fp8,1,0.22749332586924234
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,96,2,64,128,1,float16,float16,3,0.225983997186025
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,96,2,64,0,1,float16,float16,3,0.22614399592081705
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,96,2,64,128,1,float16,fp8,3,0.22748800118764242
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,96,2,64,0,1,float16,fp8,3,0.22772266467412314
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,96,2,64,128,1,float16,float16,7,0.2349546750386556
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,96,2,64,0,1,float16,float16,7,0.23483733336130777
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,96,2,64,128,1,float16,fp8,7,0.24035199483235678
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,96,2,64,0,1,float16,fp8,7,0.23734933137893677
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,96,2,64,128,1,float16,float16,15,0.24528533220291138
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,96,2,64,0,1,float16,float16,15,0.24500266710917154
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,96,2,64,128,1,float16,fp8,15,0.2469759980837504
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,96,2,64,0,1,float16,fp8,15,0.24672534068425497
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,96,2,64,128,1,float16,float16,31,0.3134346604347229
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,96,2,64,0,1,float16,float16,31,0.31362666686375934
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,96,2,64,128,1,float16,fp8,31,0.3153333266576131
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,96,2,64,0,1,float16,fp8,31,0.3154346744219462
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,96,2,64,128,1,float16,float16,63,0.3158559997876485
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,96,2,64,0,1,float16,float16,63,0.31591999530792236
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,96,2,64,128,1,float16,fp8,63,0.31876800457636517
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,96,2,64,0,1,float16,fp8,63,0.318832000096639
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,96,2,64,128,1,float16,float16,127,0.3206719954808553
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,96,2,64,0,1,float16,float16,127,0.3206719954808553
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,96,2,64,128,1,float16,fp8,127,0.3216106692949931
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,96,2,64,0,1,float16,fp8,127,0.3216480016708374
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,96,2,64,128,1,float16,float16,255,0.3219146728515625
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,96,2,64,0,1,float16,float16,255,0.3939093351364136
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,96,2,64,128,1,float16,fp8,255,0.32206400235493976
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,96,2,64,0,1,float16,fp8,255,0.39188265800476074
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,96,4,64,128,1,float16,float16,1,0.05203199883302053
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,96,4,64,0,1,float16,float16,1,0.05201066533724467
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,96,4,64,0,1,float16,float16,3,0.05107733110586802
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,96,4,64,128,1,float16,float16,3,0.050997331738471985
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,96,4,64,128,1,float16,float16,7,0.05125333368778229
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,96,4,64,0,1,float16,float16,7,0.051141331593195595
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,96,4,64,128,1,float16,float16,15,0.05347733199596405
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,96,4,64,0,1,float16,float16,15,0.05133866767088572
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,96,4,64,128,1,float16,float16,31,0.05107733110586802
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,96,4,64,0,1,float16,float16,31,0.05086933573087057
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,96,4,64,128,1,float16,float16,63,0.05030933519204458
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,96,4,64,0,1,float16,float16,63,0.052501335740089417
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,96,4,64,128,1,float16,float16,127,0.05082666873931885
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,96,4,64,0,1,float16,float16,127,0.050106664498647056
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,96,4,64,128,1,float16,float16,255,0.052527998884518944
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,96,4,64,0,1,float16,float16,255,0.051882664362589516
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,96,8,64,128,1,float16,float16,1,0.07589333256085713
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,96,8,64,0,1,float16,float16,1,0.07564266522725423
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,96,8,64,128,1,float16,fp8,1,0.02128000060717265
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,96,8,64,0,1,float16,fp8,1,0.02128533273935318
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,96,8,64,128,1,float16,float16,3,0.07593599955240886
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,96,8,64,0,1,float16,float16,3,0.07604800164699554
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,96,8,64,128,1,float16,fp8,3,0.021381333470344543
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,96,8,64,0,1,float16,fp8,3,0.02146133283774058
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,96,8,64,128,1,float16,float16,7,0.07610133290290833
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,96,8,64,0,1,float16,float16,7,0.07612800101439159
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,96,8,64,128,1,float16,fp8,7,0.021322667598724365
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,96,8,64,0,1,float16,fp8,7,0.021365332106749218
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,96,8,64,128,1,float16,float16,15,0.07604266703128815
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,96,8,64,0,1,float16,float16,15,0.07606400052706401
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,96,8,64,128,1,float16,fp8,15,0.021327999730904896
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,96,8,64,0,1,float16,fp8,15,0.022367998957633972
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,96,8,64,128,1,float16,float16,31,0.07588266829649608
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,96,8,64,0,1,float16,float16,31,0.07579199969768524
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,96,8,64,128,1,float16,fp8,31,0.021295999487241108
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,96,8,64,0,1,float16,fp8,31,0.021210665504137676
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,96,8,64,128,1,float16,float16,63,0.07459733386834462
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,96,8,64,0,1,float16,float16,63,0.07458666463692983
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,96,8,64,128,1,float16,fp8,63,0.021168000996112823
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,96,8,64,0,1,float16,fp8,63,0.021061333517233532
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,96,8,64,128,1,float16,float16,127,0.07543999950091045
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,96,8,64,0,1,float16,float16,127,0.07523199915885925
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,96,8,64,128,1,float16,fp8,127,0.025941332181294758
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,96,8,64,128,1,float16,float16,255,0.08066133161385854
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,96,8,64,0,1,float16,float16,255,0.07996266583601634
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,96,8,64,128,1,float16,fp8,255,0.026863999664783478
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,96,8,64,0,1,float16,fp8,255,0.03812800099452337
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,96,1,64,128,1,float16,float16,1,0.4416693449020386
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,96,1,64,0,1,float16,float16,1,0.44204266866048175
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,96,1,64,128,1,float16,fp8,1,0.44679466883341473
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,96,1,64,0,1,float16,fp8,1,0.44680531819661456
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,96,8,64,0,1,float16,fp8,127,0.0260959987839063
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,96,1,64,128,1,float16,float16,3,0.44227198759714764
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,96,1,64,128,1,float16,fp8,3,0.4463040033976237
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,96,1,64,0,1,float16,float16,3,0.44386665026346844
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,96,1,64,0,1,float16,fp8,3,0.44631465276082355
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,96,1,64,128,1,float16,float16,7,0.46484800179799396
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,96,1,64,0,1,float16,float16,7,0.46375465393066406
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,96,1,64,128,1,float16,fp8,7,0.46811731656392414
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,96,1,64,0,1,float16,fp8,7,0.468234658241272
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,96,1,64,128,1,float16,float16,15,0.4833120107650757
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,96,1,64,0,1,float16,float16,15,0.48344000180562335
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,96,1,64,128,1,float16,fp8,15,0.4865279992421468
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,96,1,64,0,1,float16,fp8,15,0.4866400162378947
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,96,1,64,128,1,float16,float16,31,0.6203466653823853
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,96,1,64,0,1,float16,float16,31,0.6211839914321899
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,96,1,64,128,1,float16,fp8,31,0.624234676361084
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,96,1,64,0,1,float16,fp8,31,0.6243573427200317
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,96,1,64,128,1,float16,float16,63,0.6254719893137614
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,96,1,64,0,1,float16,float16,63,0.6256746848424276
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,96,1,64,128,1,float16,fp8,63,0.6311359802881876
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,96,1,64,0,1,float16,fp8,63,0.631226658821106
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,96,1,64,128,1,float16,float16,127,0.6342080036799113
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,96,1,64,0,1,float16,float16,127,0.6343626578648885
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,96,1,64,128,1,float16,fp8,127,0.6371680100758871
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,96,1,64,0,1,float16,fp8,127,0.6374560197194418
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,96,2,64,128,1,float16,float16,1,0.4457706610361735
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,96,2,64,0,1,float16,float16,1,0.4458506504694621
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,96,2,64,128,1,float16,fp8,1,0.449013352394104
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,96,2,64,0,1,float16,fp8,1,0.4490773280461629
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,96,2,64,128,1,float16,float16,3,0.4461386601130168
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,96,2,64,0,1,float16,float16,3,0.44626665115356445
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,96,2,64,128,1,float16,fp8,3,0.4493066469828288
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,96,2,64,0,1,float16,fp8,3,0.44926400979359943
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,96,2,64,128,1,float16,float16,7,0.4637920061747233
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,96,2,64,0,1,float16,float16,7,0.4636853138605754
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,96,2,64,128,1,float16,fp8,7,0.4682240088780721
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,96,2,64,0,1,float16,fp8,7,0.4683786630630493
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,96,2,64,128,1,float16,float16,15,0.48387734095255536
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,96,2,64,0,1,float16,float16,15,0.4838240146636963
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,96,2,64,128,1,float16,fp8,15,0.48684799671173096
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,96,2,64,0,1,float16,fp8,15,0.4870506525039673
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,96,2,64,128,1,float16,float16,31,0.62062935034434
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,96,2,64,0,1,float16,float16,31,0.6208693186442057
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,96,2,64,128,1,float16,fp8,31,0.6245173215866089
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,96,2,64,0,1,float16,fp8,31,0.6248693466186523
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,96,2,64,128,1,float16,float16,63,0.6256853342056274
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,96,2,64,0,1,float16,float16,63,0.6259040037790934
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,96,2,64,128,1,float16,fp8,63,0.6315466562906901
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,96,2,64,0,1,float16,fp8,63,0.6321226755777994
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,96,2,64,128,1,float16,float16,127,0.6363360087076823
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,96,2,64,0,1,float16,float16,127,0.6422186692555746
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,96,2,64,128,1,float16,fp8,127,0.6378186543782552
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,96,2,64,0,1,float16,fp8,127,0.6378293434778849
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,96,4,64,0,1,float16,float16,1,0.09371200203895569
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,96,4,64,128,1,float16,float16,1,0.09447999795277913
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,96,4,64,128,1,float16,float16,3,0.09367466966311137
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,96,4,64,0,1,float16,float16,3,0.09434666236241658
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,96,4,64,128,1,float16,float16,7,0.09408533573150635
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,96,4,64,0,1,float16,float16,7,0.09363200267155965
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,96,4,64,128,1,float16,float16,15,0.09362133344014485
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,96,4,64,0,1,float16,float16,15,0.09334933757781982
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,96,4,64,128,1,float16,float16,31,0.09358933568000793
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,96,4,64,0,1,float16,float16,31,0.09475732843081157
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,96,4,64,128,1,float16,float16,63,0.09267200032869975
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,96,4,64,0,1,float16,float16,63,0.09264000256856282
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,96,4,64,128,1,float16,float16,127,0.0938933293024699
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,96,4,64,0,1,float16,float16,127,0.09358933568000793
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,96,8,64,128,1,float16,float16,1,0.14109866817792258
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,96,8,64,0,1,float16,float16,1,0.14128533005714417
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,96,8,64,128,1,float16,fp8,1,0.03807999938726425
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,96,8,64,128,1,float16,float16,3,0.1410719950993856
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,96,8,64,0,1,float16,float16,3,0.14121599992116293
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,96,8,64,128,1,float16,fp8,3,0.03826133410135905
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,96,8,64,0,1,float16,fp8,3,0.03822399924198786
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,96,8,64,128,1,float16,float16,7,0.1416106621424357
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,96,8,64,0,1,float16,float16,7,0.141375998655955
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,96,8,64,128,1,float16,fp8,7,0.038133333126703896
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,96,8,64,0,1,float16,fp8,7,0.038176000118255615
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,96,8,64,128,1,float16,float16,15,0.1418773333231608
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,96,8,64,0,1,float16,fp8,1,0.03833599885304769
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,96,8,64,0,1,float16,float16,15,0.14174933234850565
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,96,8,64,128,1,float16,fp8,15,0.038245332737763725
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,96,8,64,0,1,float16,fp8,15,0.03823466598987579
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,96,8,64,0,1,float16,float16,31,0.14219199617703757
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,96,8,64,128,1,float16,fp8,31,0.03811733424663544
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,96,8,64,0,1,float16,fp8,31,0.03810133288304011
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,96,8,64,128,1,float16,float16,63,0.14203199744224548
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,96,8,64,0,1,float16,float16,63,0.14206399520238241
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,96,8,64,128,1,float16,fp8,63,0.03794133414824804
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,96,8,64,0,1,float16,fp8,63,0.037834666669368744
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,96,8,64,128,1,float16,float16,127,0.14333867033322653
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,96,8,64,0,1,float16,float16,127,0.14335999886194864
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,96,8,64,128,1,float16,fp8,127,0.051327998439470925
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,96,8,64,0,1,float16,fp8,127,0.05169066786766052
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,96,8,64,128,1,float16,float16,31,0.14246933658917746
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,96,1,64,128,1,float16,float16,1,0.012869333227475485
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,96,1,64,0,1,float16,float16,1,0.013381333400805792
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,96,1,64,128,1,float16,fp8,1,0.012874666601419449
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,96,1,64,0,1,float16,fp8,1,0.013077333569526672
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,96,1,64,128,1,float16,float16,3,0.012949333836634954
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,96,1,64,0,1,float16,float16,3,0.013343999783198038
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,96,1,64,128,1,float16,fp8,3,0.012842666357755661
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,96,1,64,0,1,float16,fp8,3,0.013072000195582708
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,96,1,64,128,1,float16,float16,7,0.013541333377361298
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,96,1,64,0,1,float16,float16,7,0.013882666826248169
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,96,1,64,128,1,float16,fp8,7,0.016634666671355564
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,96,1,64,0,1,float16,fp8,7,0.013829333086808523
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,96,1,64,128,1,float16,float16,15,0.013765333841244379
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,96,1,64,0,1,float16,float16,15,0.013605333864688873
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,96,1,64,128,1,float16,fp8,15,0.012938667088747025
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,96,1,64,0,1,float16,fp8,15,0.013082666943470636
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,96,1,64,128,1,float16,float16,31,0.014901333798964819
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,96,1,64,0,1,float16,float16,31,0.014917333920796713
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,96,1,64,128,1,float16,fp8,31,0.014858666807413101
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,96,1,64,0,1,float16,fp8,31,0.014933332800865173
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,96,1,64,128,1,float16,float16,63,0.015109332899252573
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,96,1,64,0,1,float16,float16,63,0.01509333277742068
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,96,1,64,128,1,float16,fp8,63,0.015685333559910457
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,96,1,64,0,1,float16,fp8,63,0.015290666371583939
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,96,1,64,128,1,float16,float16,127,0.016154666741689045
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,96,1,64,0,1,float16,float16,127,0.015504000087579092
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,96,1,64,128,1,float16,fp8,127,0.015354666858911514
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,96,1,64,0,1,float16,fp8,127,0.015274666249752045
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,96,1,64,128,1,float16,float16,255,0.015376000354687372
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,96,1,64,0,1,float16,float16,255,0.017840000490347546
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,96,1,64,128,1,float16,fp8,255,0.015157333264748255
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,96,1,64,0,1,float16,fp8,255,0.017525333911180496
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,96,1,64,128,1,float16,float16,511,0.015360000232855478
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,96,1,64,0,1,float16,float16,511,0.025066666305065155
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,96,1,64,128,1,float16,fp8,511,0.015919999529918034
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,96,1,64,0,1,float16,fp8,511,0.025477332373460133
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,96,1,64,128,1,float16,float16,1023,0.016000000139077503
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,96,1,64,0,1,float16,float16,1023,0.04009599983692169
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,96,1,64,128,1,float16,fp8,1023,0.01575999955336253
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,96,1,64,0,1,float16,fp8,1023,0.041264000038305916
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,96,1,64,128,1,float16,float16,2047,0.015989333391189575
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,96,1,64,0,1,float16,float16,2047,0.07004799942175548
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,96,1,64,128,1,float16,fp8,2047,0.015861333658297855
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,96,1,64,0,1,float16,fp8,2047,0.07292800148328145
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,96,1,64,128,1,float16,float16,4095,0.016063999384641647
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,96,1,64,0,1,float16,float16,4095,0.12973866860071817
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,96,1,64,128,1,float16,fp8,4095,0.01591466615597407
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,96,1,64,0,1,float16,fp8,4095,0.13491732875506082
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,96,1,64,128,1,float16,float16,8191,0.016127999871969223
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,96,1,64,0,1,float16,float16,8191,0.2540053327878316
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,96,1,64,128,1,float16,fp8,8191,0.015749332805474598
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,96,1,64,0,1,float16,fp8,8191,0.2598666747411092
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,96,1,64,128,1,float16,float16,16383,0.015781333049138386
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,96,1,64,0,1,float16,float16,16383,0.5502239863077799
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,96,1,64,128,1,float16,fp8,16383,0.01586666703224182
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,96,1,64,0,1,float16,fp8,16383,0.5876799821853638
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,96,2,64,128,1,float16,float16,1,0.013925333817799887
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,96,2,64,0,1,float16,float16,1,0.01302933320403099
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,96,2,64,128,1,float16,fp8,1,0.013232000172138214
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,96,2,64,0,1,float16,fp8,1,0.013701333353916803
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,96,2,64,128,1,float16,float16,3,0.01360000049074491
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,96,2,64,0,1,float16,float16,3,0.013045333325862885
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,96,2,64,128,1,float16,fp8,3,0.012944000462690989
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,96,2,64,0,1,float16,fp8,3,0.012981332838535309
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,96,2,64,128,1,float16,float16,7,0.013760000467300415
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,96,2,64,0,1,float16,float16,7,0.014245333770910898
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,96,2,64,128,1,float16,fp8,7,0.013397333522637686
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,96,2,64,0,1,float16,fp8,7,0.013162666310866674
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,96,2,64,128,1,float16,float16,15,0.01314666618903478
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,96,2,64,0,1,float16,float16,15,0.012991999586423239
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,96,2,64,128,1,float16,fp8,15,0.012986666212479273
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,96,2,64,0,1,float16,fp8,15,0.012997332960367203
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,96,2,64,128,1,float16,float16,31,0.014981333166360855
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,96,2,64,0,1,float16,float16,31,0.014757333944241205
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,96,2,64,128,1,float16,fp8,31,0.015658666690190632
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,96,2,64,0,1,float16,fp8,31,0.014975999792416891
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,96,2,64,128,1,float16,float16,63,0.014943999548753103
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,96,2,64,0,1,float16,float16,63,0.015018666783968607
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,96,2,64,128,1,float16,fp8,63,0.014943999548753103
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,96,2,64,0,1,float16,fp8,63,0.014997333288192749
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,96,2,64,128,1,float16,float16,127,0.015114666273196539
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,96,2,64,0,1,float16,float16,127,0.015216000378131866
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,96,2,64,128,1,float16,fp8,127,0.015216000378131866
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,96,2,64,0,1,float16,fp8,127,0.015098666151364645
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,96,2,64,128,1,float16,float16,255,0.01524266724785169
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,96,2,64,0,1,float16,float16,255,0.017583999782800674
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,96,2,64,128,1,float16,fp8,255,0.01526933287580808
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,96,2,64,0,1,float16,fp8,255,0.01759999990463257
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,96,2,64,128,1,float16,float16,511,0.015263999501864115
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,96,2,64,0,1,float16,float16,511,0.025034666061401367
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,96,2,64,128,1,float16,fp8,511,0.01524266724785169
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,96,2,64,0,1,float16,fp8,511,0.025578667720158894
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,96,2,64,128,1,float16,float16,1023,0.015253332753976187
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,96,2,64,0,1,float16,float16,1023,0.04030933231115341
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,96,2,64,128,1,float16,fp8,1023,0.01516266663869222
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,96,2,64,0,1,float16,fp8,1023,0.04145599901676178
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,96,2,64,128,1,float16,float16,2047,0.015295999745527903
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,96,2,64,0,1,float16,float16,2047,0.07117866476376851
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,96,2,64,128,1,float16,fp8,2047,0.015263999501864115
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,96,2,64,0,1,float16,fp8,2047,0.07337599992752075
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,96,2,64,128,1,float16,float16,4095,0.015376000354687372
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,96,2,64,0,1,float16,float16,4095,0.13033066193262735
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,96,2,64,128,1,float16,fp8,4095,0.015194666882356008
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,96,2,64,0,1,float16,fp8,4095,0.13649066289265951
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,96,2,64,128,1,float16,float16,8191,0.01534933348496755
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,96,2,64,0,1,float16,float16,8191,0.25549866755803424
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,96,2,64,128,1,float16,fp8,8191,0.015226667126019796
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,96,2,64,0,1,float16,fp8,8191,0.2606346607208252
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,96,2,64,128,1,float16,float16,16383,0.01525866612792015
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,96,2,64,128,1,float16,fp8,16383,0.015200000256299973
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,96,2,64,0,1,float16,float16,16383,0.6059626738230387
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,96,2,64,0,1,float16,fp8,16383,0.5887413422266642
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,96,4,64,0,1,float16,float16,1,0.013653332988421122
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,96,4,64,128,1,float16,float16,1,0.013232000172138214
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,96,4,64,128,1,float16,float16,3,0.012725333372751871
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,96,4,64,0,1,float16,float16,3,0.01757866640885671
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,96,4,64,128,1,float16,float16,7,0.01684800038735072
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,96,4,64,0,1,float16,float16,7,0.013701333353916803
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,96,4,64,128,1,float16,float16,15,0.0189280000825723
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,96,4,64,0,1,float16,float16,15,0.017029333859682083
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,96,4,64,128,1,float16,float16,31,0.014752000570297241
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,96,4,64,0,1,float16,float16,31,0.01463466634353002
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,96,4,64,128,1,float16,float16,63,0.015125333021084467
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,96,4,64,0,1,float16,float16,63,0.016879999389251072
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,96,4,64,128,1,float16,float16,127,0.015322666615247726
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,96,4,64,0,1,float16,float16,127,0.015109332899252573
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,96,4,64,128,1,float16,float16,255,0.012906666845083237
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,96,4,64,0,1,float16,float16,255,0.018063999712467194
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,96,4,64,128,1,float16,float16,511,0.012928000340859095
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,96,4,64,0,1,float16,float16,511,0.01313599944114685
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,96,4,64,0,1,float16,float16,1023,0.01947733387351036
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,96,4,64,128,1,float16,float16,1023,0.018005333840847015
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,96,4,64,128,1,float16,float16,2047,0.023168000082174938
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,96,4,64,0,1,float16,float16,2047,0.02902399996916453
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,96,4,64,128,1,float16,float16,4095,0.026047999660174053
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,96,4,64,0,1,float16,float16,4095,0.0371573343873024
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,96,4,64,128,1,float16,float16,8191,0.03102933367093404
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,96,4,64,0,1,float16,float16,8191,0.05288533369700114
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,96,4,64,128,1,float16,float16,16383,0.03829866647720337
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,96,4,64,0,1,float16,float16,16383,0.07946666578451793
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,96,8,64,128,1,float16,float16,1,0.010079999764760336
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,96,8,64,0,1,float16,float16,1,0.009893333539366722
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,96,8,64,128,1,float16,fp8,1,0.006437333300709724
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,96,8,64,0,1,float16,fp8,1,0.006271999950210254
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,96,8,64,128,1,float16,float16,3,0.010512000570694605
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,96,8,64,128,1,float16,fp8,3,0.006474666918317477
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,96,8,64,0,1,float16,fp8,3,0.006384000182151794
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,96,8,64,128,1,float16,float16,7,0.01156266654531161
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,96,8,64,0,1,float16,float16,7,0.010351999973257383
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,96,8,64,128,1,float16,fp8,7,0.006469333544373512
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,96,8,64,0,1,float16,fp8,7,0.006528000036875407
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,96,8,64,128,1,float16,float16,15,0.010319999729593595
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,96,8,64,0,1,float16,float16,15,0.010431999961535135
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,96,8,64,128,1,float16,fp8,15,0.006448000048597653
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,96,8,64,0,1,float16,fp8,15,0.0063680000603199005
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,96,8,64,128,1,float16,float16,31,0.010170666500926018
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,96,8,64,0,1,float16,float16,31,0.010858666151762009
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,96,8,64,128,1,float16,fp8,31,0.006239999706546466
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,96,8,64,0,1,float16,fp8,31,0.006250666454434395
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,96,8,64,128,1,float16,float16,63,0.010138666878143946
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,96,8,64,0,1,float16,float16,63,0.010186666622757912
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,96,8,64,128,1,float16,fp8,63,0.0064106664309899015
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,96,8,64,0,1,float16,fp8,63,0.006693333387374878
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,96,8,64,128,1,float16,float16,127,0.010480000327030817
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,96,8,64,0,1,float16,float16,127,0.010442666709423065
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,96,8,64,128,1,float16,fp8,127,0.00721066693464915
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,96,8,64,0,1,float16,fp8,127,0.007397333160042763
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,96,8,64,128,1,float16,float16,255,0.012522666404644648
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,96,8,64,0,1,float16,float16,255,0.012517333030700684
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,96,8,64,0,1,float16,float16,3,0.010122666756312052
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,96,8,64,128,1,float16,fp8,255,0.00785600021481514
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,96,8,64,128,1,float16,float16,511,0.017029333859682083
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,96,8,64,0,1,float16,float16,511,0.017557332913080852
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,96,8,64,128,1,float16,fp8,511,0.008357333640257517
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,96,8,64,0,1,float16,fp8,511,0.009290666629870733
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,96,8,64,128,1,float16,float16,1023,0.022170667846997578
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,96,8,64,0,1,float16,float16,1023,0.025055999557177227
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,96,8,64,128,1,float16,fp8,1023,0.008485333373149237
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,96,8,64,0,1,float16,fp8,1023,0.01210133358836174
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,96,8,64,128,1,float16,float16,2047,0.026922665536403656
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,96,8,64,0,1,float16,float16,2047,0.0359199990828832
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,96,8,64,128,1,float16,fp8,2047,0.008837333569924036
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,96,8,64,0,1,float16,fp8,2047,0.019365333020687103
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,96,8,64,128,1,float16,float16,4095,0.03408533334732056
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,96,8,64,0,1,float16,float16,4095,0.05305600166320801
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,96,8,64,128,1,float16,fp8,4095,0.009061333412925402
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,96,8,64,0,1,float16,fp8,4095,0.02536533276240031
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,96,8,64,128,1,float16,float16,8191,0.033887999753157295
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,96,8,64,0,1,float16,fp8,255,0.008320000022649765
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,96,8,64,0,1,float16,float16,8191,0.06685333450635274
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,96,8,64,128,1,float16,fp8,8191,0.009082666908701261
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,96,8,64,0,1,float16,fp8,8191,0.035045333206653595
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,96,8,64,128,1,float16,float16,16383,0.03403199960788091
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,96,8,64,0,1,float16,float16,16383,0.08918933073679607
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,96,8,64,128,1,float16,fp8,16383,0.009162666896979014
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,96,8,64,0,1,float16,fp8,16383,0.05657066901524862
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,96,1,64,128,1,float16,float16,1,0.8743680318196615
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,96,1,64,0,1,float16,float16,1,0.8756746451059977
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,96,1,64,128,1,float16,fp8,1,0.8859039942423502
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,96,1,64,0,1,float16,fp8,1,0.8860639731089274
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,96,1,64,128,1,float16,float16,3,0.8751680056254069
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,96,1,64,0,1,float16,float16,3,0.8750933011372884
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,96,1,64,128,1,float16,fp8,3,0.8848746617635092
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,96,1,64,0,1,float16,fp8,3,0.8847786585489908
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,96,1,64,128,1,float16,float16,7,0.9215520222981771
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,96,1,64,0,1,float16,float16,7,0.9207786719004313
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,96,1,64,128,1,float16,fp8,7,0.930565357208252
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,96,1,64,0,1,float16,fp8,7,0.9299626350402832
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,96,1,64,128,1,float16,float16,15,0.9619786739349365
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,96,1,64,0,1,float16,float16,15,0.9611252943674723
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,96,1,64,128,1,float16,fp8,15,0.9679466883341471
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,96,1,64,0,1,float16,fp8,15,0.9674133459726969
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,96,1,64,128,1,float16,float16,31,1.237183968226115
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,96,1,64,0,1,float16,float16,31,1.2360906600952148
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,96,1,64,128,1,float16,fp8,31,1.244874636332194
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,96,1,64,0,1,float16,fp8,31,1.243066628774007
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,96,1,64,128,1,float16,float16,63,1.2503146330515544
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,96,1,64,128,1,float16,fp8,63,1.2585493723551433
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,96,1,64,0,1,float16,float16,63,1.252282698949178
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,96,2,64,128,1,float16,float16,1,0.8854080041249593
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,96,1,64,0,1,float16,fp8,63,1.2592213153839111
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,96,2,64,0,1,float16,float16,1,0.8853813012441
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,96,2,64,128,1,float16,fp8,1,0.8929119904836019
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,96,2,64,128,1,float16,float16,3,0.8894026279449463
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,96,2,64,0,1,float16,fp8,1,0.8921919663747152
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,96,2,64,0,1,float16,float16,3,0.886303981145223
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,96,2,64,128,1,float16,fp8,3,0.8924319744110107
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,96,2,64,0,1,float16,fp8,3,0.8927040100097656
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,96,2,64,128,1,float16,float16,7,0.9212640126546224
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,96,2,64,0,1,float16,float16,7,0.9215733210245768
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,96,2,64,128,1,float16,fp8,7,0.9306666851043701
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,96,2,64,128,1,float16,float16,15,0.9642666975657145
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,96,2,64,0,1,float16,float16,15,0.9631146589914957
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,96,2,64,128,1,float16,fp8,15,0.9686933358510336
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,96,2,64,0,1,float16,fp8,15,0.9685173034667969
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,96,2,64,0,1,float16,fp8,7,0.9325973192850748
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,96,2,64,0,1,float16,float16,31,1.2419733206431072
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,96,2,64,128,1,float16,fp8,31,1.244815985361735
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,96,2,64,128,1,float16,float16,63,1.2755253314971924
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,96,2,64,0,1,float16,fp8,31,1.2456053098042805
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,96,2,64,0,1,float16,float16,63,1.278058687845866
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,96,2,64,128,1,float16,float16,31,1.2442453702290852
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,96,2,64,128,1,float16,fp8,63,1.2640426953633626
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,96,2,64,0,1,float16,fp8,63,1.2639093399047852
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,96,4,64,128,1,float16,float16,1,0.17613333463668823
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,96,4,64,0,1,float16,float16,1,0.1766186753908793
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,96,4,64,128,1,float16,float16,3,0.17892267306645712
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,96,4,64,0,1,float16,float16,3,0.1764799952507019
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,96,4,64,128,1,float16,float16,7,0.1752906640370687
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,96,4,64,0,1,float16,float16,7,0.17511467138926187
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,96,4,64,128,1,float16,float16,15,0.17667200167973837
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,96,4,64,0,1,float16,float16,15,0.17749333381652832
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,96,4,64,128,1,float16,float16,31,0.17759466171264648
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,96,4,64,0,1,float16,float16,31,0.17746132612228394
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,96,4,64,0,1,float16,float16,63,0.17642132441202799
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,96,4,64,128,1,float16,float16,63,0.17644266287485758
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,96,8,64,128,1,float16,float16,1,0.27327466011047363
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,96,8,64,0,1,float16,float16,1,0.2741973400115967
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,96,8,64,128,1,float16,fp8,1,0.08211733400821686
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,96,8,64,128,1,float16,float16,3,0.27327466011047363
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,96,8,64,0,1,float16,float16,3,0.27427200476328534
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,96,8,64,128,1,float16,fp8,3,0.082805335521698
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,96,8,64,0,1,float16,fp8,1,0.08249066770076752
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,96,8,64,0,1,float16,fp8,3,0.08241599798202515
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,96,8,64,128,1,float16,float16,7,0.2737226684888204
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,96,8,64,0,1,float16,float16,7,0.2746400038401286
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,96,8,64,128,1,float16,fp8,7,0.08230933547019958
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,96,8,64,0,1,float16,fp8,7,0.09039466579755147
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,96,8,64,128,1,float16,float16,15,0.2751786708831787
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,96,8,64,0,1,float16,float16,15,0.276309331258138
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,96,8,64,0,1,float16,fp8,15,0.08213333288828532
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,96,8,64,128,1,float16,fp8,15,0.0828959991534551
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,96,8,64,128,1,float16,float16,31,0.2786719997723897
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,96,8,64,0,1,float16,float16,31,0.28043200572331745
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,96,8,64,128,1,float16,fp8,31,0.08223466575145721
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,96,8,64,0,1,float16,fp8,31,0.08381866415341695
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,96,8,64,128,1,float16,float16,63,0.2765973409016927
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,96,8,64,0,1,float16,float16,63,0.2767893274625142
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,96,8,64,128,1,float16,fp8,63,0.08163199822107951
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,96,1,64,128,1,float16,float16,1,1.8230346043904622
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,96,1,64,0,1,float16,float16,1,1.8231412569681804
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,96,1,64,0,1,float16,fp8,1,1.8348746299743652
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,96,1,64,128,1,float16,fp8,1,1.8340479532877605
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,96,8,64,0,1,float16,fp8,63,0.08229333162307739
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,96,1,64,128,1,float16,float16,3,1.8294986089070637
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,96,1,64,0,1,float16,float16,3,1.8293546040852864
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,96,1,64,128,1,float16,fp8,3,1.8331839243570964
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,96,1,64,0,1,float16,fp8,3,1.8402986526489258
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,96,1,64,128,1,float16,float16,7,1.8868746757507324
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,96,1,64,0,1,float16,float16,7,1.885013262430827
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,96,1,64,128,1,float16,fp8,7,1.8972427050272624
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,96,1,64,0,1,float16,fp8,7,1.8946773211161296
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,96,1,64,128,1,float16,float16,15,1.9786613782246907
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,96,1,64,0,1,float16,float16,15,1.9803999265034993
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,96,1,64,128,1,float16,fp8,15,1.9886666933695476
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,96,1,64,0,1,float16,fp8,15,1.9873226483662922
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,96,1,64,128,1,float16,float16,31,2.5154879887898765
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,96,1,64,0,1,float16,float16,31,2.5208640098571777
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,96,1,64,128,1,float16,fp8,31,2.5359360376993814
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,96,1,64,0,1,float16,fp8,31,2.537365277608236
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,96,2,64,128,1,float16,float16,1,1.8623414039611816
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,96,2,64,0,1,float16,float16,1,1.858458677927653
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,96,2,64,128,1,float16,fp8,1,1.8685439427693684
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,96,2,64,0,1,float16,fp8,1,1.8727893829345703
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,96,2,64,128,1,float16,float16,3,1.8647680282592773
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,96,2,64,128,1,float16,fp8,3,1.8763093948364258
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,96,2,64,0,1,float16,fp8,3,1.8767894109090169
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,96,2,64,128,1,float16,float16,7,1.9147946039835613
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,96,2,64,0,1,float16,float16,3,1.8671733538309734
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,96,2,64,128,1,float16,fp8,7,1.9242614110310872
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,96,2,64,0,1,float16,fp8,7,1.9209012985229492
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,96,2,64,128,1,float16,float16,15,2.004426638285319
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,96,2,64,0,1,float16,float16,15,2.0051520665486655
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,96,2,64,0,1,float16,fp8,15,2.011967976888021
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,96,2,64,128,1,float16,float16,31,2.517045338948568
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,96,2,64,128,1,float16,fp8,15,2.00982936223348
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,96,2,64,0,1,float16,float16,31,2.5228586196899414
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,96,2,64,0,1,float16,float16,7,1.9151733716328938
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,96,2,64,128,1,float16,fp8,31,2.538911978403727
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,96,2,64,0,1,float16,fp8,31,2.538362661997477
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,96,4,64,128,1,float16,float16,1,0.3487946589787801
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,96,4,64,0,1,float16,float16,1,0.3472906748453776
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,96,4,64,0,1,float16,float16,3,0.34879998366038006
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,96,4,64,128,1,float16,float16,3,0.3492319981257121
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,96,4,64,128,1,float16,float16,7,0.34864532947540283
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,96,4,64,0,1,float16,float16,7,0.34940266609191895
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,96,4,64,128,1,float16,float16,15,0.3491520086924235
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,96,4,64,0,1,float16,float16,15,0.34970664978027344
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,96,4,64,0,1,float16,float16,31,0.3498026529947917
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,96,4,64,128,1,float16,float16,31,0.3479359944661458
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,96,8,64,128,1,float16,float16,1,0.5505653222401937
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,96,8,64,128,1,float16,fp8,1,0.160261332988739
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,96,8,64,0,1,float16,float16,1,0.5502773523330688
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,96,8,64,0,1,float16,float16,3,0.5517013470331827
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,96,8,64,0,1,float16,fp8,1,0.15864533185958862
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,96,8,64,128,1,float16,float16,3,0.5481173197428385
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,96,8,64,128,1,float16,float16,7,0.5512373447418213
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,96,8,64,0,1,float16,fp8,3,0.1601653297742208
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,96,8,64,128,1,float16,fp8,3,0.1585973302523295
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,96,8,64,128,1,float16,fp8,7,0.15850667158762613
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,96,8,64,0,1,float16,float16,7,0.5510666767756144
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,96,8,64,0,1,float16,fp8,7,0.15843199690183005
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,96,8,64,128,1,float16,float16,15,0.5541813373565674
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,96,8,64,0,1,float16,float16,15,0.5526986519495646
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,96,8,64,128,1,float16,fp8,15,0.15851199626922607
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,96,8,64,0,1,float16,fp8,15,0.15852800011634827
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,96,8,64,0,1,float16,float16,31,0.5521546602249146
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,96,8,64,128,1,float16,fp8,31,0.15872533122698465
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,96,1,64,128,1,float16,float16,1,0.018735999862353008
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,96,8,64,0,1,float16,fp8,31,0.15846932927767435
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,96,1,64,128,1,float16,fp8,1,0.018789333601792652
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,96,1,64,128,1,float16,float16,3,0.018687999496857326
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,96,1,64,0,1,float16,float16,1,0.018570666511853535
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,96,1,64,0,1,float16,float16,3,0.01869333287080129
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,96,1,64,0,1,float16,fp8,1,0.018917333334684372
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,96,1,64,128,1,float16,fp8,3,0.018911999960740406
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,96,1,64,0,1,float16,fp8,3,0.01883200059334437
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,96,1,64,128,1,float16,float16,7,0.019551999866962433
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,96,1,64,0,1,float16,float16,7,0.019386666516462963
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,96,1,64,128,1,float16,fp8,7,0.019440000255902607
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,96,1,64,0,1,float16,fp8,7,0.019519999623298645
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,96,1,64,128,1,float16,float16,15,0.020010666300853092
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,96,1,64,0,1,float16,float16,15,0.01993600030740102
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,96,1,64,128,1,float16,fp8,15,0.02015999952952067
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,96,1,64,0,1,float16,fp8,15,0.020090666910012562
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,96,1,64,128,1,float16,float16,31,0.024800000091393787
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,96,1,64,0,1,float16,float16,31,0.024842667082945507
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,96,1,64,128,1,float16,fp8,31,0.02497600018978119
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,96,1,64,0,1,float16,fp8,31,0.024885334074497223
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,96,1,64,128,1,float16,float16,63,0.025061334172884624
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,96,1,64,0,1,float16,float16,63,0.025301332275072735
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,96,1,64,128,1,float16,fp8,63,0.02510933329661687
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,96,1,64,0,1,float16,fp8,63,0.025231999655564625
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,96,1,64,128,1,float16,float16,127,0.025370667378107708
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,96,1,64,0,1,float16,float16,127,0.025370667378107708
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,96,1,64,128,1,float16,fp8,127,0.025285333395004272
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,96,1,64,0,1,float16,fp8,127,0.025402667621771496
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,96,1,64,128,1,float16,float16,255,0.025424001117547352
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,96,1,64,0,1,float16,float16,255,0.030181333422660828
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,96,1,64,128,1,float16,fp8,255,0.025813333690166473
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,96,1,64,0,1,float16,fp8,255,0.03052799900372823
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,96,1,64,128,1,float16,float16,511,0.026000000536441803
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,96,1,64,0,1,float16,float16,511,0.045082668463389076
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,96,1,64,128,1,float16,fp8,511,0.026000000536441803
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,96,1,64,0,1,float16,fp8,511,0.045706664522488914
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,96,1,64,128,1,float16,float16,1023,0.025909334421157837
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,96,1,64,0,1,float16,float16,1023,0.07390933235486348
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,96,1,64,128,1,float16,fp8,1023,0.025722667574882507
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,96,1,64,0,1,float16,fp8,1023,0.07569600145022075
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,96,1,64,128,1,float16,float16,2047,0.025706666211287182
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,96,1,64,0,1,float16,float16,2047,0.1318986713886261
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,96,1,64,128,1,float16,fp8,2047,0.025727999707063038
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,96,1,64,0,1,float16,fp8,2047,0.13540266950925192
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,96,8,64,128,1,float16,float16,31,0.5550933281580607
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,96,1,64,128,1,float16,float16,4095,0.025813333690166473
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,96,1,64,0,1,float16,float16,4095,0.24732265869776407
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,96,1,64,0,1,float16,fp8,4095,0.25441600879033405
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,96,1,64,128,1,float16,float16,8191,0.025466665625572205
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,96,1,64,128,1,float16,fp8,4095,0.025279998779296875
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,96,1,64,128,1,float16,fp8,8191,0.025349333882331848
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,96,1,64,0,1,float16,float16,8191,0.48415998617808026
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,96,2,64,128,1,float16,float16,1,0.01870399961868922
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,96,1,64,0,1,float16,fp8,8191,0.49439998467763263
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,96,2,64,0,1,float16,float16,1,0.01868266612291336
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,96,2,64,128,1,float16,fp8,1,0.018581333259741466
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,96,2,64,0,1,float16,fp8,1,0.01883200059334437
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,96,2,64,128,1,float16,float16,3,0.018650667121013004
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,96,2,64,0,1,float16,float16,3,0.018719999740521114
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,96,2,64,128,1,float16,fp8,3,0.018730666488409042
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,96,2,64,0,1,float16,fp8,3,0.018853332847356796
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,96,2,64,128,1,float16,float16,7,0.01912533367673556
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,96,2,64,0,1,float16,float16,7,0.01939733326435089
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,96,2,64,128,1,float16,fp8,7,0.019296000401178997
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,96,2,64,0,1,float16,fp8,7,0.019461333751678467
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,96,2,64,128,1,float16,float16,15,0.019882666567961376
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,96,2,64,0,1,float16,float16,15,0.019914666811625164
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,96,2,64,128,1,float16,fp8,15,0.019850666324297588
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,96,2,64,0,1,float16,fp8,15,0.0201706662774086
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,96,2,64,128,1,float16,float16,31,0.02463999887307485
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,96,2,64,0,1,float16,float16,31,0.02463999887307485
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,96,2,64,128,1,float16,fp8,31,0.024613333245118458
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,96,2,64,128,1,float16,float16,63,0.025034666061401367
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,96,2,64,0,1,float16,float16,63,0.02499733368555705
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,96,2,64,0,1,float16,fp8,31,0.02475733309984207
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,96,2,64,128,1,float16,fp8,63,0.02497066557407379
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,96,2,64,0,1,float16,fp8,63,0.025077333052953083
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,96,2,64,128,1,float16,float16,127,0.025098666548728943
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,96,2,64,0,1,float16,float16,127,0.02554133286078771
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,96,2,64,128,1,float16,fp8,127,0.025263999899228413
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,96,2,64,0,1,float16,fp8,127,0.02518933266401291
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,96,2,64,128,1,float16,float16,255,0.02533866713444392
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,96,2,64,0,1,float16,float16,255,0.02998399982849757
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,96,2,64,128,1,float16,fp8,255,0.025205334027608235
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,96,2,64,0,1,float16,fp8,255,0.029893333713213604
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,96,2,64,128,1,float16,float16,511,0.025434667865435284
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,96,2,64,0,1,float16,float16,511,0.04444799820582072
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,96,2,64,128,1,float16,fp8,511,0.025285333395004272
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,96,2,64,0,1,float16,fp8,511,0.045093332727750145
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,96,2,64,128,1,float16,float16,1023,0.025487999121348064
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,96,2,64,0,1,float16,float16,1023,0.0734559992949168
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,96,2,64,128,1,float16,fp8,1023,0.025418666501839954
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,96,2,64,0,1,float16,float16,2047,0.13169599572817484
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,96,2,64,0,1,float16,fp8,1023,0.07529066503047943
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,96,2,64,128,1,float16,float16,2047,0.025461333493391674
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,96,2,64,128,1,float16,fp8,2047,0.025370667378107708
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,96,2,64,0,1,float16,fp8,2047,0.13522666692733765
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,96,2,64,128,1,float16,float16,4095,0.02537599951028824
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,96,2,64,0,1,float16,float16,4095,0.24719999233881632
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,96,2,64,128,1,float16,fp8,4095,0.025434667865435284
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,96,2,64,0,1,float16,fp8,4095,0.2548533280690511
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,96,2,64,128,1,float16,float16,8191,0.025402667621771496
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,96,2,64,128,1,float16,fp8,8191,0.025424001117547352
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,96,2,64,0,1,float16,float16,8191,0.49369064966837567
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,96,4,64,0,1,float16,float16,1,0.011488000551859537
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,96,2,64,0,1,float16,fp8,8191,0.4947893222173055
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,96,4,64,128,1,float16,float16,1,0.01586666703224182
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,96,4,64,128,1,float16,float16,3,0.01659199967980385
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,96,4,64,0,1,float16,float16,3,0.011829332758982977
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,96,4,64,0,1,float16,float16,7,0.011333333949247995
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,96,4,64,128,1,float16,float16,7,0.011231999844312668
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,96,4,64,128,1,float16,float16,15,0.011434666812419891
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,96,4,64,0,1,float16,float16,15,0.01129066695769628
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,96,4,64,0,1,float16,float16,31,0.011482667177915573
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,96,4,64,128,1,float16,float16,31,0.011264000087976456
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,96,4,64,128,1,float16,float16,63,0.017680000513792038
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,96,4,64,0,1,float16,float16,63,0.017130666722853977
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,96,4,64,0,1,float16,float16,127,0.015541333705186844
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,96,4,64,128,1,float16,float16,127,0.016554666062196095
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,96,4,64,128,1,float16,float16,255,0.0138026662170887
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,96,4,64,0,1,float16,float16,255,0.013365333278973898
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,96,4,64,128,1,float16,float16,511,0.018602666755517323
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,96,4,64,0,1,float16,float16,511,0.019440000255902607
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,96,4,64,128,1,float16,float16,1023,0.02436800052722295
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,96,4,64,128,1,float16,float16,2047,0.030586667358875275
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,96,4,64,0,1,float16,float16,2047,0.04105599969625473
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,96,4,64,0,1,float16,float16,1023,0.030389333764712017
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,96,4,64,128,1,float16,float16,4095,0.0386613334218661
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,96,4,64,0,1,float16,float16,4095,0.06191466748714447
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,96,4,64,128,1,float16,float16,8191,0.03846933444341024
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,96,4,64,0,1,float16,float16,8191,0.0776693324247996
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,96,8,64,128,1,float16,float16,1,0.017973333597183228
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,96,8,64,0,1,float16,float16,1,0.01653333380818367
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,96,8,64,128,1,float16,fp8,1,0.006319999694824219
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,96,8,64,0,1,float16,fp8,1,0.006437333300709724
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,96,8,64,128,1,float16,float16,3,0.010485333700974783
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,96,8,64,0,1,float16,float16,3,0.010832000523805618
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,96,8,64,128,1,float16,fp8,3,0.006981333096822103
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,96,8,64,0,1,float16,fp8,3,0.008650666723648706
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,96,8,64,128,1,float16,float16,7,0.01073066641887029
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,96,8,64,0,1,float16,float16,7,0.010911999891201654
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,96,8,64,128,1,float16,fp8,7,0.01251199965675672
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,96,8,64,0,1,float16,fp8,7,0.015423999478419622
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,96,8,64,128,1,float16,float16,15,0.011274666835864386
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,96,8,64,0,1,float16,float16,15,0.010762666662534079
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,96,8,64,128,1,float16,fp8,15,0.006533333410819371
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,96,8,64,0,1,float16,fp8,15,0.006735999758044879
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,96,8,64,128,1,float16,float16,31,0.010442666709423065
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,96,8,64,0,1,float16,float16,31,0.010485333700974783
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,96,8,64,0,1,float16,fp8,31,0.00660800002515316
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,96,8,64,128,1,float16,fp8,63,0.006389333556095759
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,96,8,64,0,1,float16,fp8,63,0.0064106664309899015
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,96,8,64,128,1,float16,float16,127,0.010458666831254959
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,96,8,64,0,1,float16,float16,127,0.010490667074918747
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,96,8,64,128,1,float16,fp8,127,0.006927999978264173
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,96,8,64,0,1,float16,fp8,127,0.00696000022192796
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,96,8,64,128,1,float16,float16,255,0.018405333161354065
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,96,8,64,0,1,float16,float16,255,0.01842133328318596
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,96,8,64,128,1,float16,fp8,255,0.008080000057816505
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,96,8,64,0,1,float16,fp8,255,0.008938666433095932
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,96,8,64,128,1,float16,float16,511,0.023376000424226124
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,96,8,64,0,1,float16,float16,511,0.02550400048494339
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,96,8,64,128,1,float16,fp8,511,0.009061333412925402
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,96,8,64,0,1,float16,fp8,511,0.010938666760921478
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,96,8,64,128,1,float16,float16,1023,0.03369066615899404
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,96,8,64,0,1,float16,float16,1023,0.04042666653792063
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,96,8,64,128,1,float16,fp8,1023,0.009392000113924345
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,96,8,64,0,1,float16,fp8,1023,0.018922666708628338
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,96,8,64,128,1,float16,float16,2047,0.035242666800816856
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,96,8,64,0,1,float16,float16,2047,0.05130666494369507
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,96,8,64,128,1,float16,fp8,2047,0.009343999748428663
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,96,8,64,0,1,float16,fp8,2047,0.023584000766277313
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,96,8,64,128,1,float16,float16,4095,0.03509333233038584
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,96,8,64,0,1,float16,float16,4095,0.0666293352842331
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,96,8,64,128,1,float16,fp8,4095,0.009519999846816063
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,96,8,64,0,1,float16,fp8,4095,0.03332266708215078
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,96,8,64,128,1,float16,fp8,31,0.006458666796485583
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,96,8,64,128,1,float16,float16,8191,0.03549866626660029
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,96,8,64,0,1,float16,float16,63,0.010245333115259806
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,96,8,64,128,1,float16,float16,63,0.010431999961535135
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,96,8,64,128,1,float16,fp8,8191,0.009375999992092451
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,96,1,128,0,1,float16,float16,1,0.032586666444937386
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,96,1,128,0,1,float16,fp8,1,0.033471999069054924
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,96,1,128,0,1,float16,float16,3,0.03504000107447306
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,96,1,128,0,1,float16,fp8,3,0.034602666894594826
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,96,1,128,0,1,float16,float16,7,0.03518400092919668
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,96,1,128,0,1,float16,fp8,7,0.03562133262554804
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,96,1,128,0,1,float16,float16,15,0.04450666904449463
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,96,1,128,0,1,float16,fp8,15,0.04420266548792521
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,96,1,128,0,1,float16,float16,31,0.0446720023949941
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,96,1,128,0,1,float16,fp8,31,0.044213334719340004
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,96,1,128,0,1,float16,float16,63,0.04491733511288961
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,96,1,128,0,1,float16,fp8,63,0.04494399825731913
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,96,1,128,0,1,float16,float16,127,0.054133335749308266
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,96,1,128,0,1,float16,fp8,127,0.05414933462937673
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,96,1,128,0,1,float16,float16,255,0.08307200173536937
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,96,1,128,0,1,float16,fp8,255,0.08321600159009297
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,96,1,128,0,1,float16,float16,511,0.1409280002117157
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,96,1,128,0,1,float16,fp8,511,0.14168000221252441
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,96,1,128,0,1,float16,float16,1023,0.25594667593638104
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,96,1,128,0,1,float16,fp8,1023,0.25778667132059735
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,96,8,64,0,1,float16,float16,8191,0.08761066198348999
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,96,8,64,0,1,float16,fp8,8191,0.053861334919929504
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,96,1,128,0,1,float16,float16,2047,0.48602132002512616
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,96,1,128,0,1,float16,fp8,2047,0.48973333835601807
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,96,1,128,0,1,float16,fp8,4095,0.9533279736836752
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,96,2,128,0,1,float16,float16,1,0.032560000816980995
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,96,2,128,0,1,float16,fp8,1,0.0335413341720899
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,96,1,128,0,1,float16,float16,4095,0.9537920157114664
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,96,2,128,0,1,float16,float16,3,0.033941333492596946
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,96,2,128,0,1,float16,fp8,3,0.0346666673819224
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,96,2,128,0,1,float16,float16,7,0.03525333354870478
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,96,2,128,0,1,float16,fp8,7,0.03568000098069509
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,96,2,128,0,1,float16,float16,15,0.04447466631730398
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,96,2,128,0,1,float16,fp8,15,0.0444160004456838
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,96,2,128,0,1,float16,float16,31,0.04473066826661428
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,96,2,128,0,1,float16,fp8,31,0.04433066646258036
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,96,2,128,0,1,float16,float16,63,0.04497066636880239
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,96,2,128,0,1,float16,fp8,63,0.045082668463389076
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,96,2,128,0,1,float16,float16,127,0.05410666763782501
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,96,2,128,0,1,float16,fp8,127,0.05413866539796194
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,96,2,128,0,1,float16,float16,255,0.08310399949550629
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,96,2,128,0,1,float16,fp8,255,0.08353066444396973
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,96,2,128,0,1,float16,float16,511,0.14131733775138855
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,96,2,128,0,1,float16,fp8,511,0.1421119968096415
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,96,2,128,0,1,float16,fp8,1023,0.2579893271128337
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,96,2,128,0,1,float16,float16,1023,0.255840003490448
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,96,2,128,0,1,float16,float16,2047,0.4936480124791463
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,96,2,128,0,1,float16,fp8,2047,0.49005866050720215
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,96,2,128,0,1,float16,float16,4095,0.9585546652475992
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,96,2,128,0,1,float16,fp8,4095,0.9655146598815918
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,96,4,128,0,1,float16,float16,1,0.013194666554530462
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,96,4,128,0,1,float16,float16,3,0.012863999853531519
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,96,4,128,0,1,float16,float16,7,0.013061333447694778
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,96,4,128,0,1,float16,float16,15,0.013104000439246496
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,96,4,128,0,1,float16,float16,31,0.012975999464591345
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,96,4,128,0,1,float16,float16,63,0.012725333372751871
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,96,4,128,0,1,float16,float16,127,0.012634667257467905
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,96,4,128,0,1,float16,float16,255,0.025573333104451496
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,96,4,128,0,1,float16,float16,511,0.03587199995915095
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,96,4,128,0,1,float16,float16,1023,0.05641599992911021
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,96,4,128,0,1,float16,float16,2047,0.08086933195590973
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,96,4,128,0,1,float16,float16,4095,0.10710400342941284
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,96,8,128,0,1,float16,float16,1,0.01602666700879733
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,96,8,128,0,1,float16,fp8,1,0.007690666864315669
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,96,8,128,0,1,float16,fp8,3,0.00754666638871034
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,96,8,128,0,1,float16,float16,7,0.016384000579516094
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,96,8,128,0,1,float16,fp8,7,0.008090666805704435
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,96,8,128,0,1,float16,float16,15,0.016517333686351776
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,96,8,128,0,1,float16,float16,3,0.015829333414634068
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,96,8,128,0,1,float16,fp8,15,0.008112000301480293
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,96,8,128,0,1,float16,fp8,31,0.015925332903862
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,96,8,128,0,1,float16,float16,31,0.017450666675964992
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,96,8,128,0,1,float16,float16,63,0.01613333324591319
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,96,8,128,0,1,float16,float16,127,0.016261332978804905
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,96,8,128,0,1,float16,fp8,127,0.008618666479984919
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,96,8,128,0,1,float16,float16,255,0.03126933425664902
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,96,8,128,0,1,float16,fp8,255,0.017221332838137943
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,96,8,128,0,1,float16,float16,511,0.04809066653251648
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,96,8,128,0,1,float16,fp8,511,0.024906667570273083
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,96,8,128,0,1,float16,float16,1023,0.06272533535957336
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,96,8,128,0,1,float16,fp8,1023,0.0315786674618721
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,96,8,128,0,1,float16,float16,2047,0.08244266609350841
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,96,8,128,0,1,float16,fp8,2047,0.047914668917655945
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,96,8,128,0,1,float16,float16,4095,0.1183093289534251
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,96,8,128,0,1,float16,fp8,4095,0.06947733461856842
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,96,8,128,0,1,float16,fp8,63,0.0074560002734263735
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,96,1,128,0,1,float16,float16,1,0.008709333216150602
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,96,1,128,0,1,float16,fp8,1,0.008863999818762144
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,96,1,128,0,1,float16,float16,3,0.00867733359336853
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,96,1,128,0,1,float16,fp8,3,0.008805333326260248
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,96,1,128,0,1,float16,float16,7,0.008613333106040955
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,96,1,128,0,1,float16,fp8,7,0.00878399983048439
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,96,1,128,0,1,float16,float16,15,0.00873066671192646
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,96,1,128,0,1,float16,fp8,15,0.009109333157539368
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,96,1,128,0,1,float16,float16,31,0.00985599992175897
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,96,1,128,0,1,float16,fp8,31,0.009818666925032934
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,96,1,128,0,1,float16,float16,63,0.01145600030819575
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,96,1,128,0,1,float16,fp8,63,0.011365332951148352
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,96,1,128,0,1,float16,float16,127,0.011514666179815928
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,96,1,128,0,1,float16,fp8,127,0.01146666705608368
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,96,1,128,0,1,float16,float16,255,0.011557333171367645
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,96,1,128,0,1,float16,fp8,255,0.011429333438475927
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,96,1,128,0,1,float16,float16,511,0.026261332134405773
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,96,1,128,0,1,float16,fp8,511,0.026186667382717133
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,96,1,128,0,1,float16,float16,1023,0.030218665798505146
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,96,1,128,0,1,float16,fp8,1023,0.02996266633272171
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,96,1,128,0,1,float16,float16,2047,0.041082667807737984
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,96,1,128,0,1,float16,fp8,2047,0.04067199925581614
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,96,1,128,0,1,float16,float16,4095,0.06146133442719778
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,96,1,128,0,1,float16,fp8,4095,0.06102400024731954
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,96,1,128,0,1,float16,float16,8191,0.10299733281135559
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,96,1,128,0,1,float16,fp8,8191,0.10104533036549886
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,96,1,128,0,1,float16,float16,16383,0.18260266383488974
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,96,1,128,0,1,float16,fp8,16383,0.17904533942540488
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,96,2,128,0,1,float16,float16,1,0.00877333308259646
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,96,2,128,0,1,float16,fp8,1,0.00890666681031386
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,96,2,128,0,1,float16,float16,3,0.008703999842206636
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,96,2,128,0,1,float16,fp8,3,0.008938666433095932
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,96,2,128,0,1,float16,float16,7,0.008714666590094566
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,96,2,128,0,1,float16,fp8,7,0.008879999940594038
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,96,2,128,0,1,float16,float16,15,0.008863999818762144
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,96,2,128,0,1,float16,fp8,15,0.00914666677514712
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,96,2,128,0,1,float16,float16,31,0.00943999985853831
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,96,2,128,0,1,float16,fp8,31,0.009770666559537252
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,96,2,128,0,1,float16,float16,63,0.011264000087976456
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,96,2,128,0,1,float16,fp8,63,0.011365332951148352
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,96,2,128,0,1,float16,float16,127,0.011152000476916632
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,96,2,128,0,1,float16,fp8,127,0.011887999872366587
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,96,2,128,0,1,float16,float16,255,0.012080000092585882
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,96,2,128,0,1,float16,fp8,255,0.01163200040658315
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,96,2,128,0,1,float16,float16,511,0.02664000044266383
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,96,2,128,0,1,float16,fp8,511,0.02569066733121872
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,96,2,128,0,1,float16,float16,1023,0.029834667841593426
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,96,2,128,0,1,float16,fp8,1023,0.02998399982849757
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,96,2,128,0,1,float16,float16,2047,0.04022400081157684
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,96,2,128,0,1,float16,fp8,2047,0.04014399896065394
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,96,2,128,0,1,float16,float16,4095,0.060826669136683144
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,96,2,128,0,1,float16,fp8,4095,0.060191998879114784
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,96,2,128,0,1,float16,float16,8191,0.1013866662979126
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,96,2,128,0,1,float16,fp8,8191,0.09949866930643718
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,96,2,128,0,1,float16,float16,16383,0.1818880041440328
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,96,2,128,0,1,float16,fp8,16383,0.17881600062052408
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,96,4,128,0,1,float16,float16,1,0.008874666566650072
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,96,4,128,0,1,float16,float16,3,0.008693333094318708
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,96,4,128,0,1,float16,float16,7,0.008853333070874214
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,96,4,128,0,1,float16,float16,15,0.009056000038981438
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,96,4,128,0,1,float16,float16,31,0.00956266683836778
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,96,4,128,0,1,float16,float16,63,0.015791999797026317
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,96,4,128,0,1,float16,float16,127,0.01166933278242747
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,96,4,128,0,1,float16,float16,255,0.01259200026591619
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,96,4,128,0,1,float16,float16,511,0.0281333327293396
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,96,4,128,0,1,float16,float16,1023,0.030602666238943737
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,96,4,128,0,1,float16,float16,2047,0.017231999586025875
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,96,4,128,0,1,float16,float16,4095,0.017802666872739792
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,96,4,128,0,1,float16,float16,8191,0.022064000368118286
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,96,4,128,0,1,float16,float16,16383,0.02480533222357432
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,96,8,128,0,1,float16,float16,1,0.008869333192706108
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,96,8,128,0,1,float16,fp8,1,0.006528000036875407
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,96,8,128,0,1,float16,float16,3,0.008634666601816813
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,96,8,128,0,1,float16,fp8,3,0.006613333399097125
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,96,8,128,0,1,float16,float16,7,0.008714666590094566
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,96,8,128,0,1,float16,fp8,7,0.006570666407545407
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,96,8,128,0,1,float16,float16,15,0.008954666554927826
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,96,8,128,0,1,float16,fp8,15,0.006575999781489372
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,96,8,128,0,1,float16,float16,31,0.00996800015370051
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,96,8,128,0,1,float16,fp8,31,0.00690133310854435
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,96,8,128,0,1,float16,float16,63,0.011359999577204386
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,96,8,128,0,1,float16,fp8,63,0.006800000245372455
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,96,8,128,0,1,float16,float16,127,0.011370666325092316
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,96,8,128,0,1,float16,fp8,127,0.007424000029762586
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,96,8,128,0,1,float16,float16,255,0.011519999553759893
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,96,8,128,0,1,float16,fp8,255,0.009514666472872099
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,96,8,128,0,1,float16,float16,511,0.025888000925381977
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,96,8,128,0,1,float16,fp8,511,0.00961599995692571
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,96,8,128,0,1,float16,float16,1023,0.013679999858140945
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,96,8,128,0,1,float16,fp8,1023,0.009989333028594652
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,96,8,128,0,1,float16,float16,2047,0.015082667271296183
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,96,8,128,0,1,float16,fp8,2047,0.011296000331640244
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,96,8,128,0,1,float16,float16,4095,0.016544000556071598
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,96,8,128,0,1,float16,fp8,4095,0.013338666409254074
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,96,8,128,0,1,float16,float16,8191,0.025237334271272022
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,96,8,128,0,1,float16,fp8,8191,0.017466666797796886
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,96,8,128,0,1,float16,float16,16383,0.030447999636332195
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,96,8,128,0,1,float16,fp8,16383,0.021013334393501282
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,96,1,128,0,1,float16,float16,1,0.008192000289758047
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,96,1,128,0,1,float16,fp8,1,0.008207999790708223
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,96,1,128,0,1,float16,float16,3,0.008293333152929941
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,96,1,128,0,1,float16,fp8,3,0.008463999877373377
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,96,1,128,0,1,float16,float16,7,0.008842666943868002
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,96,1,128,0,1,float16,fp8,7,0.00895999992887179
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,96,1,128,0,1,float16,float16,15,0.016261332978804905
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,96,1,128,0,1,float16,fp8,15,0.00983466642598311
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,96,1,128,0,1,float16,float16,31,0.010821333775917688
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,96,1,128,0,1,float16,fp8,31,0.011402666568756104
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,96,1,128,0,1,float16,float16,63,0.010933333386977514
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,96,1,128,0,1,float16,fp8,63,0.011472000430027643
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,96,1,128,0,1,float16,float16,127,0.011050666371981302
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,96,1,128,0,1,float16,fp8,127,0.011584000041087469
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,96,1,128,0,1,float16,float16,255,0.012730666746695837
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,96,1,128,0,1,float16,fp8,255,0.013365333278973898
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,96,1,128,0,1,float16,float16,511,0.01775466650724411
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,96,1,128,0,1,float16,fp8,511,0.01841066653529803
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,96,1,128,0,1,float16,float16,1023,0.0277813325325648
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,96,1,128,0,1,float16,fp8,1023,0.028592000404993694
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,96,1,128,0,1,float16,float16,2047,0.04763199885686239
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,96,1,128,0,1,float16,fp8,2047,0.048901334404945374
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,96,1,128,0,1,float16,float16,4095,0.08727999528249104
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,96,1,128,0,1,float16,fp8,4095,0.08889599641164143
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,96,1,128,0,1,float16,float16,8191,0.1662773291269938
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,96,1,128,0,1,float16,fp8,8191,0.1686826745669047
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,96,1,128,0,1,float16,float16,16383,0.32443199555079144
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,96,2,128,0,1,float16,float16,1,0.008330666770537695
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,96,1,128,0,1,float16,fp8,16383,0.3277226686477661
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,96,2,128,0,1,float16,fp8,1,0.008512000242869059
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,96,2,128,0,1,float16,float16,3,0.008992000172535578
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,96,2,128,0,1,float16,fp8,3,0.008709333216150602
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,96,2,128,0,1,float16,float16,7,0.009056000038981438
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,96,2,128,0,1,float16,fp8,7,0.008565333361426989
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,96,2,128,0,1,float16,float16,15,0.009717333440979322
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,96,2,128,0,1,float16,fp8,15,0.009290666629870733
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,96,2,128,0,1,float16,float16,31,0.011301333705584208
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,96,2,128,0,1,float16,fp8,31,0.011018666128317514
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,96,2,128,0,1,float16,float16,63,0.011535999675591787
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,96,2,128,0,1,float16,fp8,63,0.010944000134865442
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,96,2,128,0,1,float16,float16,127,0.01156266654531161
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,96,2,128,0,1,float16,fp8,127,0.011296000331640244
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,96,2,128,0,1,float16,float16,255,0.013104000439246496
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,96,2,128,0,1,float16,fp8,255,0.012794667234023413
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,96,2,128,0,1,float16,float16,511,0.018239999810854595
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,96,2,128,0,1,float16,fp8,511,0.017994667092959087
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,96,2,128,0,1,float16,float16,1023,0.028192001084486645
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,96,2,128,0,1,float16,fp8,1023,0.02828799933195114
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,96,2,128,0,1,float16,float16,2047,0.04818666477998098
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,96,2,128,0,1,float16,fp8,2047,0.04971200227737427
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,96,2,128,0,1,float16,float16,4095,0.0879146655400594
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,96,2,128,0,1,float16,fp8,4095,0.08846933643023173
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,96,2,128,0,1,float16,float16,8191,0.16726400454839072
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,96,2,128,0,1,float16,fp8,8191,0.16816532611846924
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,96,2,128,0,1,float16,float16,16383,0.3269600073496501
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,96,2,128,0,1,float16,fp8,16383,0.326746662457784
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,96,4,128,0,1,float16,float16,1,0.008912000184257826
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,96,4,128,0,1,float16,float16,3,0.008986666798591614
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,96,4,128,0,1,float16,float16,7,0.017637333522240322
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,96,4,128,0,1,float16,float16,15,0.009183999771873156
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,96,4,128,0,1,float16,float16,31,0.010938666760921478
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,96,4,128,0,1,float16,float16,63,0.018042666216691334
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,96,4,128,0,1,float16,float16,127,0.011722666521867117
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,96,4,128,0,1,float16,float16,255,0.012773333738247553
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,96,4,128,0,1,float16,float16,511,0.017765333255132038
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,96,4,128,0,1,float16,float16,1023,0.015583999454975128
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,96,4,128,0,1,float16,float16,2047,0.018122666825850803
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,96,4,128,0,1,float16,float16,4095,0.01931200052301089
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,96,4,128,0,1,float16,float16,8191,0.031034665803114574
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,96,4,128,0,1,float16,float16,16383,0.03917866696914037
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,96,8,128,0,1,float16,float16,1,0.008357333640257517
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,96,8,128,0,1,float16,fp8,1,0.006325333068768184
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,96,8,128,0,1,float16,float16,3,0.008506666868925095
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,96,8,128,0,1,float16,fp8,3,0.016943999876578648
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,96,8,128,0,1,float16,float16,7,0.008629333227872849
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,96,8,128,0,1,float16,fp8,7,0.006645333642760913
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,96,8,128,0,1,float16,float16,15,0.009258666386206945
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,96,8,128,0,1,float16,fp8,15,0.008672000219424566
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,96,8,128,0,1,float16,float16,31,0.011194666226704916
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,96,8,128,0,1,float16,fp8,31,0.006981333096822103
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,96,8,128,0,1,float16,float16,63,0.011215999722480774
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,96,8,128,0,1,float16,fp8,63,0.006927999978264173
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,96,8,128,0,1,float16,float16,127,0.011514666179815928
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,96,8,128,0,1,float16,fp8,127,0.007701333612203598
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,96,8,128,0,1,float16,float16,255,0.012954667210578918
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,96,8,128,0,1,float16,fp8,255,0.009183999771873156
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,96,8,128,0,1,float16,float16,511,0.013210666676362356
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,96,8,128,0,1,float16,fp8,511,0.009162666896979014
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,96,8,128,0,1,float16,float16,1023,0.013653332988421122
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,96,8,128,0,1,float16,fp8,1023,0.010154666379094124
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,96,8,128,0,1,float16,float16,2047,0.02051199972629547
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,96,8,128,0,1,float16,fp8,2047,0.012448000411192576
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,96,8,128,0,1,float16,float16,4095,0.02325333406527837
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,96,8,128,0,1,float16,fp8,4095,0.01469333345691363
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,96,8,128,0,1,float16,float16,8191,0.03396799912055334
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,96,8,128,0,1,float16,fp8,8191,0.020666666328907013
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,96,8,128,0,1,float16,float16,16383,0.05009600023428599
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,96,8,128,0,1,float16,fp8,16383,0.03385599950949351
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,96,1,128,0,1,float16,float16,1,0.060234665870666504
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,96,1,128,0,1,float16,fp8,1,0.06169599791367849
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,96,1,128,0,1,float16,float16,3,0.062458669145902
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,96,1,128,0,1,float16,fp8,3,0.06355733176072438
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,96,1,128,0,1,float16,float16,7,0.0652213344971339
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,96,1,128,0,1,float16,fp8,7,0.06643733382225037
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,96,1,128,0,1,float16,float16,15,0.08297599852085114
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,96,1,128,0,1,float16,fp8,15,0.0825386643409729
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,96,1,128,0,1,float16,float16,31,0.0831413318713506
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,96,1,128,0,1,float16,fp8,31,0.08262399832407634
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,96,1,128,0,1,float16,float16,63,0.08362666765848796
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,96,1,128,0,1,float16,fp8,63,0.08334400256474812
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,96,1,128,0,1,float16,float16,127,0.1016533374786377
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,96,1,128,0,1,float16,fp8,127,0.1015786627928416
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,96,1,128,0,1,float16,float16,255,0.15781333049138388
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,96,1,128,0,1,float16,fp8,255,0.15781866510709128
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,96,1,128,0,1,float16,float16,511,0.2700960040092468
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,96,1,128,0,1,float16,fp8,511,0.2712053259213765
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,96,1,128,0,1,float16,float16,1023,0.49375466505686444
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,96,1,128,0,1,float16,fp8,1023,0.49535465240478516
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,96,1,128,0,1,float16,float16,2047,0.9482239882151285
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,96,1,128,0,1,float16,fp8,2047,0.9447413285573324
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,96,2,128,0,1,float16,float16,1,0.060458665092786155
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,96,2,128,0,1,float16,fp8,1,0.06182933350404104
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,96,2,128,0,1,float16,float16,3,0.062458669145902
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,96,2,128,0,1,float16,fp8,3,0.06367466847101848
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,96,2,128,0,1,float16,float16,7,0.06523733337720235
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,96,2,128,0,1,float16,fp8,7,0.06637333333492279
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,96,2,128,0,1,float16,float16,15,0.08298666775226593
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,96,2,128,0,1,float16,fp8,15,0.08275199929873149
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,96,2,128,0,1,float16,float16,31,0.08366933465003967
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,96,2,128,0,1,float16,fp8,31,0.08328533172607422
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,96,2,128,0,1,float16,float16,63,0.0844106674194336
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,96,2,128,0,1,float16,fp8,63,0.08411733309427898
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,96,2,128,0,1,float16,float16,127,0.10238400101661682
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,96,2,128,0,1,float16,fp8,127,0.10246400038401286
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,96,2,128,0,1,float16,float16,255,0.15795200069745383
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,96,2,128,0,1,float16,fp8,255,0.15833600362141928
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,96,2,128,0,1,float16,float16,511,0.2702186703681946
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,96,2,128,0,1,float16,fp8,511,0.27162132660547894
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,96,2,128,0,1,float16,float16,1023,0.501205325126648
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,96,2,128,0,1,float16,float16,2047,0.9558453559875488
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,96,2,128,0,1,float16,fp8,2047,0.95797332127889
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,96,2,128,0,1,float16,fp8,1023,0.4957919915517171
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,96,4,128,0,1,float16,float16,1,0.01960533360640208
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,96,4,128,0,1,float16,float16,3,0.019589333484570186
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,96,4,128,0,1,float16,float16,7,0.02045866722861926
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,96,4,128,0,1,float16,float16,15,0.019472000499566395
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,96,4,128,0,1,float16,float16,31,0.02160000056028366
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,96,4,128,0,1,float16,float16,63,0.019706666469573975
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,96,4,128,0,1,float16,float16,127,0.01924266666173935
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,96,4,128,0,1,float16,float16,255,0.03801066676775614
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,96,4,128,0,1,float16,float16,511,0.058703998724619545
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,96,4,128,0,1,float16,float16,1023,0.07691733539104462
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,96,4,128,0,1,float16,float16,2047,0.10541333754857381
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,96,8,128,0,1,float16,float16,1,0.025562666356563568
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,96,8,128,0,1,float16,fp8,1,0.012053333222866058
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,96,8,128,0,1,float16,float16,3,0.025290665527184803
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,96,8,128,0,1,float16,fp8,3,0.011887999872366587
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,96,8,128,0,1,float16,float16,7,0.025466665625572205
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,96,8,128,0,1,float16,fp8,7,0.012144000579913458
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,96,8,128,0,1,float16,float16,15,0.025861332813898723
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,96,8,128,0,1,float16,fp8,15,0.01210133358836174
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,96,8,128,0,1,float16,float16,31,0.025759999950726826
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,96,8,128,0,1,float16,fp8,31,0.012080000092585882
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,96,8,128,0,1,float16,float16,63,0.025642665723959606
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,96,8,128,0,1,float16,fp8,63,0.012037333101034164
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,96,8,128,0,1,float16,float16,127,0.025920001169045765
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,96,8,128,0,1,float16,fp8,127,0.0143306665122509
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,96,8,128,0,1,float16,float16,255,0.05132266879081726
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,96,8,128,0,1,float16,fp8,255,0.02438933402299881
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,96,8,128,0,1,float16,float16,511,0.06517333288987477
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,96,8,128,0,1,float16,fp8,511,0.03271466741959254
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,96,8,128,0,1,float16,float16,1023,0.08157333234945933
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,96,8,128,0,1,float16,fp8,1023,0.04775466521581014
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,96,8,128,0,1,float16,float16,2047,0.12006933490435283
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,96,8,128,0,1,float16,fp8,2047,0.06910933554172516
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,96,1,128,0,1,float16,float16,1,0.013327999661366144
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,96,1,128,0,1,float16,fp8,1,0.013669333110253016
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,96,1,128,0,1,float16,float16,3,0.01331199953953425
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,96,1,128,0,1,float16,fp8,3,0.014111999422311783
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,96,1,128,0,1,float16,float16,7,0.013557333499193192
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,96,1,128,0,1,float16,fp8,7,0.01370666672786077
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,96,1,128,0,1,float16,fp8,15,0.013621332744757334
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,96,1,128,0,1,float16,float16,31,0.015461333096027374
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,96,1,128,0,1,float16,fp8,31,0.015509333461523056
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,96,1,128,0,1,float16,float16,63,0.015743999431530636
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,96,1,128,0,1,float16,fp8,63,0.015717333803574245
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,96,1,128,0,1,float16,float16,127,0.015743999431530636
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,96,1,128,0,1,float16,fp8,127,0.01570133368174235
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,96,1,128,0,1,float16,float16,255,0.018453333526849747
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,96,1,128,0,1,float16,float16,15,0.013338666409254074
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,96,1,128,0,1,float16,fp8,255,0.01852799952030182
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,96,1,128,0,1,float16,float16,511,0.026015999416510265
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,96,1,128,0,1,float16,fp8,511,0.02603733291228612
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,96,1,128,0,1,float16,float16,1023,0.04068800061941147
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,96,1,128,0,1,float16,fp8,1023,0.04113066693147024
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,96,1,128,0,1,float16,float16,2047,0.07014399766921997
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,96,1,128,0,1,float16,fp8,2047,0.07138133545716603
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,96,1,128,0,1,float16,float16,4095,0.12945600350697836
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,96,1,128,0,1,float16,fp8,4095,0.13191466530164084
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,96,1,128,0,1,float16,float16,8191,0.2477440039316813
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,96,1,128,0,1,float16,fp8,8191,0.25193599859873456
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,96,1,128,0,1,float16,float16,16383,0.48600534598032635
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,96,1,128,0,1,float16,fp8,16383,0.49196799596150714
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,96,2,128,0,1,float16,float16,1,0.013290667285521826
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,96,2,128,0,1,float16,fp8,1,0.013487999637921652
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,96,2,128,0,1,float16,float16,3,0.013167999684810638
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,96,2,128,0,1,float16,fp8,3,0.013007999708255133
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,96,2,128,0,1,float16,float16,7,0.014501333236694336
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,96,2,128,0,1,float16,fp8,7,0.014607999473810196
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,96,2,128,0,1,float16,float16,15,0.013525333255529404
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,96,2,128,0,1,float16,fp8,15,0.013349333157142004
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,96,2,128,0,1,float16,float16,31,0.015317333241303762
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,96,2,128,0,1,float16,fp8,31,0.015226667126019796
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,96,2,128,0,1,float16,float16,63,0.01575999955336253
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,96,2,128,0,1,float16,fp8,63,0.015365333606799444
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,96,2,128,0,1,float16,float16,127,0.015770666301250458
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,96,2,128,0,1,float16,fp8,127,0.015578666081031164
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,96,2,128,0,1,float16,float16,255,0.018474667022625606
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,96,2,128,0,1,float16,fp8,255,0.018351999421914417
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,96,2,128,0,1,float16,float16,511,0.026202666262785595
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,96,2,128,0,1,float16,fp8,511,0.026005332668622334
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,96,2,128,0,1,float16,float16,1023,0.04068266600370407
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,96,2,128,0,1,float16,fp8,1023,0.040949332217375435
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,96,2,128,0,1,float16,float16,2047,0.07029333213965099
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,96,2,128,0,1,float16,fp8,2047,0.07144000132878621
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,96,2,128,0,1,float16,float16,4095,0.13126400113105774
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,96,2,128,0,1,float16,fp8,4095,0.13193600376447043
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,96,2,128,0,1,float16,float16,8191,0.25010132789611816
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,96,2,128,0,1,float16,fp8,8191,0.2520959973335266
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,96,2,128,0,1,float16,float16,16383,0.5155520041783651
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,96,2,128,0,1,float16,fp8,16383,0.4918239911397298
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,96,4,128,0,1,float16,float16,1,0.013370666652917862
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,96,4,128,0,1,float16,float16,3,0.013349333157142004
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,96,4,128,0,1,float16,float16,7,0.014373333503802618
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,96,4,128,0,1,float16,float16,15,0.013381333400805792
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,96,4,128,0,1,float16,float16,31,0.015146666516860327
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,96,4,128,0,1,float16,float16,63,0.01584533353646596
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,96,4,128,0,1,float16,float16,127,0.015594666202863058
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,96,4,128,0,1,float16,float16,255,0.018543999642133713
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,96,4,128,0,1,float16,float16,1023,0.016447999825080235
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,96,4,128,0,1,float16,float16,511,0.01732800031701724
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,96,4,128,0,1,float16,float16,2047,0.023999998966852825
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,96,4,128,0,1,float16,float16,4095,0.029109333952267964
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,96,4,128,0,1,float16,float16,8191,0.04417066772778829
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,96,4,128,0,1,float16,float16,16383,0.06633066634337108
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,96,8,128,0,1,float16,float16,1,0.01643199970324834
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,96,8,128,0,1,float16,fp8,1,0.0064319999267657595
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,96,8,128,0,1,float16,float16,3,0.013237333546082178
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,96,8,128,0,1,float16,fp8,3,0.0064319999267657595
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,96,8,128,0,1,float16,float16,7,0.01339200014869372
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,96,8,128,0,1,float16,fp8,7,0.006464000170429547
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,96,8,128,0,1,float16,float16,15,0.013418667018413544
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,96,8,128,0,1,float16,fp8,15,0.00679466687142849
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,96,8,128,0,1,float16,float16,31,0.01525866612792015
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,96,8,128,0,1,float16,fp8,31,0.0069440001000960665
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,96,8,128,0,1,float16,float16,63,0.015487999965747198
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,96,8,128,0,1,float16,fp8,63,0.006581333155433337
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,96,8,128,0,1,float16,float16,127,0.01565333331624667
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,96,8,128,0,1,float16,fp8,127,0.007466666400432587
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,96,8,128,0,1,float16,float16,255,0.01301866645614306
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,96,8,128,0,1,float16,fp8,255,0.008752000207702318
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,96,8,128,0,1,float16,float16,511,0.013482666263977686
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,96,8,128,0,1,float16,fp8,511,0.009312000125646591
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,96,8,128,0,1,float16,float16,1023,0.020224000016848247
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,96,8,128,0,1,float16,fp8,1023,0.011535999675591787
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,96,8,128,0,1,float16,float16,2047,0.028399998943010967
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,96,8,128,0,1,float16,fp8,2047,0.01470400020480156
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,96,8,128,0,1,float16,float16,4095,0.0406986673672994
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,96,8,128,0,1,float16,fp8,4095,0.023775999744733173
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,96,8,128,0,1,float16,float16,8191,0.05859733124574026
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,96,8,128,0,1,float16,fp8,8191,0.03128000100453695
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,96,8,128,0,1,float16,float16,16383,0.0867039958635966
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,96,8,128,0,1,float16,fp8,16383,0.052058666944503784
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,96,1,128,0,1,float16,float16,1,0.11432533462842305
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,96,1,128,0,1,float16,fp8,1,0.11718933780988057
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,96,1,128,0,1,float16,float16,3,0.12053866187731425
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,96,1,128,0,1,float16,fp8,3,0.12286399801572163
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,96,1,128,0,1,float16,float16,7,0.12595199545224509
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,96,1,128,0,1,float16,fp8,7,0.1276693344116211
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,96,1,128,0,1,float16,float16,15,0.16084800163904825
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,96,1,128,0,1,float16,fp8,15,0.1609280010064443
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,96,1,128,0,1,float16,float16,31,0.16139733791351318
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,96,1,128,0,1,float16,fp8,31,0.16035200158754984
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,96,1,128,0,1,float16,float16,63,0.16222400466601053
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,96,1,128,0,1,float16,fp8,63,0.16176533699035645
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,96,1,128,0,1,float16,float16,127,0.19771732886632284
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,96,1,128,0,1,float16,fp8,127,0.19844265778859457
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,96,1,128,0,1,float16,float16,255,0.309386670589447
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,96,1,128,0,1,float16,fp8,255,0.31099732716878253
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,96,1,128,0,1,float16,float16,511,0.5337493419647217
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,96,1,128,0,1,float16,fp8,511,0.5342986583709717
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,96,1,128,0,1,float16,float16,1023,0.9873387018839518
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,96,1,128,0,1,float16,fp8,1023,0.98089599609375
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,96,2,128,0,1,float16,float16,1,0.11628799637158711
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,96,2,128,0,1,float16,fp8,1,0.11815466483434041
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,96,2,128,0,1,float16,float16,3,0.12020799517631531
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,96,2,128,0,1,float16,fp8,3,0.12328533331553142
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,96,2,128,0,1,float16,float16,7,0.12616533041000366
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,96,2,128,0,1,float16,fp8,7,0.12838400403658548
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,96,2,128,0,1,float16,float16,15,0.1609386702378591
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,96,2,128,0,1,float16,fp8,15,0.16044800480206808
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,96,2,128,0,1,float16,float16,31,0.1613866686820984
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,96,2,128,0,1,float16,fp8,31,0.1605226695537567
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,96,2,128,0,1,float16,float16,63,0.1625653306643168
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,96,2,128,0,1,float16,fp8,63,0.16200533509254456
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,96,2,128,0,1,float16,float16,127,0.19818133115768433
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,96,2,128,0,1,float16,fp8,127,0.19808000326156616
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,96,2,128,0,1,float16,float16,255,0.30959999561309814
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,96,2,128,0,1,float16,fp8,255,0.3100586732228597
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,96,2,128,0,1,float16,float16,511,0.5406826734542847
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,96,2,128,0,1,float16,fp8,511,0.5349760055541992
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,96,2,128,0,1,float16,float16,1023,0.9890399773915609
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,96,2,128,0,1,float16,fp8,1023,0.9924213091532389
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,96,4,128,0,1,float16,float16,1,0.03411199897527695
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,96,4,128,0,1,float16,float16,3,0.03418133407831192
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,96,4,128,0,1,float16,float16,7,0.03422933320204417
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,96,4,128,0,1,float16,float16,15,0.03691199918588003
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,96,4,128,0,1,float16,float16,31,0.03429866582155228
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,96,4,128,0,1,float16,float16,63,0.03385066737731298
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,96,4,128,0,1,float16,float16,127,0.035631999373435974
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,96,4,128,0,1,float16,float16,255,0.06731200218200684
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,96,4,128,0,1,float16,float16,511,0.0820906658967336
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,96,4,128,0,1,float16,float16,1023,0.10613333185513814
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,96,8,128,0,1,float16,float16,1,0.04597333570321401
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,96,8,128,0,1,float16,fp8,1,0.017093333105246227
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,96,8,128,0,1,float16,fp8,3,0.01687466725707054
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,96,8,128,0,1,float16,float16,7,0.04600533346335093
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,96,8,128,0,1,float16,fp8,7,0.017082666357358296
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,96,8,128,0,1,float16,float16,15,0.04621866842110952
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,96,8,128,0,1,float16,fp8,15,0.01720000058412552
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,96,8,128,0,1,float16,float16,3,0.04600533346335093
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,96,8,128,0,1,float16,float16,31,0.04629333317279816
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,96,8,128,0,1,float16,float16,63,0.04589866598447164
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,96,8,128,0,1,float16,fp8,63,0.017071999609470367
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,96,8,128,0,1,float16,float16,127,0.046725332736968994
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,96,8,128,0,1,float16,fp8,127,0.02094399929046631
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,96,8,128,0,1,float16,float16,255,0.055546666185061135
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,96,8,128,0,1,float16,fp8,255,0.028362666567166645
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,96,8,128,0,1,float16,float16,511,0.07293333113193512
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,96,8,128,0,1,float16,fp8,511,0.04287999868392944
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,96,8,128,0,1,float16,float16,1023,0.10602666934331258
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,96,8,128,0,1,float16,fp8,1023,0.06526933113733928
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,96,1,128,0,1,float16,float16,1,0.22223466634750366
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,96,8,128,0,1,float16,fp8,31,0.01728533332546552
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,96,1,128,0,1,float16,fp8,1,0.2281386653582255
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,96,1,128,0,1,float16,fp8,3,0.23813333113988241
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,96,1,128,0,1,float16,float16,3,0.23397332429885864
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,96,1,128,0,1,float16,float16,7,0.24531733989715576
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,96,1,128,0,1,float16,fp8,7,0.24996799230575562
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,96,1,128,0,1,float16,float16,15,0.31623999277750653
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,96,1,128,0,1,float16,fp8,15,0.31524266799290973
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,96,1,128,0,1,float16,float16,31,0.3163893421490987
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,96,1,128,0,1,float16,fp8,31,0.3148266673088074
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,96,1,128,0,1,float16,float16,63,0.3189760049184163
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,96,1,128,0,1,float16,fp8,63,0.31809600194295246
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,96,1,128,0,1,float16,float16,127,0.38950932025909424
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,96,1,128,0,1,float16,fp8,127,0.389850656191508
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,96,1,128,0,1,float16,float16,255,0.6119253238042196
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,96,2,128,0,1,float16,float16,1,0.2241706649462382
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,96,1,128,0,1,float16,fp8,255,0.6117546558380127
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,96,2,128,0,1,float16,fp8,1,0.22990934054056802
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,96,2,128,0,1,float16,float16,3,0.23491199811299643
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,96,2,128,0,1,float16,fp8,3,0.2385973334312439
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,96,2,128,0,1,float16,float16,7,0.24810133377710977
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,96,2,128,0,1,float16,fp8,7,0.2502400080362956
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,96,2,128,0,1,float16,float16,15,0.3177066644032796
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,96,2,128,0,1,float16,fp8,15,0.315610667069753
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,96,2,128,0,1,float16,float16,31,0.3169333338737488
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,96,2,128,0,1,float16,fp8,31,0.3154720067977905
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,96,2,128,0,1,float16,float16,63,0.3192373315493266
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,96,2,128,0,1,float16,fp8,63,0.3187573353449504
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,96,2,128,0,1,float16,fp8,127,0.39238933722178143
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,96,2,128,0,1,float16,float16,255,0.6245119969050089
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,96,2,128,0,1,float16,fp8,255,0.6144693295160929
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,96,2,128,0,1,float16,float16,127,0.3912320137023926
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,96,4,128,0,1,float16,float16,1,0.06171200176080068
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,96,4,128,0,1,float16,float16,3,0.06339733302593231
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,96,4,128,0,1,float16,float16,7,0.062128002444903054
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,96,4,128,0,1,float16,float16,15,0.061247999469439186
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,96,4,128,0,1,float16,float16,31,0.061664000153541565
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,96,4,128,0,1,float16,float16,63,0.06388799846172333
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,96,4,128,0,1,float16,float16,127,0.06312533219655354
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,96,4,128,0,1,float16,float16,255,0.07206400235493977
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,96,8,128,0,1,float16,float16,1,0.08434133728345235
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,96,8,128,0,1,float16,fp8,1,0.030799999833106995
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,96,8,128,0,1,float16,float16,3,0.0844693382581075
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,96,8,128,0,1,float16,fp8,3,0.030207999050617218
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,96,8,128,0,1,float16,float16,7,0.08458133538564046
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,96,8,128,0,1,float16,fp8,7,0.03033066789309184
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,96,8,128,0,1,float16,float16,15,0.08496532837549846
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,96,8,128,0,1,float16,fp8,15,0.030250666042168934
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,96,8,128,0,1,float16,float16,31,0.08530666430791219
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,96,8,128,0,1,float16,fp8,31,0.030346666773160298
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,96,8,128,0,1,float16,float16,63,0.085999995470047
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,96,8,128,0,1,float16,fp8,63,0.030133334298928578
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,96,8,128,0,1,float16,float16,127,0.08714133501052856
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,96,8,128,0,1,float16,fp8,127,0.04185600082079569
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,96,8,128,0,1,float16,float16,255,0.10035199920336406
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,96,8,128,0,1,float16,fp8,255,0.05568000177542368
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,96,1,128,0,1,float16,float16,1,0.4373866717020671
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,96,1,128,0,1,float16,fp8,1,0.4490613142649333
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,96,1,128,0,1,float16,float16,3,0.46245865027109784
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,96,1,128,0,1,float16,float16,7,0.48689599831899005
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,96,1,128,0,1,float16,fp8,7,0.49578134218851727
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,96,1,128,0,1,float16,fp8,3,0.4708053270975749
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,96,1,128,0,1,float16,float16,15,0.6276533206303915
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,96,1,128,0,1,float16,fp8,15,0.6247359911600748
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,96,1,128,0,1,float16,float16,31,0.628437320391337
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,96,1,128,0,1,float16,fp8,31,0.6249653498331705
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,96,1,128,0,1,float16,float16,63,0.6354293425877889
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,96,1,128,0,1,float16,fp8,63,0.6316426595052084
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,96,1,128,0,1,float16,float16,127,0.7867786884307861
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,96,1,128,0,1,float16,fp8,127,0.7748586336771647
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,96,2,128,0,1,float16,float16,1,0.44228800137837726
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,96,2,128,0,1,float16,fp8,1,0.4527413447697957
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,96,2,128,0,1,float16,float16,3,0.4622773329416911
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,96,2,128,0,1,float16,fp8,3,0.4712053140004476
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,96,2,128,0,1,float16,float16,7,0.48633066813151044
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,96,2,128,0,1,float16,fp8,7,0.494917352994283
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,96,2,128,0,1,float16,float16,15,0.6285866498947144
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,96,2,128,0,1,float16,fp8,15,0.6253439982732137
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,96,2,128,0,1,float16,fp8,31,0.6254186630249023
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,96,2,128,0,1,float16,float16,63,0.6488800048828125
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,96,2,128,0,1,float16,float16,127,0.7984960079193115
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,96,2,128,0,1,float16,fp8,127,0.7909973462422689
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,96,2,128,0,1,float16,float16,31,0.6314506530761719
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,96,2,128,0,1,float16,fp8,63,0.6333866516749064
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,96,4,128,0,1,float16,float16,1,0.11412266890207927
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,96,4,128,0,1,float16,float16,7,0.11442666252454121
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,96,4,128,0,1,float16,float16,3,0.11618666847546895
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,96,4,128,0,1,float16,float16,15,0.11421866218249004
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,96,4,128,0,1,float16,float16,31,0.11565867066383362
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,96,4,128,0,1,float16,float16,63,0.1160533328851064
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,96,4,128,0,1,float16,float16,127,0.11533866326014201
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,96,8,128,0,1,float16,float16,1,0.1584106683731079
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,96,8,128,0,1,float16,fp8,1,0.06976533432801564
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,96,8,128,0,1,float16,float16,3,0.15821333726247153
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,96,8,128,0,1,float16,fp8,3,0.06972800195217133
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,96,8,128,0,1,float16,float16,7,0.15922666589419046
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,96,8,128,0,1,float16,fp8,7,0.0697813332080841
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,96,8,128,0,1,float16,float16,15,0.1602026621500651
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,96,8,128,0,1,float16,fp8,15,0.06981866558392842
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,96,8,128,0,1,float16,float16,31,0.16108799974123636
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,96,8,128,0,1,float16,fp8,31,0.06974933544794719
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,96,8,128,0,1,float16,float16,63,0.16090666254361471
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,96,8,128,0,1,float16,fp8,63,0.06982400019963582
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,96,8,128,0,1,float16,float16,127,0.16102400422096252
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,96,8,128,0,1,float16,fp8,127,0.0817386656999588
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,96,1,128,0,1,float16,float16,1,0.012634667257467905
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,96,1,128,0,1,float16,fp8,1,0.012821332861979803
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,96,1,128,0,1,float16,float16,3,0.012863999853531519
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,96,1,128,0,1,float16,fp8,3,0.013082666943470636
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,96,1,128,0,1,float16,float16,7,0.012666666259368261
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,96,1,128,0,1,float16,fp8,7,0.012666666259368261
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,96,1,128,0,1,float16,float16,15,0.014538666854302088
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,96,1,128,0,1,float16,fp8,15,0.014522666732470194
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,96,1,128,0,1,float16,float16,31,0.014650666465361914
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,96,1,128,0,1,float16,fp8,31,0.014607999473810196
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,96,1,128,0,1,float16,float16,63,0.014896000425020853
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,96,1,128,0,1,float16,fp8,63,0.014858666807413101
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,96,1,128,0,1,float16,float16,127,0.01728533332546552
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,96,1,128,0,1,float16,fp8,127,0.01729600007335345
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,96,1,128,0,1,float16,float16,255,0.02470933397610982
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,96,1,128,0,1,float16,fp8,255,0.025018667181332905
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,96,1,128,0,1,float16,float16,511,0.0399893323580424
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,96,1,128,0,1,float16,fp8,511,0.041120000183582306
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,96,1,128,0,1,float16,float16,1023,0.07028799752394359
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,96,1,128,0,1,float16,fp8,1023,0.0728053351243337
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,96,1,128,0,1,float16,float16,2047,0.13036800424257913
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,96,1,128,0,1,float16,fp8,2047,0.13502400120099387
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,96,1,128,0,1,float16,float16,4095,0.24969067176183066
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,96,1,128,0,1,float16,fp8,4095,0.26052266359329224
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,96,1,128,0,1,float16,float16,8191,0.5002880096435547
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,96,1,128,0,1,float16,fp8,8191,0.5126239856084188
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,96,1,128,0,1,float16,float16,16383,1.1969493230183919
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,96,1,128,0,1,float16,fp8,16383,1.2087946732838948
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,96,2,128,0,1,float16,float16,1,0.012624000509579977
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,96,2,128,0,1,float16,fp8,1,0.012794667234023413
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,96,2,128,0,1,float16,float16,3,0.012842666357755661
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,96,2,128,0,1,float16,fp8,3,0.013088000317414602
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,96,2,128,0,1,float16,float16,7,0.012896000097195307
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,96,2,128,0,1,float16,fp8,7,0.012896000097195307
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,96,2,128,0,1,float16,float16,15,0.014874666929244995
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,96,2,128,0,1,float16,fp8,15,0.014511999984582266
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,96,2,128,0,1,float16,float16,31,0.01471466695268949
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,96,2,128,0,1,float16,fp8,31,0.014655999839305878
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,96,2,128,0,1,float16,float16,63,0.014858666807413101
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,96,2,128,0,1,float16,fp8,63,0.014837333311637243
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,96,2,128,0,1,float16,float16,127,0.017263999829689663
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,96,2,128,0,1,float16,fp8,127,0.017349333812793095
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,96,2,128,0,1,float16,float16,255,0.02475733309984207
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,96,2,128,0,1,float16,fp8,255,0.025072000920772552
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,96,2,128,0,1,float16,float16,511,0.039936001102129616
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,96,2,128,0,1,float16,fp8,511,0.041221333046754204
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,96,2,128,0,1,float16,float16,1023,0.07050666709740956
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,96,2,128,0,1,float16,fp8,1023,0.0729973316192627
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,96,2,128,0,1,float16,float16,2047,0.13145066301027933
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,96,2,128,0,1,float16,fp8,2047,0.13501866658528647
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,96,2,128,0,1,float16,float16,4095,0.25306133429209393
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,96,2,128,0,1,float16,fp8,4095,0.25938665866851807
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,96,2,128,0,1,float16,float16,8191,0.510047992070516
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,96,2,128,0,1,float16,fp8,8191,0.5118293364842733
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,96,2,128,0,1,float16,float16,16383,1.3136746883392334
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,96,2,128,0,1,float16,fp8,16383,1.3222506841023762
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,96,4,128,0,1,float16,float16,1,0.012693333129088083
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,96,4,128,0,1,float16,float16,3,0.01301866645614306
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,96,4,128,0,1,float16,float16,15,0.018383999665578205
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,96,4,128,0,1,float16,float16,7,0.017802666872739792
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,96,4,128,0,1,float16,float16,31,0.01522133375207583
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,96,4,128,0,1,float16,float16,63,0.015040000279744467
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,96,4,128,0,1,float16,float16,127,0.018485333770513535
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,96,4,128,0,1,float16,float16,255,0.01458666721979777
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,96,4,128,0,1,float16,float16,511,0.015317333241303762
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,96,4,128,0,1,float16,float16,1023,0.022826666633288067
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,96,4,128,0,1,float16,float16,2047,0.033733333150545754
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,96,4,128,0,1,float16,float16,4095,0.052298665046691895
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,96,4,128,0,1,float16,float16,16383,0.1081813375155131
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,96,4,128,0,1,float16,float16,8191,0.07366933425267537
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,96,8,128,0,1,float16,float16,1,0.016735999534527462
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,96,8,128,0,1,float16,fp8,1,0.006741333131988843
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,96,8,128,0,1,float16,float16,3,0.010442666709423065
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,96,8,128,0,1,float16,fp8,3,0.006720000257094701
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,96,8,128,0,1,float16,fp8,7,0.008143999924262365
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,96,8,128,0,1,float16,float16,15,0.011231999844312668
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,96,8,128,0,1,float16,fp8,15,0.008703999842206636
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,96,8,128,0,1,float16,float16,31,0.01118933285276095
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,96,8,128,0,1,float16,fp8,31,0.008650666723648706
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,96,8,128,0,1,float16,float16,63,0.011194666226704916
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,96,8,128,0,1,float16,fp8,63,0.00690133310854435
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,96,8,128,0,1,float16,float16,127,0.010768000036478043
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,96,8,128,0,1,float16,fp8,127,0.007674666742483775
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,96,8,128,0,1,float16,float16,255,0.01379199946920077
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,96,8,128,0,1,float16,fp8,255,0.009306666751702627
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,96,8,128,0,1,float16,float16,511,0.01961600035429001
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,96,8,128,0,1,float16,fp8,511,0.010464000205198923
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,96,8,128,0,1,float16,float16,1023,0.028192001084486645
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,96,8,128,0,1,float16,fp8,1023,0.018138666947682697
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,96,8,128,0,1,float16,float16,2047,0.04242666562398275
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,96,8,128,0,1,float16,fp8,2047,0.021856000026067097
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,96,8,128,0,1,float16,float16,4095,0.06620266536871593
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,96,8,128,0,1,float16,fp8,4095,0.03382933388153712
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,96,8,128,0,1,float16,float16,8191,0.08365866541862488
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,96,8,128,0,1,float16,fp8,8191,0.04765866696834564
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,96,8,128,0,1,float16,float16,16383,0.11833066741625468
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,96,8,128,0,1,float16,fp8,16383,0.0711839993794759
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,96,8,128,0,1,float16,float16,7,0.010389333590865135
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,96,1,128,0,1,float16,float16,1,0.9212426344553629
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,96,1,128,0,1,float16,fp8,1,0.9410080115000407
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,96,1,128,0,1,float16,float16,3,0.9469226996103922
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,96,1,128,0,1,float16,fp8,3,0.9636800289154053
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,96,1,128,0,1,float16,float16,7,0.9970719814300537
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,96,1,128,0,1,float16,fp8,7,1.0138400395711262
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,96,1,128,0,1,float16,float16,15,1.2695786952972412
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,96,1,128,0,1,float16,fp8,15,1.2643040021260579
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,96,1,128,0,1,float16,float16,31,1.276421308517456
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,96,1,128,0,1,float16,fp8,31,1.267584005991618
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,96,1,128,0,1,float16,float16,63,1.287178675333659
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,96,1,128,0,1,float16,fp8,63,1.2820160388946533
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,96,2,128,0,1,float16,float16,1,0.937818686167399
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,96,2,128,0,1,float16,fp8,1,0.9596479733784994
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,96,2,128,0,1,float16,float16,3,0.9636639753977457
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,96,2,128,0,1,float16,fp8,3,0.9791680177052816
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,96,2,128,0,1,float16,float16,7,1.0088160037994385
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,96,2,128,0,1,float16,fp8,7,1.026362657546997
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,96,2,128,0,1,float16,float16,15,1.2722293535868328
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,96,2,128,0,1,float16,fp8,15,1.2677706877390544
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,96,2,128,0,1,float16,float16,31,1.2817333539326985
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,96,2,128,0,1,float16,fp8,31,1.272447983423869
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,96,2,128,0,1,float16,float16,63,1.2950293223063152
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,96,2,128,0,1,float16,fp8,63,1.2894559701283772
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,96,4,128,0,1,float16,float16,1,0.22223466634750366
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,96,4,128,0,1,float16,float16,3,0.22292266289393106
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,96,4,128,0,1,float16,float16,7,0.22165334224700928
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,96,4,128,0,1,float16,float16,15,0.22073600689570108
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,96,4,128,0,1,float16,float16,31,0.22261333465576172
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,96,4,128,0,1,float16,float16,63,0.21917865673700967
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,96,8,128,0,1,float16,fp8,1,0.12999467055002847
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,96,8,128,0,1,float16,float16,1,0.31091733773549396
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,96,8,128,0,1,float16,float16,3,0.31244800488154095
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,96,8,128,0,1,float16,fp8,7,0.13053866227467856
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,96,8,128,0,1,float16,float16,15,0.31426666180292767
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,96,8,128,0,1,float16,float16,7,0.31193600098292035
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,96,8,128,0,1,float16,fp8,15,0.13010133306185404
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,96,8,128,0,1,float16,float16,31,0.3131786584854126
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,96,8,128,0,1,float16,fp8,31,0.13016000390052795
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,96,8,128,0,1,float16,float16,63,0.3105120062828064
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,96,8,128,0,1,float16,fp8,63,0.13029866417249045
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,96,8,128,0,1,float16,fp8,3,0.12998400131861368
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,96,1,128,0,1,float16,float16,1,1.8400373458862305
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,96,1,128,0,1,float16,fp8,1,1.8815199534098308
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,96,1,128,0,1,float16,float16,3,1.8897493680318196
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,96,1,128,0,1,float16,fp8,3,1.925813357035319
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,96,1,128,0,1,float16,float16,7,1.989781379699707
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,96,1,128,0,1,float16,fp8,7,2.024837334950765
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,96,1,128,0,1,float16,float16,15,2.5376906394958496
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,96,1,128,0,1,float16,fp8,15,2.525397300720215
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,96,2,128,0,1,float16,float16,1,1.873696009318034
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,96,1,128,0,1,float16,fp8,31,2.527023951212565
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,96,1,128,0,1,float16,float16,31,2.545263926188151
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,96,2,128,0,1,float16,fp8,1,1.9171733856201172
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,96,2,128,0,1,float16,float16,3,1.920581340789795
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,96,2,128,0,1,float16,fp8,3,1.9540106455485027
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,96,2,128,0,1,float16,fp8,7,2.0480586687723794
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,96,2,128,0,1,float16,float16,7,2.011242707570394
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,96,2,128,0,1,float16,float16,15,2.5385707219441733
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,96,2,128,0,1,float16,fp8,15,2.5298026402791343
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,96,2,128,0,1,float16,float16,31,2.559845288594564
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,96,2,128,0,1,float16,fp8,31,2.5409226417541504
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,96,4,128,0,1,float16,float16,1,0.43454933166503906
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,96,4,128,0,1,float16,float16,7,0.43478933970133465
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,96,4,128,0,1,float16,float16,3,0.4360586802164714
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,96,4,128,0,1,float16,float16,15,0.4395039876302083
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,96,4,128,0,1,float16,float16,31,0.4344746669133504
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,96,8,128,0,1,float16,float16,1,0.6159253517786661
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,96,8,128,0,1,float16,fp8,1,0.2499199906984965
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,96,8,128,0,1,float16,float16,3,0.6159679889678955
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,96,8,128,0,1,float16,fp8,3,0.24827200174331665
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,96,8,128,0,1,float16,float16,7,0.6157866716384888
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,96,8,128,0,1,float16,fp8,7,0.250602662563324
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,96,8,128,0,1,float16,float16,15,0.6165279944737753
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,96,8,128,0,1,float16,fp8,15,0.250325342019399
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,96,8,128,0,1,float16,fp8,31,0.248416006565094
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,96,8,128,0,1,float16,float16,31,0.6161119937896729
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,96,1,128,0,1,float16,float16,1,0.018239999810854595
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,96,1,128,0,1,float16,float16,3,0.01911466692884763
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,96,1,128,0,1,float16,fp8,1,0.018895999838908512
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,96,1,128,0,1,float16,fp8,3,0.0194560003777345
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,96,1,128,0,1,float16,float16,7,0.01987733319401741
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,96,1,128,0,1,float16,fp8,7,0.020031999796628952
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,96,1,128,0,1,float16,float16,15,0.024832000335057575
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,96,1,128,0,1,float16,fp8,15,0.024735999604066212
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,96,1,128,0,1,float16,float16,31,0.02496533344189326
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,96,1,128,0,1,float16,fp8,31,0.02499733368555705
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,96,1,128,0,1,float16,float16,63,0.025114665428797405
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,96,1,128,0,1,float16,fp8,63,0.02499733368555705
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,96,1,128,0,1,float16,float16,127,0.02977066735426585
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,96,1,128,0,1,float16,fp8,127,0.02977066735426585
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,96,1,128,0,1,float16,float16,255,0.04457066456476847
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,96,1,128,0,1,float16,fp8,255,0.04462933540344238
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,96,1,128,0,1,float16,float16,511,0.07392000158627827
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,96,1,128,0,1,float16,fp8,511,0.07520000139872234
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,96,1,128,0,1,float16,float16,1023,0.1323946714401245
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,96,1,128,0,1,float16,fp8,1023,0.13476799925168356
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,96,1,128,0,1,float16,float16,2047,0.24936532974243164
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,96,1,128,0,1,float16,fp8,2047,0.25408534208933514
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,96,1,128,0,1,float16,float16,4095,0.48308265209198
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,96,1,128,0,1,float16,fp8,4095,0.4923199812571208
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,96,1,128,0,1,float16,float16,8191,0.9684586524963379
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,96,1,128,0,1,float16,fp8,8191,0.9734026590983073
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,96,2,128,0,1,float16,float16,1,0.0184906671444575
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,96,2,128,0,1,float16,fp8,1,0.018842666099468868
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,96,2,128,0,1,float16,float16,3,0.019098666807015736
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,96,2,128,0,1,float16,fp8,3,0.019487999379634857
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,96,2,128,0,1,float16,float16,7,0.019776000330845516
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,96,2,128,0,1,float16,fp8,7,0.019914666811625164
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,96,2,128,0,1,float16,float16,15,0.024826665719350178
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,96,2,128,0,1,float16,fp8,15,0.02462399999300639
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,96,2,128,0,1,float16,float16,31,0.024853333830833435
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,96,2,128,0,1,float16,fp8,31,0.024693332612514496
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,96,2,128,0,1,float16,float16,63,0.025040000677108765
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,96,2,128,0,1,float16,fp8,63,0.024842667082945507
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,96,2,128,0,1,float16,float16,127,0.02985599885384242
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,96,2,128,0,1,float16,fp8,127,0.03028800090154012
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,96,2,128,0,1,float16,float16,255,0.04442666471004486
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,96,2,128,0,1,float16,fp8,255,0.04469866553942362
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,96,2,128,0,1,float16,float16,511,0.0738560010989507
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,96,2,128,0,1,float16,fp8,511,0.07499200105667114
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,96,2,128,0,1,float16,float16,1023,0.13244266311327615
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,96,2,128,0,1,float16,fp8,1023,0.13455999890963236
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,96,2,128,0,1,float16,float16,2047,0.250383992989858
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,96,2,128,0,1,float16,fp8,2047,0.2539253234863281
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,96,2,128,0,1,float16,float16,4095,0.4925493399302165
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,96,2,128,0,1,float16,fp8,4095,0.4921120007832845
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,96,2,128,0,1,float16,float16,8191,0.9773813088734945
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,96,2,128,0,1,float16,fp8,8191,0.9853333632151285
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,96,4,128,0,1,float16,float16,1,0.016629333297411602
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,96,4,128,0,1,float16,float16,3,0.012362666428089142
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,96,4,128,0,1,float16,float16,7,0.012128000458081564
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,96,4,128,0,1,float16,float16,31,0.018624000251293182
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,96,4,128,0,1,float16,float16,15,0.01699200024207433
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,96,4,128,0,1,float16,float16,63,0.011952000359694162
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,96,4,128,0,1,float16,float16,127,0.011781333635250727
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,96,4,128,0,1,float16,float16,255,0.015583999454975128
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,96,4,128,0,1,float16,float16,511,0.023157333334287006
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,96,4,128,0,1,float16,float16,1023,0.03350933392842611
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,96,4,128,0,1,float16,float16,2047,0.05233600238958994
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,96,4,128,0,1,float16,float16,4095,0.08241066833337148
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,96,4,128,0,1,float16,float16,8191,0.10659199953079224
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,96,8,128,0,1,float16,float16,1,0.011221333096424738
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,96,8,128,0,1,float16,fp8,1,0.0075519997626543045
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,96,8,128,0,1,float16,float16,3,0.010981333752473196
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,96,8,128,0,1,float16,float16,7,0.011146667102972666
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,96,8,128,0,1,float16,fp8,7,0.007397333160042763
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,96,8,128,0,1,float16,float16,15,0.011503999431928
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,96,8,128,0,1,float16,fp8,15,0.007525333513816197
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,96,8,128,0,1,float16,float16,31,0.011589333415031433
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,96,8,128,0,1,float16,fp8,31,0.007520000139872233
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,96,8,128,0,1,float16,float16,63,0.01145600030819575
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,96,8,128,0,1,float16,fp8,3,0.007104000076651573
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,96,8,128,0,1,float16,fp8,63,0.00684799998998642
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,96,8,128,0,1,float16,fp8,127,0.00821333316465219
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,96,8,128,0,1,float16,float16,255,0.02201066662867864
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,96,8,128,0,1,float16,fp8,255,0.010725333044926325
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,96,8,128,0,1,float16,float16,511,0.029264000554879505
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,96,8,128,0,1,float16,fp8,511,0.017727999637524288
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,96,8,128,0,1,float16,float16,1023,0.04691733419895172
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,96,8,128,0,1,float16,fp8,1023,0.025994665920734406
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,96,8,128,0,1,float16,float16,2047,0.06457599997520447
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,96,8,128,0,1,float16,float16,127,0.010741333166758219
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,96,8,128,0,1,float16,fp8,2047,0.03252800057331721
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,96,8,128,0,1,float16,float16,4095,0.08250666658083598
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,96,8,128,0,1,float16,fp8,4095,0.048725331823031105
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,96,8,128,0,1,float16,float16,8191,0.11717333396275838
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,96,8,128,0,1,float16,fp8,8191,0.0724426656961441
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,64,1,64,128,1,float16,float16,1,0.02369600037733714
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,64,1,64,128,1,float16,fp8,1,0.02378133436044057
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,64,1,64,0,1,float16,float16,1,0.02402666707833608
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,64,1,64,0,1,float16,fp8,1,0.02399466683467229
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,64,1,64,128,1,float16,float16,3,0.024058667321999867
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,64,1,64,0,1,float16,float16,3,0.023743999501069386
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,64,1,64,128,1,float16,fp8,3,0.0240639994541804
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,64,1,64,0,1,float16,fp8,3,0.023717333873112995
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,64,1,64,0,1,float16,float16,7,0.026752000053723652
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,64,1,64,128,1,float16,float16,7,0.02550400048494339
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,64,1,64,128,1,float16,fp8,7,0.024821333587169647
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,64,1,64,0,1,float16,fp8,7,0.025125332176685333
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,64,1,64,128,1,float16,float16,15,0.02532266577084859
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,64,1,64,0,1,float16,float16,15,0.02566933383544286
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,64,1,64,128,1,float16,fp8,15,0.025439999997615814
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,64,1,64,0,1,float16,fp8,15,0.025759999950726826
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,64,1,64,128,1,float16,float16,31,0.03181866556406021
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,64,1,64,0,1,float16,float16,31,0.03180266668399175
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,64,1,64,128,1,float16,fp8,31,0.031888000667095184
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,64,1,64,0,1,float16,fp8,31,0.031898667414983116
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,64,1,64,128,1,float16,float16,63,0.032058666149775185
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,64,1,64,0,1,float16,float16,63,0.03216533362865448
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,64,1,64,128,1,float16,fp8,63,0.03225066761175791
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,64,1,64,0,1,float16,fp8,63,0.03226666649182638
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,64,1,64,128,1,float16,float16,127,0.0324799989660581
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,64,1,64,0,1,float16,float16,127,0.03256533294916153
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,64,1,64,128,1,float16,fp8,127,0.032458665470282234
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,64,1,64,0,1,float16,fp8,127,0.03257066756486893
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,64,1,64,128,1,float16,float16,255,0.0324799989660581
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,64,1,64,0,1,float16,float16,255,0.038704000413417816
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,64,1,64,128,1,float16,fp8,255,0.03234133372704188
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,64,1,64,0,1,float16,fp8,255,0.03858133405447006
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,64,1,64,128,1,float16,float16,511,0.03246400008598963
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,64,1,64,0,1,float16,float16,511,0.057861333092053734
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,64,1,64,128,1,float16,fp8,511,0.0322826678554217
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,64,1,64,0,1,float16,fp8,511,0.05878399809201559
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,64,1,64,128,1,float16,float16,1023,0.03249066571394602
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,64,1,64,0,1,float16,float16,1023,0.09627733627955119
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,64,1,64,128,1,float16,fp8,1023,0.033514666060606636
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,64,1,64,0,1,float16,fp8,1023,0.0986346701780955
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,64,1,64,128,1,float16,float16,2047,0.03218133250872294
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,64,1,64,0,1,float16,float16,2047,0.17286400000254312
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,64,1,64,128,1,float16,fp8,2047,0.03219199925661087
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,64,1,64,0,1,float16,fp8,2047,0.17718400557835898
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,64,1,64,128,1,float16,float16,4095,0.03214933226505915
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,64,1,64,0,1,float16,float16,4095,0.32547734181086224
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,64,1,64,128,1,float16,fp8,4095,0.03221333275238673
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,64,1,64,0,1,float16,fp8,4095,0.3345653216044108
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,64,1,64,128,1,float16,float16,8191,0.032229334115982056
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,64,1,64,128,1,float16,fp8,8191,0.03216533362865448
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,64,1,64,0,1,float16,float16,8191,0.6478720108668009
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,64,1,64,0,1,float16,fp8,8191,0.649727980295817
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,64,2,64,0,1,float16,float16,1,0.011616000284751257
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,64,2,64,128,1,float16,float16,1,0.011930666863918304
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,64,2,64,128,1,float16,fp8,1,0.008362666393319765
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,64,2,64,128,1,float16,float16,3,0.011594666788975397
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,64,2,64,0,1,float16,float16,3,0.012053333222866058
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,64,2,64,128,1,float16,fp8,3,0.008623999853928884
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,64,2,64,0,1,float16,fp8,3,0.008661333471536636
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,64,2,64,128,1,float16,float16,7,0.011952000359694162
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,64,2,64,0,1,float16,float16,7,0.011887999872366587
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,64,2,64,128,1,float16,fp8,7,0.008693333094318708
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,64,2,64,0,1,float16,fp8,7,0.008789333204428354
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,64,2,64,128,1,float16,float16,15,0.011968000481526056
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,64,2,64,0,1,float16,float16,15,0.011616000284751257
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,64,2,64,128,1,float16,fp8,15,0.008496000121037165
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,64,2,64,0,1,float16,fp8,15,0.008330666770537695
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,64,2,64,128,1,float16,float16,31,0.011365332951148352
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,64,2,64,0,1,float16,float16,31,0.011365332951148352
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,64,2,64,128,1,float16,fp8,31,0.008330666770537695
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,64,2,64,0,1,float16,fp8,31,0.008373333141207695
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,64,2,64,128,1,float16,float16,63,0.01137599969903628
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,64,2,64,0,1,float16,fp8,1,0.008485333373149237
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,64,2,64,0,1,float16,float16,63,0.011498666057984034
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,64,2,64,128,1,float16,fp8,63,0.00914666677514712
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,64,2,64,0,1,float16,fp8,63,0.008394666636983553
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,64,2,64,128,1,float16,float16,127,0.011637333780527115
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,64,2,64,0,1,float16,float16,127,0.011429333438475927
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,64,2,64,128,1,float16,fp8,127,0.009679999823371569
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,64,2,64,0,1,float16,fp8,127,0.009423999736706415
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,64,2,64,128,1,float16,float16,255,0.014005333185195923
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,64,2,64,0,1,float16,float16,255,0.01414399966597557
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,64,2,64,128,1,float16,fp8,255,0.01003200002014637
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,64,2,64,0,1,float16,fp8,255,0.010202666744589806
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,64,2,64,128,1,float16,float16,511,0.019727999965349834
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,64,2,64,128,1,float16,fp8,511,0.010879999647537867
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,64,2,64,0,1,float16,fp8,511,0.012565333396196365
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,64,2,64,128,1,float16,float16,1023,0.025781333446502686
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,64,2,64,0,1,float16,float16,1023,0.029285334050655365
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,64,2,64,128,1,float16,fp8,1023,0.010981333752473196
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,64,2,64,0,1,float16,fp8,1023,0.021045332153638203
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,64,2,64,128,1,float16,float16,2047,0.03239466746648153
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,64,2,64,0,1,float16,float16,2047,0.04287466903527578
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,64,2,64,128,1,float16,fp8,2047,0.011317333827416102
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,64,2,64,0,1,float16,fp8,2047,0.025381334125995636
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,64,2,64,128,1,float16,float16,4095,0.040218666195869446
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,64,2,64,0,1,float16,float16,4095,0.06311466793219249
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,64,2,64,128,1,float16,fp8,4095,0.011349332829316458
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,64,2,64,0,1,float16,fp8,4095,0.03961600114901861
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,64,2,64,128,1,float16,float16,8191,0.040192000567913055
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,64,2,64,0,1,float16,float16,8191,0.08067200084527333
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,64,2,64,128,1,float16,fp8,8191,0.01231466606259346
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,64,2,64,0,1,float16,fp8,8191,0.05640000104904175
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,64,2,64,0,1,float16,float16,511,0.020234666764736176
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,64,4,64,0,1,float16,float16,1,0.010501333822806677
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,64,4,64,128,1,float16,float16,1,0.011045332998037338
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,64,4,64,128,1,float16,fp8,1,0.006746666505932808
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,64,4,64,128,1,float16,float16,3,0.011226666470368704
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,64,4,64,0,1,float16,fp8,1,0.006746666505932808
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,64,4,64,0,1,float16,fp8,3,0.007071999832987785
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,64,4,64,128,1,float16,float16,7,0.01032533310353756
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,64,4,64,0,1,float16,float16,3,0.010480000327030817
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,64,4,64,0,1,float16,float16,7,0.010437333335479101
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,64,4,64,128,1,float16,fp8,7,0.006773333375652631
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,64,4,64,0,1,float16,fp8,7,0.007040000210205714
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,64,4,64,128,1,float16,float16,15,0.0103946669648091
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,64,4,64,0,1,float16,float16,15,0.010570666442314783
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,64,4,64,0,1,float16,fp8,15,0.007567999884486198
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,64,4,64,128,1,float16,fp8,15,0.01657066618402799
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,64,4,64,128,1,float16,float16,31,0.01102399950226148
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,64,4,64,128,1,float16,fp8,31,0.007530666887760162
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,64,4,64,0,1,float16,float16,31,0.01156266654531161
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,64,4,64,0,1,float16,fp8,31,0.007061333085099856
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,64,4,64,128,1,float16,fp8,3,0.007536000261704127
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,64,4,64,0,1,float16,float16,63,0.010527999450763067
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,64,4,64,128,1,float16,float16,63,0.011050666371981302
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,64,4,64,128,1,float16,fp8,63,0.008303999900817871
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,64,4,64,0,1,float16,fp8,63,0.007301333049933116
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,64,4,64,128,1,float16,float16,127,0.01108266661564509
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,64,4,64,128,1,float16,fp8,127,0.0075093333919843035
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,64,4,64,0,1,float16,float16,127,0.011157333850860596
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,64,4,64,0,1,float16,fp8,127,0.007930666829148928
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,64,4,64,128,1,float16,float16,255,0.018474667022625606
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,64,4,64,0,1,float16,float16,255,0.018906666586796444
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,64,4,64,128,1,float16,fp8,255,0.009103999783595404
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,64,4,64,0,1,float16,fp8,255,0.009482666850090027
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,64,4,64,128,1,float16,float16,511,0.02351466566324234
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,64,4,64,0,1,float16,float16,511,0.026005332668622334
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,64,4,64,128,1,float16,fp8,511,0.00956266683836778
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,64,4,64,0,1,float16,fp8,511,0.012186666329701742
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,64,4,64,128,1,float16,float16,1023,0.0341333324710528
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,64,4,64,0,1,float16,float16,1023,0.04019733270009359
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,64,4,64,128,1,float16,fp8,1023,0.010090666512648264
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,64,4,64,0,1,float16,fp8,1023,0.02053333322207133
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,64,4,64,128,1,float16,float16,2047,0.03554133325815201
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,64,4,64,0,1,float16,float16,2047,0.05276266733805338
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,64,4,64,128,1,float16,fp8,2047,0.010357333347201347
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,64,4,64,0,1,float16,fp8,2047,0.024383999407291412
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,64,4,64,128,1,float16,float16,4095,0.03616533428430557
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,64,4,64,0,1,float16,float16,4095,0.06715733309586842
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,64,4,64,128,1,float16,fp8,4095,0.009722666814923286
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,64,4,64,0,1,float16,fp8,4095,0.03454933315515518
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,64,4,64,128,1,float16,float16,8191,0.036229332288106285
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,64,4,64,0,1,float16,float16,8191,0.08867733677228291
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,64,4,64,128,1,float16,fp8,8191,0.0100426667680343
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,64,4,64,0,1,float16,fp8,8191,0.05622933308283488
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,64,8,64,128,1,float16,float16,1,0.016858667135238647
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,64,8,64,128,1,float16,fp8,1,0.007567999884486198
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,64,8,64,0,1,float16,float16,1,0.015178666760524115
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,64,8,64,0,1,float16,fp8,1,0.006405333057045937
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,64,8,64,0,1,float16,float16,3,0.015146666516860327
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,64,8,64,128,1,float16,fp8,3,0.0063146669417619705
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,64,8,64,0,1,float16,float16,7,0.014991999914248785
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,64,8,64,128,1,float16,fp8,7,0.006298666819930077
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,64,8,64,128,1,float16,float16,15,0.014730667074521383
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,64,8,64,0,1,float16,float16,15,0.014794666320085526
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,64,8,64,128,1,float16,fp8,15,0.006544000158707301
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,64,8,64,0,1,float16,fp8,15,0.007087999954819679
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,64,8,64,128,1,float16,float16,31,0.014773332824309668
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,64,8,64,0,1,float16,float16,31,0.014848000059525171
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,64,8,64,128,1,float16,fp8,31,0.006490666419267654
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,64,8,64,0,1,float16,fp8,31,0.0063573333124319715
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,64,8,64,128,1,float16,float16,63,0.014479999740918478
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,64,8,64,0,1,float16,float16,63,0.014405333747466406
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,64,8,64,128,1,float16,float16,3,0.015370666980743408
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,64,8,64,128,1,float16,fp8,63,0.006362666686375936
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,64,8,64,0,1,float16,fp8,63,0.007424000029762586
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,64,8,64,128,1,float16,float16,127,0.014709333578745524
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,64,8,64,0,1,float16,float16,127,0.015103999525308609
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,64,8,64,128,1,float16,fp8,127,0.007194666812817256
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,64,8,64,0,1,float16,fp8,127,0.007642666498819987
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,64,8,64,128,1,float16,float16,255,0.026789332429567974
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,64,8,64,0,1,float16,float16,255,0.0269813338915507
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,64,8,64,128,1,float16,fp8,255,0.008634666601816813
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,64,8,64,0,1,float16,fp8,3,0.006213333457708359
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,64,8,64,0,1,float16,fp8,255,0.010053333515922228
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,64,8,64,0,1,float16,float16,511,0.040965333580970764
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,64,8,64,128,1,float16,float16,7,0.014309333016475042
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,64,8,64,128,1,float16,fp8,511,0.010501333822806677
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,64,8,64,128,1,float16,float16,1023,0.03666666646798452
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,64,8,64,0,1,float16,float16,1023,0.05011733373006185
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,64,8,64,128,1,float16,fp8,1023,0.010399999717871347
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,64,8,64,0,1,float16,fp8,1023,0.0206133338312308
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,64,8,64,128,1,float16,float16,2047,0.0391839991013209
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,64,8,64,0,1,float16,float16,2047,0.06784533460934956
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,64,8,64,0,1,float16,fp8,7,0.007141333073377609
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,64,8,64,128,1,float16,fp8,2047,0.010559999694426855
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,64,8,64,0,1,float16,fp8,2047,0.028602667152881622
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,64,8,64,128,1,float16,float16,4095,0.039173332353432976
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,64,8,64,0,1,float16,float16,4095,0.08912000060081482
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,64,8,64,128,1,float16,fp8,4095,0.010602666685978571
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,64,8,64,0,1,float16,fp8,4095,0.04637333254019419
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,64,8,64,128,1,float16,float16,8191,0.03905066599448522
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,64,8,64,0,1,float16,float16,8191,0.13288533687591553
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,64,8,64,128,1,float16,float16,511,0.03579200059175491
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,64,8,64,128,1,float16,fp8,8191,0.010079999764760336
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,64,1,64,128,1,float16,float16,1,0.008320000022649765
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,64,8,64,0,1,float16,fp8,8191,0.07901866734027863
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,64,1,64,128,1,float16,fp8,1,0.00903466654320558
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,64,8,64,0,1,float16,fp8,511,0.015658666690190632
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,64,1,64,0,1,float16,fp8,1,0.009488000224033991
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,64,1,64,0,1,float16,float16,1,0.0075040000180403394
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,64,1,64,0,1,float16,float16,3,0.0085333331177632
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,64,1,64,0,1,float16,fp8,3,0.009519999846816063
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,64,1,64,128,1,float16,float16,3,0.008309333274761835
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,64,1,64,128,1,float16,float16,7,0.007658666620651881
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,64,1,64,128,1,float16,fp8,3,0.008709333216150602
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,64,1,64,0,1,float16,float16,7,0.008623999853928884
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,64,1,64,128,1,float16,fp8,7,0.009530666594703993
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,64,1,64,0,1,float16,fp8,7,0.009365333244204521
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,64,1,64,128,1,float16,float16,15,0.00761600024998188
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,64,1,64,0,1,float16,float16,15,0.007578666632374127
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,64,1,64,128,1,float16,fp8,15,0.009674666449427605
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,64,1,64,0,1,float16,fp8,15,0.009445333232482275
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,64,1,64,128,1,float16,float16,31,0.008362666393319765
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,64,1,64,0,1,float16,float16,31,0.007807999849319458
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,64,1,64,128,1,float16,fp8,31,0.00898133342464765
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,64,1,64,0,1,float16,fp8,31,0.009839999799927076
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,64,1,64,128,1,float16,float16,63,0.009216000015536943
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,64,1,64,0,1,float16,float16,63,0.00914666677514712
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,64,1,64,128,1,float16,fp8,63,0.009797333429257074
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,64,1,64,0,1,float16,fp8,63,0.009594666461149851
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,64,1,64,128,1,float16,float16,127,0.00949866697192192
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,64,1,64,0,1,float16,float16,127,0.009354666496316591
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,64,1,64,128,1,float16,fp8,127,0.011866666376590729
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,64,1,64,0,1,float16,fp8,127,0.011407999942700068
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,64,1,64,128,1,float16,float16,255,0.008650666723648706
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,64,1,64,0,1,float16,float16,255,0.009674666449427605
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,64,1,64,128,1,float16,fp8,255,0.011920000116030375
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,64,1,64,0,1,float16,fp8,255,0.011802667131026586
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,64,1,64,128,1,float16,float16,511,0.008874666566650072
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,64,1,64,0,1,float16,float16,511,0.009898666913310686
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,64,1,64,128,1,float16,fp8,511,0.013258667041858038
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,64,1,64,0,1,float16,fp8,511,0.011968000481526056
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,64,1,64,128,1,float16,float16,1023,0.014576000471909841
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,64,1,64,0,1,float16,float16,1023,0.016143999993801117
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,64,1,64,128,1,float16,fp8,1023,0.013738666971524557
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,64,1,64,0,1,float16,fp8,1023,0.016794666647911072
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,64,1,64,128,1,float16,float16,2047,0.020762667059898376
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,64,1,64,0,1,float16,float16,2047,0.029167999823888142
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,64,1,64,128,1,float16,fp8,2047,0.0205226664741834
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,64,1,64,0,1,float16,fp8,2047,0.027477333943049114
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,64,1,64,128,1,float16,float16,4095,0.021141332884629566
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,64,1,64,0,1,float16,float16,4095,0.035445332527160645
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,64,1,64,128,1,float16,fp8,4095,0.021173333128293354
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,64,1,64,0,1,float16,fp8,4095,0.03457599878311157
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,64,1,64,128,1,float16,float16,8191,0.02041600023706754
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,64,1,64,128,1,float16,fp8,8191,0.020874666670958202
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,64,1,64,0,1,float16,fp8,8191,0.0481333335240682
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,64,1,64,0,1,float16,float16,8191,0.048565333088239036
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,64,1,64,128,1,float16,float16,16383,0.020432000358899433
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,64,1,64,128,1,float16,fp8,16383,0.020469332734743755
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,64,1,64,0,1,float16,float16,16383,0.07547733187675476
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,64,1,64,0,1,float16,fp8,16383,0.07518399755160014
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,64,1,64,0,1,float16,float16,32767,0.12801600495974222
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,64,1,64,128,1,float16,float16,32767,0.02125866711139679
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,64,1,64,128,1,float16,fp8,32767,0.02048533285657565
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,64,1,64,0,1,float16,fp8,32767,0.12723732988039652
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,64,2,64,128,1,float16,float16,1,0.008250666782259941
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,64,2,64,0,1,float16,fp8,1,0.008922666932145754
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,64,2,64,0,1,float16,float16,3,0.008176000167926153
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,64,2,64,0,1,float16,fp8,3,0.008410666758815447
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,64,2,64,128,1,float16,float16,7,0.00814933329820633
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,64,2,64,0,1,float16,float16,7,0.00877333308259646
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,64,2,64,128,1,float16,fp8,7,0.008757333581646284
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,64,2,64,0,1,float16,fp8,7,0.009509333098928133
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,64,2,64,128,1,float16,float16,15,0.008298666526873907
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,64,2,64,0,1,float16,float16,15,0.008522666369875273
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,64,2,64,128,1,float16,fp8,15,0.0086666668454806
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,64,2,64,0,1,float16,fp8,15,0.008597333605090777
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,64,2,64,128,1,float16,float16,31,0.008453333129485449
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,64,2,64,0,1,float16,float16,31,0.00871999996403853
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,64,2,64,128,1,float16,fp8,31,0.008778666456540426
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,64,2,64,0,1,float16,fp8,31,0.008346666892369589
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,64,2,64,128,1,float16,float16,63,0.009183999771873156
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,64,2,64,0,1,float16,float16,63,0.009162666896979014
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,64,2,64,128,1,float16,fp8,63,0.00874133345981439
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,64,2,64,0,1,float16,fp8,63,0.00867733359336853
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,64,2,64,0,1,float16,float16,1,0.007834666719039282
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,64,2,64,128,1,float16,float16,127,0.00919999989370505
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,64,2,64,128,1,float16,float16,3,0.007631999750932057
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,64,2,64,128,1,float16,fp8,127,0.009674666449427605
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,64,2,64,0,1,float16,float16,127,0.01573333392540614
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,64,2,64,128,1,float16,float16,255,0.009269333134094873
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,64,2,64,128,1,float16,fp8,1,0.007610666876037915
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,64,2,64,0,1,float16,float16,255,0.008842666943868002
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,64,2,64,128,1,float16,fp8,255,0.0100426667680343
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,64,2,64,0,1,float16,fp8,255,0.009904000287254652
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,64,2,64,128,1,float16,float16,511,0.008874666566650072
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,64,2,64,0,1,float16,float16,511,0.010288000106811523
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,64,2,64,128,1,float16,fp8,511,0.008901333436369896
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,64,2,64,128,1,float16,float16,1023,0.01613333324591319
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,64,2,64,128,1,float16,fp8,1023,0.009946666657924652
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,64,2,64,0,1,float16,fp8,1023,0.01259200026591619
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,64,2,64,128,1,float16,float16,2047,0.020821332931518555
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,64,2,64,0,1,float16,float16,2047,0.028138667345046997
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,64,2,64,128,1,float16,fp8,3,0.008789333204428354
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,64,2,64,128,1,float16,fp8,2047,0.009461333354314169
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,64,2,64,0,1,float16,fp8,2047,0.013248000293970108
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,64,2,64,0,1,float16,fp8,127,0.008992000172535578
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,64,2,64,128,1,float16,float16,4095,0.014671999961137772
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,64,2,64,0,1,float16,float16,4095,0.015098666151364645
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,64,2,64,128,1,float16,fp8,4095,0.009130666653315226
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,64,2,64,0,1,float16,fp8,4095,0.015279999623696009
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,64,2,64,128,1,float16,float16,8191,0.015333333363135656
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,64,2,64,0,1,float16,float16,8191,0.017429333180189133
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,64,2,64,128,1,float16,fp8,8191,0.009530666594703993
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,64,2,64,0,1,float16,fp8,8191,0.017898666361967724
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,64,2,64,128,1,float16,float16,16383,0.01591466615597407
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,64,2,64,0,1,float16,float16,16383,0.01887999971707662
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,64,2,64,0,1,float16,fp8,16383,0.020453333854675293
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,64,2,64,128,1,float16,fp8,16383,0.009626666704813639
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,64,2,64,0,1,float16,fp8,511,0.010698666175206503
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,64,2,64,128,1,float16,float16,32767,0.01747200017174085
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,64,2,64,0,1,float16,float16,32767,0.022474666436513264
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,64,2,64,0,1,float16,float16,1023,0.015930666277805965
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,64,2,64,128,1,float16,fp8,32767,0.009589333087205887
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,64,2,64,0,1,float16,fp8,32767,0.026528000831604004
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,64,4,64,128,1,float16,float16,1,0.008245333408315977
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,64,4,64,0,1,float16,fp8,1,0.00730666642387708
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,64,4,64,0,1,float16,float16,3,0.008229333286484083
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,64,4,64,128,1,float16,fp8,3,0.007349333415428798
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,64,4,64,0,1,float16,fp8,3,0.007125333572427432
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,64,4,64,128,1,float16,float16,7,0.008261333530147871
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,64,4,64,0,1,float16,float16,7,0.0080960001796484
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,64,4,64,128,1,float16,fp8,7,0.007349333415428798
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,64,4,64,0,1,float16,fp8,7,0.007237333183487256
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,64,4,64,128,1,float16,float16,15,0.008192000289758047
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,64,4,64,0,1,float16,float16,15,0.008287999778985977
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,64,4,64,0,1,float16,fp8,15,0.00761600024998188
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,64,4,64,128,1,float16,fp8,15,0.010874666273593903
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,64,4,64,128,1,float16,float16,31,0.0084906667470932
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,64,4,64,0,1,float16,float16,31,0.008383999889095625
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,64,4,64,128,1,float16,fp8,31,0.008949333180983862
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,64,4,64,0,1,float16,fp8,31,0.007114666824539502
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,64,4,64,128,1,float16,float16,63,0.009093333035707474
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,64,4,64,0,1,float16,float16,63,0.00926399976015091
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,64,4,64,0,1,float16,fp8,63,0.007322666545708974
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,64,4,64,128,1,float16,fp8,63,0.007194666812817256
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,64,4,64,0,1,float16,float16,1,0.007717333113153775
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,64,4,64,128,1,float16,float16,3,0.007711999739209811
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,64,4,64,128,1,float16,fp8,1,0.006400000303983688
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,64,4,64,128,1,float16,float16,127,0.009488000224033991
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,64,4,64,0,1,float16,float16,127,0.009423999736706415
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,64,4,64,128,1,float16,fp8,127,0.007749333356817563
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,64,4,64,128,1,float16,float16,255,0.008832000195980072
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,64,4,64,128,1,float16,fp8,255,0.008810666700204214
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,64,4,64,0,1,float16,fp8,255,0.009039999917149544
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,64,4,64,128,1,float16,float16,511,0.009312000125646591
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,64,4,64,0,1,float16,float16,511,0.010154666379094124
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,64,4,64,128,1,float16,fp8,511,0.008581333483258883
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,64,4,64,0,1,float16,fp8,511,0.009722666814923286
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,64,4,64,128,1,float16,float16,1023,0.01452800010641416
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,64,4,64,0,1,float16,float16,1023,0.016528000434239704
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,64,4,64,128,1,float16,fp8,1023,0.00898133342464765
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,64,4,64,0,1,float16,fp8,1023,0.010149333626031876
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,64,4,64,128,1,float16,float16,2047,0.01251199965675672
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,64,4,64,0,1,float16,float16,2047,0.012853333105643591
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,64,4,64,128,1,float16,fp8,2047,0.008816000074148178
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,64,4,64,0,1,float16,fp8,2047,0.01119999960064888
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,64,4,64,128,1,float16,float16,4095,0.013061333447694778
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,64,4,64,0,1,float16,float16,4095,0.013546666751305262
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,64,4,64,128,1,float16,fp8,4095,0.00854399986565113
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,64,4,64,0,1,float16,fp8,4095,0.012826666235923767
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,64,4,64,128,1,float16,float16,8191,0.013722666849692663
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,64,4,64,0,1,float16,float16,8191,0.01569066693385442
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,64,4,64,128,1,float16,fp8,8191,0.009189333145817121
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,64,4,64,0,1,float16,float16,255,0.008949333180983862
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,64,4,64,0,1,float16,fp8,127,0.007087999954819679
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,64,4,64,0,1,float16,fp8,8191,0.015301333119471868
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,64,4,64,128,1,float16,float16,16383,0.014533333480358124
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,64,4,64,0,1,float16,float16,16383,0.01657066618402799
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,64,4,64,128,1,float16,fp8,16383,0.008869333192706108
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,64,4,64,0,1,float16,fp8,16383,0.017653333644072216
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,64,4,64,128,1,float16,float16,32767,0.017071999609470367
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,64,4,64,0,1,float16,float16,32767,0.027056001126766205
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,64,4,64,128,1,float16,fp8,32767,0.008442666381597519
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,64,8,64,128,1,float16,float16,1,0.008661333471536636
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,64,8,64,0,1,float16,float16,1,0.007791999727487564
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,64,8,64,128,1,float16,fp8,1,0.007397333160042763
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,64,8,64,0,1,float16,fp8,1,0.008410666758815447
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,64,8,64,128,1,float16,float16,3,0.00816000004609426
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,64,8,64,0,1,float16,float16,3,0.007936000203092894
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,64,8,64,128,1,float16,fp8,3,0.007381333038210869
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,64,8,64,0,1,float16,float16,7,0.007893333211541176
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,64,8,64,0,1,float16,fp8,3,0.008512000242869059
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,64,8,64,128,1,float16,float16,7,0.008261333530147871
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,64,8,64,128,1,float16,fp8,7,0.00744000015159448
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,64,8,64,0,1,float16,fp8,7,0.006319999694824219
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,64,8,64,128,1,float16,float16,15,0.008736000085870424
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,64,8,64,0,1,float16,float16,15,0.008250666782259941
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,64,8,64,128,1,float16,fp8,15,0.007482666522264481
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,64,8,64,0,1,float16,fp8,15,0.006464000170429547
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,64,8,64,128,1,float16,float16,31,0.008778666456540426
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,64,8,64,0,1,float16,float16,31,0.008336000144481659
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,64,8,64,128,1,float16,fp8,31,0.007370666911204656
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,64,8,64,0,1,float16,fp8,31,0.006911999856432279
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,64,8,64,128,1,float16,float16,63,0.009423999736706415
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,64,8,64,128,1,float16,fp8,63,0.00697066696981589
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,64,8,64,0,1,float16,fp8,63,0.00720000018676122
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,64,8,64,128,1,float16,float16,127,0.008863999818762144
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,64,4,64,0,1,float16,fp8,32767,0.024373332659403484
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,64,8,64,0,1,float16,float16,127,0.00966933307548364
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,64,8,64,128,1,float16,fp8,127,0.007104000076651573
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,64,8,64,0,1,float16,fp8,127,0.007418666655818622
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,64,8,64,128,1,float16,float16,255,0.008693333094318708
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,64,8,64,0,1,float16,float16,255,0.00877333308259646
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,64,8,64,0,1,float16,float16,511,0.009594666461149851
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,64,8,64,128,1,float16,fp8,255,0.01695999999841054
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,64,8,64,0,1,float16,fp8,255,0.008250666782259941
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,64,8,64,128,1,float16,float16,511,0.00933333362142245
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,64,8,64,128,1,float16,fp8,511,0.006981333096822103
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,64,8,64,0,1,float16,fp8,511,0.007541333635648091
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,64,8,64,128,1,float16,float16,1023,0.012714666624863943
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,64,8,64,0,1,float16,float16,1023,0.012495999534924826
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,64,8,64,128,1,float16,fp8,1023,0.008709333216150602
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,64,8,64,0,1,float16,fp8,1023,0.008069333309928576
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,64,8,64,0,1,float16,float16,63,0.008703999842206636
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,64,8,64,128,1,float16,float16,2047,0.01190399999419848
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,64,8,64,128,1,float16,fp8,2047,0.007589333380262057
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,64,8,64,0,1,float16,float16,2047,0.013552000125249227
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,64,8,64,128,1,float16,float16,8191,0.015770666301250458
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,64,8,64,0,1,float16,fp8,2047,0.009642666826645533
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,64,8,64,128,1,float16,float16,4095,0.012191999703645706
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,64,8,64,0,1,float16,float16,4095,0.01349866638580958
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,64,8,64,0,1,float16,fp8,4095,0.010239999741315842
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,64,8,64,128,1,float16,fp8,8191,0.007674666742483775
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,64,8,64,0,1,float16,float16,8191,0.0206986665725708
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,64,8,64,0,1,float16,fp8,8191,0.014080000420411428
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,64,8,64,128,1,float16,float16,16383,0.016672000288963318
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,64,8,64,0,1,float16,float16,16383,0.023520000278949738
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,64,8,64,128,1,float16,fp8,16383,0.007280000175038974
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,64,8,64,0,1,float16,fp8,16383,0.016773333152135212
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,64,8,64,128,1,float16,float16,32767,0.020666666328907013
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,64,8,64,0,1,float16,float16,32767,0.035391998787721
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,64,8,64,128,1,float16,fp8,32767,0.007189333438873291
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,64,1,64,128,1,float16,float16,1,0.008037333066264788
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,64,1,64,0,1,float16,float16,1,0.008410666758815447
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,64,8,64,128,1,float16,fp8,4095,0.007125333572427432
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,64,1,64,128,1,float16,fp8,1,0.00973866693675518
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,64,1,64,0,1,float16,fp8,1,0.009541333342591921
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,64,1,64,128,1,float16,float16,3,0.007887999837597212
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,64,1,64,0,1,float16,float16,3,0.007850666840871176
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,64,1,64,128,1,float16,fp8,3,0.009578666960199675
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,64,1,64,128,1,float16,float16,7,0.008512000242869059
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,64,1,64,0,1,float16,fp8,3,0.008997333546479544
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,64,1,64,0,1,float16,float16,7,0.008277333031098047
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,64,1,64,128,1,float16,fp8,7,0.009039999917149544
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,64,1,64,0,1,float16,fp8,7,0.009061333412925402
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,64,8,64,0,1,float16,fp8,32767,0.02269333352645238
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,64,1,64,128,1,float16,float16,15,0.008405333384871483
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,64,1,64,0,1,float16,float16,15,0.008885333314538002
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,64,1,64,128,1,float16,fp8,15,0.008992000172535578
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,64,1,64,128,1,float16,float16,63,0.00961599995692571
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,64,1,64,0,1,float16,fp8,15,0.009455999980370203
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,64,1,64,128,1,float16,float16,31,0.008101333553592363
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,64,1,64,0,1,float16,float16,31,0.008069333309928576
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,64,1,64,0,1,float16,fp8,63,0.015477333217859268
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,64,1,64,128,1,float16,fp8,31,0.009509333098928133
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,64,1,64,128,1,float16,fp8,127,0.011839999506870905
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,64,1,64,0,1,float16,fp8,31,0.009674666449427605
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,64,1,64,0,1,float16,float16,63,0.00890666681031386
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,64,1,64,128,1,float16,fp8,63,0.010618666807810465
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,64,1,64,128,1,float16,float16,127,0.009136000027259191
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,64,1,64,0,1,float16,float16,127,0.008965333302815756
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,64,1,64,128,1,float16,float16,511,0.009098666409651438
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,64,1,64,0,1,float16,fp8,127,0.011674666156371435
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,64,1,64,128,1,float16,fp8,511,0.01145600030819575
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,64,1,64,128,1,float16,float16,255,0.009359999870260557
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,64,1,64,0,1,float16,float16,255,0.009925333162148794
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,64,1,64,128,1,float16,fp8,1023,0.022240000466505688
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,64,1,64,128,1,float16,fp8,255,0.011626667032639185
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,64,1,64,0,1,float16,fp8,255,0.012202666451533636
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,64,1,64,0,1,float16,fp8,511,0.011578666667143503
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,64,1,64,128,1,float16,float16,1023,0.021930667261282604
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,64,1,64,0,1,float16,float16,1023,0.026181332767009735
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,64,1,64,0,1,float16,float16,511,0.009994666402538618
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,64,1,64,0,1,float16,fp8,1023,0.026533332963784535
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,64,1,64,128,1,float16,float16,2047,0.022384000321229298
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,64,1,64,0,1,float16,float16,2047,0.030565333863099415
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,64,1,64,128,1,float16,fp8,2047,0.022661333282788593
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,64,1,64,0,1,float16,fp8,2047,0.03050133337577184
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,64,1,64,128,1,float16,float16,4095,0.021925332645575207
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,64,1,64,0,1,float16,float16,4095,0.040720000863075256
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,64,1,64,128,1,float16,fp8,4095,0.02248000105222066
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,64,1,64,0,1,float16,fp8,4095,0.04077333211898804
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,64,1,64,128,1,float16,float16,16383,0.021829334398110706
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,64,1,64,0,1,float16,float16,16383,0.101200004418691
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,64,1,64,128,1,float16,float16,8191,0.021957332889238994
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,64,1,64,0,1,float16,float16,8191,0.0612960010766983
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,64,1,64,128,1,float16,fp8,8191,0.021888000269730885
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,64,1,64,0,1,float16,fp8,8191,0.06057600180308024
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,64,1,64,128,1,float16,fp8,16383,0.022250667214393616
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,64,1,64,0,1,float16,fp8,16383,0.10016000270843506
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,64,1,64,128,1,float16,float16,32767,0.02180800090233485
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,64,1,64,0,1,float16,float16,32767,0.18132799863815308
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,64,1,64,128,1,float16,fp8,32767,0.02184533327817917
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,64,1,64,0,1,float16,fp8,32767,0.18023467063903809
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,64,2,64,128,1,float16,float16,1,0.008117333054542542
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,64,2,64,0,1,float16,fp8,3,0.008698666468262672
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,64,2,64,0,1,float16,float16,1,0.007802666475375493
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,64,2,64,128,1,float16,fp8,1,0.008197333042820295
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,64,2,64,128,1,float16,float16,3,0.00790933333337307
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,64,2,64,0,1,float16,float16,3,0.00842666688064734
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,64,2,64,128,1,float16,float16,7,0.008101333553592363
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,64,2,64,0,1,float16,float16,7,0.008725333337982496
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,64,2,64,128,1,float16,fp8,7,0.008074666683872541
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,64,2,64,0,1,float16,fp8,7,0.008218666538596153
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,64,2,64,0,1,float16,fp8,1,0.007717333113153775
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,64,2,64,128,1,float16,float16,15,0.007877333089709282
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,64,2,64,0,1,float16,float16,15,0.008496000121037165
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,64,2,64,128,1,float16,fp8,3,0.007834666719039282
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,64,2,64,128,1,float16,fp8,15,0.008336000144481659
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,64,2,64,0,1,float16,fp8,15,0.00892800030608972
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,64,2,64,128,1,float16,float16,31,0.008031999692320824
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,64,2,64,0,1,float16,float16,31,0.008512000242869059
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,64,2,64,128,1,float16,fp8,31,0.007754666730761528
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,64,2,64,0,1,float16,fp8,31,0.008127999802430471
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,64,2,64,128,1,float16,float16,63,0.009370666618148485
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,64,2,64,128,1,float16,fp8,63,0.008309333274761835
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,64,2,64,0,1,float16,fp8,63,0.008693333094318708
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,64,2,64,128,1,float16,float16,127,0.00897066667675972
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,64,2,64,0,1,float16,float16,127,0.009455999980370203
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,64,2,64,128,1,float16,fp8,127,0.008912000184257826
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,64,2,64,0,1,float16,float16,63,0.008965333302815756
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,64,2,64,0,1,float16,fp8,127,0.009093333035707474
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,64,2,64,128,1,float16,float16,255,0.00973866693675518
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,64,2,64,0,1,float16,float16,255,0.009226666763424873
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,64,2,64,128,1,float16,float16,1023,0.022592000663280487
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,64,2,64,128,1,float16,fp8,255,0.010245333115259806
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,64,2,64,0,1,float16,fp8,255,0.010368000095089277
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,64,2,64,128,1,float16,float16,511,0.009589333087205887
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,64,2,64,128,1,float16,float16,2047,0.013973332941532135
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,64,2,64,128,1,float16,fp8,511,0.009141333401203156
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,64,2,64,0,1,float16,fp8,511,0.010602666685978571
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,64,2,64,0,1,float16,float16,1023,0.026250667870044708
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,64,2,64,128,1,float16,fp8,1023,0.009743999689817429
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,64,2,64,0,1,float16,fp8,1023,0.012389333297808966
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,64,2,64,0,1,float16,float16,2047,0.014789332946141561
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,64,2,64,128,1,float16,fp8,2047,0.00895999992887179
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,64,2,64,0,1,float16,fp8,2047,0.012815999488035837
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,64,2,64,0,1,float16,float16,511,0.009946666657924652
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,64,2,64,128,1,float16,float16,4095,0.014762666076421738
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,64,2,64,0,1,float16,float16,4095,0.01522133375207583
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,64,2,64,128,1,float16,fp8,4095,0.009872000043590864
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,64,2,64,0,1,float16,fp8,4095,0.015205333630243937
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,64,2,64,128,1,float16,float16,8191,0.015909332782030106
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,64,2,64,0,1,float16,float16,8191,0.018042666216691334
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,64,2,64,128,1,float16,fp8,8191,0.009306666751702627
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,64,2,64,0,1,float16,fp8,8191,0.017530667285124462
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,64,2,64,128,1,float16,float16,16383,0.016143999993801117
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,64,2,64,0,1,float16,float16,16383,0.019653332730134327
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,64,2,64,128,1,float16,fp8,16383,0.009765333185593287
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,64,2,64,0,1,float16,fp8,16383,0.021370666722456615
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,64,4,64,128,1,float16,fp8,1,0.007413333281874657
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,64,2,64,128,1,float16,float16,32767,0.020869334538777668
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,64,2,64,0,1,float16,float16,32767,0.0317546675602595
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,64,2,64,128,1,float16,fp8,32767,0.00938666673998038
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,64,2,64,0,1,float16,fp8,32767,0.02977066735426585
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,64,4,64,128,1,float16,float16,1,0.007776000226537387
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,64,4,64,0,1,float16,float16,1,0.007840000092983246
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,64,4,64,0,1,float16,fp8,1,0.008522666369875273
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,64,4,64,128,1,float16,float16,3,0.008447999755541483
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,64,4,64,0,1,float16,float16,3,0.008352000266313553
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,64,4,64,128,1,float16,fp8,3,0.006522666662931442
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,64,4,64,0,1,float16,float16,7,0.007818666597207388
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,64,4,64,128,1,float16,fp8,15,0.0064319999267657595
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,64,4,64,128,1,float16,fp8,7,0.006400000303983688
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,64,4,64,0,1,float16,fp8,7,0.008650666723648706
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,64,4,64,128,1,float16,float16,15,0.008346666892369589
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,64,4,64,128,1,float16,fp8,31,0.008240000034372011
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,64,4,64,0,1,float16,float16,15,0.008362666393319765
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,64,4,64,0,1,float16,fp8,15,0.006570666407545407
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,64,4,64,128,1,float16,float16,31,0.008080000057816505
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,64,4,64,0,1,float16,float16,31,0.008074666683872541
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,64,4,64,128,1,float16,float16,7,0.008021333565314611
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,64,4,64,0,1,float16,fp8,31,0.007578666632374127
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,64,4,64,0,1,float16,fp8,3,0.006517333288987477
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,64,4,64,128,1,float16,float16,63,0.009482666850090027
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,64,4,64,0,1,float16,float16,63,0.009301333377758661
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,64,4,64,0,1,float16,fp8,127,0.008416000132759413
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,64,4,64,128,1,float16,fp8,63,0.006650666395823161
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,64,4,64,0,1,float16,fp8,63,0.006624000146985054
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,64,4,64,128,1,float16,fp8,255,0.00784533346692721
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,64,4,64,128,1,float16,float16,127,0.009103999783595404
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,64,4,64,0,1,float16,float16,127,0.009503999724984169
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,64,4,64,128,1,float16,fp8,127,0.007018666714429855
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,64,4,64,128,1,float16,float16,255,0.009519999846816063
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,64,4,64,0,1,float16,float16,255,0.009786666681369146
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,64,4,64,0,1,float16,fp8,255,0.008767999708652496
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,64,4,64,128,1,float16,float16,511,0.009008000294367472
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,64,4,64,0,1,float16,float16,511,0.010133333504199982
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,64,4,64,128,1,float16,fp8,511,0.007743999982873599
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,64,4,64,0,1,float16,fp8,511,0.00943999985853831
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,64,4,64,0,1,float16,float16,1023,0.012560000022252401
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,64,4,64,128,1,float16,fp8,1023,0.008714666590094566
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,64,4,64,0,1,float16,fp8,1023,0.009626666704813639
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,64,4,64,128,1,float16,float16,2047,0.012890666723251343
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,64,4,64,128,1,float16,fp8,2047,0.007727999861041705
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,64,4,64,0,1,float16,float16,2047,0.013237333546082178
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,64,4,64,0,1,float16,fp8,2047,0.011242666592200598
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,64,4,64,128,1,float16,float16,4095,0.01360000049074491
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,64,4,64,0,1,float16,float16,8191,0.0207893339296182
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,64,4,64,128,1,float16,fp8,8191,0.008559999987483025
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,64,4,64,0,1,float16,float16,4095,0.014485333114862442
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,64,4,64,128,1,float16,fp8,4095,0.008026666939258575
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,64,4,64,128,1,float16,float16,1023,0.01192533348997434
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,64,4,64,0,1,float16,fp8,4095,0.012367999802033106
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,64,4,64,128,1,float16,float16,8191,0.016469333320856094
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,64,4,64,0,1,float16,fp8,8191,0.016858667135238647
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,64,4,64,128,1,float16,float16,16383,0.017466666797796886
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,64,4,64,0,1,float16,float16,16383,0.024901332954565685
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,64,4,64,128,1,float16,fp8,16383,0.00877333308259646
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,64,4,64,0,1,float16,fp8,16383,0.02019199977318446
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,64,4,64,128,1,float16,float16,32767,0.02089600016673406
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,64,4,64,0,1,float16,float16,32767,0.0358240008354187
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,64,4,64,128,1,float16,fp8,32767,0.008037333066264788
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,64,4,64,0,1,float16,fp8,32767,0.030031998952229817
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,64,8,64,0,1,float16,float16,1,0.008474666625261307
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,64,8,64,128,1,float16,fp8,1,0.008378666515151659
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,64,8,64,0,1,float16,fp8,1,0.005749333028992017
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,64,8,64,0,1,float16,float16,3,0.007877333089709282
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,64,8,64,128,1,float16,fp8,3,0.005685333162546158
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,64,8,64,0,1,float16,fp8,3,0.007071999832987785
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,64,8,64,128,1,float16,float16,15,0.007754666730761528
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,64,8,64,128,1,float16,float16,7,0.008474666625261307
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,64,8,64,0,1,float16,float16,7,0.010703999549150467
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,64,8,64,128,1,float16,fp8,7,0.005754666402935982
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,64,8,64,0,1,float16,fp8,7,0.005829333638151486
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,64,8,64,0,1,float16,float16,15,0.008506666868925095
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,64,8,64,128,1,float16,float16,1,0.007989333321650824
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,64,8,64,128,1,float16,fp8,15,0.005557333429654439
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,64,8,64,0,1,float16,fp8,15,0.007216000308593114
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,64,8,64,128,1,float16,float16,31,0.008933333059151968
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,64,8,64,0,1,float16,float16,31,0.008176000167926153
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,64,8,64,128,1,float16,fp8,31,0.006048000107208888
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,64,8,64,128,1,float16,float16,3,0.0080960001796484
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,64,8,64,0,1,float16,fp8,31,0.006911999856432279
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,64,8,64,128,1,float16,fp8,127,0.0064853330453236895
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,64,8,64,128,1,float16,float16,63,0.008821333448092142
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,64,8,64,0,1,float16,fp8,127,0.007285333548982938
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,64,8,64,0,1,float16,float16,63,0.00895999992887179
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,64,8,64,128,1,float16,fp8,255,0.007584000006318092
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,64,8,64,128,1,float16,fp8,63,0.00730666642387708
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,64,8,64,0,1,float16,fp8,63,0.008559999987483025
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,64,8,64,128,1,float16,float16,127,0.008965333302815756
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,64,8,64,0,1,float16,float16,127,0.008997333546479544
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,64,8,64,128,1,float16,float16,255,0.009130666653315226
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,64,8,64,0,1,float16,float16,255,0.009098666409651438
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,64,8,64,0,1,float16,fp8,255,0.007360000163316727
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,64,8,64,128,1,float16,float16,511,0.012042666474978128
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,64,8,64,0,1,float16,float16,511,0.012309333930412928
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,64,8,64,128,1,float16,float16,2047,0.01540800059835116
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,64,8,64,128,1,float16,fp8,511,0.008549333239595095
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,64,8,64,0,1,float16,float16,2047,0.018378666291634243
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,64,8,64,128,1,float16,fp8,2047,0.007765333478649457
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,64,8,64,0,1,float16,fp8,511,0.00795199970404307
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,64,8,64,128,1,float16,float16,1023,0.011482667177915573
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,64,8,64,0,1,float16,float16,1023,0.012096000214417776
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,64,8,64,0,1,float16,fp8,1023,0.008133333176374435
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,64,8,64,0,1,float16,fp8,2047,0.010677333921194077
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,64,8,64,128,1,float16,float16,4095,0.01651200031240781
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,64,8,64,0,1,float16,float16,4095,0.019706666469573975
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,64,8,64,128,1,float16,fp8,4095,0.007296000296870868
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,64,8,64,0,1,float16,fp8,4095,0.011221333096424738
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,64,8,64,128,1,float16,float16,8191,0.01886933296918869
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,64,8,64,0,1,float16,float16,8191,0.027136000494162243
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,64,8,64,128,1,float16,fp8,8191,0.008416000132759413
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,64,8,64,0,1,float16,fp8,8191,0.015850666910409927
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,64,8,64,128,1,float16,float16,16383,0.022543999056021374
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,64,8,64,0,1,float16,float16,16383,0.03708266715208689
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,64,8,64,128,1,float16,fp8,16383,0.007141333073377609
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,64,8,64,128,1,float16,fp8,1023,0.007189333438873291
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,64,8,64,0,1,float16,fp8,16383,0.020586666961510975
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,64,8,64,128,1,float16,float16,32767,0.02719466636578242
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,64,8,64,0,1,float16,float16,32767,0.05996799965699514
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,64,8,64,128,1,float16,fp8,32767,0.008197333042820295
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,64,8,64,0,1,float16,fp8,32767,0.0352906659245491
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,64,1,64,128,1,float16,float16,1,0.042490666111310325
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,64,1,64,0,1,float16,float16,1,0.042208001017570496
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,64,1,64,128,1,float16,fp8,1,0.04250133534272512
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,64,1,64,0,1,float16,fp8,1,0.04237333436806997
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,64,1,64,128,1,float16,float16,3,0.04318400224049886
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,64,1,64,0,1,float16,float16,3,0.04298133154710134
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,64,1,64,128,1,float16,fp8,3,0.042853335539499916
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,64,1,64,0,1,float16,fp8,3,0.04268266757329305
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,64,1,64,128,1,float16,float16,7,0.04398933549722036
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,64,1,64,0,1,float16,float16,7,0.04383466641108195
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,64,1,64,128,1,float16,fp8,7,0.044079999128977455
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,64,1,64,0,1,float16,fp8,7,0.04492799937725067
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,64,1,64,128,1,float16,float16,15,0.0461706668138504
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,64,1,64,0,1,float16,float16,15,0.045941332976023354
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,64,1,64,128,1,float16,fp8,15,0.045781334241231285
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,64,1,64,0,1,float16,fp8,15,0.04589866598447164
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,64,1,64,128,1,float16,float16,31,0.057018667459487915
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,64,1,64,0,1,float16,float16,31,0.05691733459631602
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,64,1,64,128,1,float16,fp8,31,0.05749333401521047
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,64,1,64,0,1,float16,fp8,31,0.05831466615200043
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,64,1,64,128,1,float16,float16,63,0.058320000767707825
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,64,1,64,0,1,float16,float16,63,0.05816000203291575
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,64,1,64,128,1,float16,fp8,63,0.058261334896087646
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,64,1,64,0,1,float16,fp8,63,0.05844266712665558
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,64,1,64,128,1,float16,float16,127,0.058506667613983154
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,64,1,64,0,1,float16,float16,127,0.0584799995024999
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,64,1,64,128,1,float16,fp8,127,0.058693334460258484
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,64,1,64,0,1,float16,fp8,127,0.059530665477116905
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,64,1,64,128,1,float16,float16,255,0.05926933387915293
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,64,1,64,0,1,float16,float16,255,0.07125866909821828
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,64,1,64,128,1,float16,fp8,255,0.05871999760468801
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,64,1,64,128,1,float16,float16,1023,0.05941333373387655
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,64,1,64,0,1,float16,fp8,255,0.07061866422494252
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,64,1,64,128,1,float16,float16,511,0.05893866717815399
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,64,1,64,0,1,float16,float16,511,0.1081119974454244
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,64,1,64,128,1,float16,fp8,511,0.05874133110046387
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,64,1,64,0,1,float16,fp8,511,0.1104213297367096
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,64,1,64,0,1,float16,float16,1023,0.18488534291585287
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,64,1,64,128,1,float16,fp8,1023,0.05876799921194712
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,64,1,64,0,1,float16,fp8,1023,0.18862932920455933
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,64,1,64,128,1,float16,float16,2047,0.0590826670328776
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,64,1,64,0,1,float16,float16,4095,0.6404906511306763
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,64,1,64,0,1,float16,float16,2047,0.33473066488901776
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,64,1,64,128,1,float16,fp8,2047,0.05879466732343038
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,64,1,64,0,1,float16,fp8,2047,0.3420426845550537
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,64,1,64,128,1,float16,float16,4095,0.05978666742642721
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,64,1,64,128,1,float16,fp8,4095,0.058703998724619545
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,64,2,64,128,1,float16,float16,1,0.012165332833925882
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,64,1,64,0,1,float16,fp8,4095,0.6517920096715292
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,64,2,64,0,1,float16,float16,1,0.01267733300725619
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,64,2,64,128,1,float16,fp8,1,0.008031999692320824
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,64,2,64,0,1,float16,fp8,1,0.00797333319981893
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,64,2,64,0,1,float16,float16,7,0.012666666259368261
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,64,2,64,0,1,float16,fp8,7,0.008496000121037165
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,64,2,64,128,1,float16,float16,3,0.012453333785136541
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,64,2,64,0,1,float16,float16,3,0.012815999488035837
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,64,2,64,128,1,float16,fp8,3,0.00808533343176047
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,64,2,64,128,1,float16,fp8,15,0.0084906667470932
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,64,2,64,0,1,float16,fp8,3,0.00821333316465219
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,64,2,64,128,1,float16,float16,7,0.012527999778588613
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,64,2,64,128,1,float16,fp8,7,0.008378666515151659
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,64,2,64,128,1,float16,float16,15,0.012693333129088083
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,64,2,64,0,1,float16,float16,15,0.012085333466529846
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,64,2,64,0,1,float16,fp8,15,0.00915733352303505
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,64,2,64,128,1,float16,float16,31,0.011989332735538483
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,64,2,64,0,1,float16,float16,31,0.01259200026591619
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,64,2,64,128,1,float16,fp8,31,0.008277333031098047
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,64,2,64,0,1,float16,fp8,31,0.009248000259200731
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,64,2,64,128,1,float16,fp8,127,0.009039999917149544
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,64,2,64,128,1,float16,float16,63,0.0116799995303154
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,64,2,64,0,1,float16,float16,63,0.012602667013804117
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,64,2,64,128,1,float16,fp8,63,0.008453333129485449
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,64,2,64,0,1,float16,fp8,63,0.009130666653315226
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,64,2,64,128,1,float16,float16,127,0.011861333002646765
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,64,2,64,0,1,float16,float16,127,0.012426666915416718
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,64,2,64,0,1,float16,fp8,127,0.009413333609700203
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,64,2,64,128,1,float16,float16,255,0.021770666042963665
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,64,2,64,0,1,float16,float16,255,0.02165333429972331
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,64,2,64,128,1,float16,fp8,255,0.010842667271693548
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,64,2,64,0,1,float16,fp8,255,0.01118933285276095
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,64,2,64,128,1,float16,float16,511,0.02788266787926356
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,64,2,64,0,1,float16,float16,511,0.029861333469549816
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,64,2,64,128,1,float16,fp8,511,0.011328000575304031
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,64,2,64,0,1,float16,fp8,511,0.01977066695690155
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,64,2,64,128,1,float16,float16,1023,0.03909866760174433
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,64,2,64,0,1,float16,float16,1023,0.0481333335240682
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,64,2,64,128,1,float16,fp8,1023,0.012175999581813812
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,64,2,64,0,1,float16,fp8,1023,0.028181334336598713
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,64,2,64,128,1,float16,float16,2047,0.0421973317861557
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,64,2,64,0,1,float16,float16,2047,0.062208001812299095
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,64,2,64,128,1,float16,fp8,2047,0.011968000481526056
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,64,2,64,0,1,float16,fp8,2047,0.03718933214743932
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,64,2,64,128,1,float16,float16,4095,0.04136000076929728
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,64,2,64,0,1,float16,float16,4095,0.08059733112653096
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,64,2,64,128,1,float16,fp8,4095,0.012165332833925882
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,64,2,64,0,1,float16,fp8,4095,0.05366933345794678
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,64,4,64,128,1,float16,float16,1,0.015658666690190632
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,64,4,64,0,1,float16,float16,1,0.015173333386580149
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,64,4,64,128,1,float16,fp8,1,0.0075093333919843035
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,64,4,64,0,1,float16,fp8,1,0.007653333246707916
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,64,4,64,128,1,float16,float16,3,0.015029333531856537
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,64,4,64,0,1,float16,float16,3,0.015098666151364645
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,64,4,64,128,1,float16,fp8,3,0.007711999739209811
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,64,4,64,0,1,float16,fp8,3,0.008357333640257517
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,64,4,64,128,1,float16,float16,7,0.015114666273196539
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,64,4,64,0,1,float16,float16,7,0.01544533297419548
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,64,4,64,128,1,float16,fp8,7,0.007802666475375493
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,64,4,64,0,1,float16,fp8,7,0.007216000308593114
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,64,4,64,128,1,float16,float16,15,0.01515199989080429
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,64,4,64,0,1,float16,float16,15,0.015573333948850632
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,64,4,64,128,1,float16,fp8,15,0.008538666491707167
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,64,4,64,0,1,float16,fp8,15,0.007189333438873291
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,64,4,64,128,1,float16,float16,31,0.015471999843915304
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,64,4,64,0,1,float16,float16,31,0.015402667224407196
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,64,4,64,128,1,float16,fp8,31,0.007226666435599327
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,64,4,64,0,1,float16,fp8,31,0.007194666812817256
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,64,4,64,128,1,float16,float16,63,0.015141333142916361
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,64,4,64,0,1,float16,float16,63,0.015552000453074774
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,64,4,64,128,1,float16,fp8,63,0.007216000308593114
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,64,4,64,128,1,float16,float16,127,0.015285332997639975
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,64,4,64,0,1,float16,float16,127,0.01543466622630755
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,64,4,64,128,1,float16,fp8,127,0.008154666672150293
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,64,4,64,0,1,float16,fp8,127,0.007903999959429106
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,64,4,64,128,1,float16,float16,255,0.027482666075229645
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,64,4,64,0,1,float16,float16,255,0.02808533360560735
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,64,4,64,128,1,float16,fp8,255,0.009743999689817429
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,64,4,64,0,1,float16,fp8,255,0.011407999942700068
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,64,4,64,128,1,float16,float16,511,0.037274666130542755
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,64,4,64,0,1,float16,float16,511,0.04104000081618627
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,64,4,64,128,1,float16,fp8,511,0.01109333336353302
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,64,4,64,0,1,float16,fp8,511,0.019248000035683315
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,64,4,64,128,1,float16,float16,1023,0.037248000502586365
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,64,4,64,0,1,float16,float16,1023,0.050255998969078064
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,64,4,64,128,1,float16,fp8,1023,0.011296000331640244
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,64,4,64,0,1,float16,fp8,1023,0.02438933402299881
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,64,4,64,128,1,float16,float16,2047,0.03902400036652883
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,64,4,64,0,1,float16,float16,4095,0.08956799904505412
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,64,4,64,128,1,float16,fp8,4095,0.011695999652147293
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,64,4,64,0,1,float16,fp8,63,0.007071999832987785
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,64,4,64,0,1,float16,float16,2047,0.06738133231798808
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,64,8,64,0,1,float16,float16,1,0.024314666787783306
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,64,4,64,128,1,float16,fp8,2047,0.011578666667143503
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,64,4,64,0,1,float16,fp8,2047,0.034559999903043113
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,64,4,64,128,1,float16,float16,4095,0.039173332353432976
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,64,4,64,0,1,float16,fp8,4095,0.05559466779232025
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,64,8,64,128,1,float16,float16,1,0.023749334116776783
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,64,8,64,128,1,float16,fp8,1,0.008965333302815756
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,64,8,64,0,1,float16,fp8,1,0.00707733320693175
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,64,8,64,128,1,float16,float16,3,0.02430933217207591
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,64,8,64,0,1,float16,float16,3,0.024005333582560223
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,64,8,64,128,1,float16,fp8,3,0.008512000242869059
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,64,8,64,0,1,float16,fp8,3,0.00720000018676122
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,64,8,64,128,1,float16,float16,7,0.02369066576162974
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,64,8,64,0,1,float16,float16,7,0.023792001108328503
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,64,8,64,128,1,float16,fp8,7,0.006965333595871925
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,64,8,64,0,1,float16,fp8,7,0.007141333073377609
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,64,8,64,128,1,float16,float16,15,0.02438933402299881
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,64,8,64,0,1,float16,float16,15,0.023989332218964893
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,64,8,64,128,1,float16,fp8,15,0.008512000242869059
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,64,8,64,0,1,float16,fp8,15,0.007120000198483467
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,64,8,64,128,1,float16,float16,31,0.02359466751416524
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,64,8,64,0,1,float16,float16,31,0.023818666736284893
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,64,8,64,128,1,float16,fp8,31,0.007114666824539502
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,64,8,64,0,1,float16,fp8,31,0.006965333595871925
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,64,8,64,128,1,float16,float16,63,0.024069334069887798
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,64,8,64,0,1,float16,float16,63,0.0236160010099411
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,64,8,64,128,1,float16,fp8,63,0.008522666369875273
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,64,8,64,0,1,float16,fp8,63,0.00721066693464915
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,64,8,64,128,1,float16,float16,127,0.023232000569502514
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,64,8,64,0,1,float16,float16,127,0.02349333216746648
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,64,8,64,128,1,float16,float16,511,0.04340266684691111
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,64,8,64,128,1,float16,fp8,127,0.008223999912540117
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,64,8,64,0,1,float16,fp8,127,0.008186666915814081
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,64,8,64,128,1,float16,float16,255,0.044341335693995156
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,64,8,64,0,1,float16,float16,255,0.043882668018341064
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,64,8,64,128,1,float16,fp8,255,0.014373333503802618
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,64,8,64,0,1,float16,fp8,255,0.015834666788578033
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,64,8,64,0,1,float16,float16,511,0.052202666799227394
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,64,8,64,128,1,float16,fp8,511,0.013770667215188345
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,64,8,64,0,1,float16,fp8,511,0.020175999651352566
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,64,8,64,128,1,float16,fp8,2047,0.013621332744757334
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,64,8,64,128,1,float16,float16,1023,0.043381333351135254
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,64,8,64,0,1,float16,float16,1023,0.06654933094978333
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,64,8,64,128,1,float16,fp8,1023,0.014069333672523499
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,64,8,64,0,1,float16,fp8,1023,0.029125332832336426
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,64,8,64,128,1,float16,float16,2047,0.0460746685663859
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,64,8,64,0,1,float16,float16,2047,0.09057066837946574
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,64,1,64,0,1,float16,float16,1,0.008517333616813024
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,64,8,64,0,1,float16,fp8,2047,0.04651199777921041
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,64,8,64,128,1,float16,float16,4095,0.045781334241231285
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,64,8,64,0,1,float16,float16,4095,0.13343466321627298
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,64,8,64,128,1,float16,fp8,4095,0.014229333649079004
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,64,8,64,0,1,float16,fp8,4095,0.07935466865698497
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,64,1,64,128,1,float16,float16,1,0.009349333122372627
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,64,1,64,128,1,float16,fp8,1,0.008858666444818178
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,64,1,64,0,1,float16,fp8,1,0.008581333483258883
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,64,1,64,128,1,float16,float16,3,0.00877333308259646
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,64,1,64,0,1,float16,float16,3,0.008597333605090777
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,64,1,64,128,1,float16,fp8,3,0.00842666688064734
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,64,1,64,0,1,float16,fp8,3,0.009119999905427298
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,64,1,64,128,1,float16,float16,7,0.009248000259200731
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,64,1,64,0,1,float16,float16,7,0.009455999980370203
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,64,1,64,128,1,float16,fp8,7,0.009008000294367472
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,64,1,64,0,1,float16,fp8,7,0.008682666967312494
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,64,1,64,128,1,float16,float16,15,0.00873066671192646
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,64,1,64,128,1,float16,fp8,31,0.00943999985853831
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,64,1,64,0,1,float16,float16,15,0.008752000207702318
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,64,1,64,128,1,float16,fp8,15,0.009226666763424873
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,64,1,64,128,1,float16,fp8,63,0.011002667248249054
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,64,1,64,0,1,float16,fp8,15,0.009279999881982803
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,64,1,64,128,1,float16,float16,127,0.011109333485364914
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,64,1,64,128,1,float16,float16,31,0.010202666744589806
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,64,1,64,0,1,float16,float16,31,0.00980266680320104
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,64,1,64,0,1,float16,fp8,31,0.009488000224033991
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,64,1,64,128,1,float16,float16,63,0.011087999989589056
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,64,1,64,0,1,float16,float16,63,0.011087999989589056
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,64,1,64,0,1,float16,fp8,63,0.011637333780527115
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,64,1,64,0,1,float16,float16,127,0.011776000261306763
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,64,1,64,128,1,float16,fp8,127,0.011002667248249054
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,64,1,64,0,1,float16,fp8,127,0.01192533348997434
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,64,1,64,128,1,float16,float16,255,0.01157333329319954
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,64,1,64,0,1,float16,float16,255,0.011578666667143503
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,64,1,64,128,1,float16,fp8,255,0.01119999960064888
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,64,1,64,0,1,float16,fp8,255,0.011296000331640244
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,64,1,64,128,1,float16,float16,511,0.011749333391586939
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,64,1,64,0,1,float16,float16,511,0.013552000125249227
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,64,1,64,128,1,float16,fp8,511,0.011061333119869232
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,64,1,64,0,1,float16,float16,2047,0.02868266652027766
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,64,1,64,0,1,float16,fp8,511,0.013679999858140945
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,64,1,64,128,1,float16,float16,1023,0.011621333658695221
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,64,1,64,0,1,float16,float16,1023,0.018687999496857326
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,64,1,64,128,1,float16,fp8,1023,0.01121066634853681
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,64,1,64,0,1,float16,fp8,1023,0.01811733345190684
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,64,1,64,128,1,float16,float16,2047,0.011839999506870905
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,64,1,64,128,1,float16,fp8,2047,0.011087999989589056
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,64,1,64,0,1,float16,float16,8191,0.08849066495895386
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,64,1,64,0,1,float16,fp8,2047,0.029301332930723827
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,64,1,64,128,1,float16,float16,4095,0.012117333710193634
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,64,1,64,0,1,float16,float16,4095,0.04855999847253164
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,64,1,64,128,1,float16,fp8,4095,0.011834666132926941
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,64,1,64,0,1,float16,fp8,4095,0.049957334995269775
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,64,1,64,128,1,float16,float16,8191,0.01129066695769628
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,64,1,64,128,1,float16,fp8,8191,0.011039999624093374
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,64,1,64,0,1,float16,fp8,8191,0.09213866790135701
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,64,1,64,128,1,float16,fp8,32767,0.01110400011142095
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,64,1,64,128,1,float16,float16,16383,0.011930666863918304
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,64,1,64,0,1,float16,float16,16383,0.1683839956919352
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,64,1,64,128,1,float16,fp8,16383,0.01163200040658315
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,64,1,64,0,1,float16,fp8,16383,0.17532267173131308
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,64,1,64,128,1,float16,float16,32767,0.011391999820868174
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,64,1,64,0,1,float16,float16,32767,0.36948267618815106
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,64,1,64,0,1,float16,fp8,32767,0.38598934809366864
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,64,2,64,128,1,float16,float16,1,0.008405333384871483
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,64,2,64,0,1,float16,float16,1,0.009093333035707474
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,64,2,64,128,1,float16,fp8,1,0.00901333304742972
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,64,2,64,0,1,float16,fp8,1,0.00821333316465219
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,64,2,64,128,1,float16,float16,3,0.009279999881982803
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,64,2,64,0,1,float16,float16,3,0.008709333216150602
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,64,2,64,128,1,float16,fp8,3,0.00786666696270307
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,64,2,64,0,1,float16,fp8,3,0.008341333518425623
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,64,2,64,128,1,float16,float16,7,0.009695999945203463
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,64,2,64,0,1,float16,float16,7,0.009349333122372627
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,64,2,64,128,1,float16,fp8,7,0.008496000121037165
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,64,2,64,0,1,float16,fp8,7,0.008229333286484083
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,64,2,64,128,1,float16,float16,15,0.009317333499590555
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,64,2,64,0,1,float16,float16,15,0.00895999992887179
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,64,2,64,128,1,float16,fp8,15,0.007887999837597212
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,64,2,64,0,1,float16,fp8,15,0.0086666668454806
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,64,2,64,128,1,float16,float16,31,0.009477333476146063
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,64,2,64,0,1,float16,float16,31,0.0100426667680343
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,64,2,64,128,1,float16,fp8,31,0.00847999999920527
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,64,2,64,0,1,float16,fp8,31,0.00808533343176047
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,64,2,64,128,1,float16,float16,63,0.011642667154471079
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,64,2,64,0,1,float16,float16,63,0.011221333096424738
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,64,2,64,128,1,float16,fp8,63,0.008309333274761835
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,64,2,64,0,1,float16,fp8,63,0.008767999708652496
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,64,2,64,128,1,float16,float16,127,0.011226666470368704
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,64,2,64,0,1,float16,float16,127,0.011871999750534693
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,64,2,64,128,1,float16,fp8,127,0.009808000177145004
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,64,2,64,0,1,float16,fp8,127,0.009408000235756239
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,64,2,64,128,1,float16,float16,255,0.01191466674208641
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,64,2,64,0,1,float16,float16,255,0.011434666812419891
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,64,2,64,128,1,float16,fp8,255,0.009706666693091393
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,64,2,64,0,1,float16,fp8,255,0.011231999844312668
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,64,2,64,128,1,float16,float16,511,0.011264000087976456
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,64,2,64,0,1,float16,float16,511,0.013621332744757334
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,64,2,64,128,1,float16,fp8,511,0.010421333213647207
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,64,2,64,0,1,float16,fp8,511,0.010842667271693548
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,64,2,64,128,1,float16,float16,1023,0.013909333695967993
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,64,2,64,0,1,float16,float16,1023,0.013642666240533194
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,64,2,64,128,1,float16,fp8,1023,0.009472000102202097
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,64,2,64,0,1,float16,fp8,1023,0.012293333808581034
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,64,2,64,128,1,float16,float16,2047,0.015311999867359797
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,64,2,64,0,1,float16,float16,2047,0.01632533346613248
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,64,2,64,128,1,float16,fp8,2047,0.009898666913310686
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,64,2,64,128,1,float16,float16,8191,0.019498666127522785
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,64,2,64,0,1,float16,fp8,2047,0.013088000317414602
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,64,2,64,128,1,float16,float16,4095,0.016122666498025257
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,64,2,64,0,1,float16,float16,4095,0.01651200031240781
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,64,2,64,128,1,float16,fp8,4095,0.009103999783595404
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,64,2,64,0,1,float16,fp8,4095,0.015040000279744467
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,64,2,64,0,1,float16,float16,8191,0.025472000241279602
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,64,2,64,128,1,float16,fp8,8191,0.00980266680320104
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,64,2,64,0,1,float16,fp8,8191,0.02218666672706604
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,64,2,64,128,1,float16,float16,16383,0.021136000752449036
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,64,2,64,0,1,float16,float16,16383,0.029109333952267964
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,64,2,64,128,1,float16,fp8,16383,0.009642666826645533
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,64,2,64,0,1,float16,fp8,16383,0.02743999908367793
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,64,2,64,128,1,float16,float16,32767,0.025034666061401367
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,64,4,64,0,1,float16,fp8,1,0.007391999786098798
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,64,2,64,0,1,float16,float16,32767,0.044490665197372437
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,64,2,64,128,1,float16,fp8,32767,0.009461333354314169
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,64,4,64,128,1,float16,float16,1,0.008661333471536636
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,64,2,64,0,1,float16,fp8,32767,0.038704000413417816
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,64,4,64,128,1,float16,fp8,1,0.006293333445986112
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,64,4,64,0,1,float16,float16,1,0.008506666868925095
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,64,4,64,128,1,float16,fp8,7,0.006522666662931442
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,64,4,64,128,1,float16,float16,3,0.009103999783595404
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,64,4,64,0,1,float16,float16,3,0.009093333035707474
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,64,4,64,128,1,float16,fp8,3,0.008432000254591307
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,64,4,64,0,1,float16,fp8,3,0.006351999938488007
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,64,4,64,128,1,float16,float16,7,0.00879466657837232
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,64,4,64,0,1,float16,float16,7,0.009392000113924345
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,64,4,64,0,1,float16,fp8,7,0.006810666372378667
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,64,4,64,128,1,float16,float16,15,0.008661333471536636
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,64,4,64,0,1,float16,float16,15,0.00922133338948091
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,64,4,64,128,1,float16,fp8,15,0.007541333635648091
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,64,4,64,0,1,float16,fp8,15,0.008592000231146812
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,64,4,64,128,1,float16,fp8,63,0.007349333415428798
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,64,4,64,128,1,float16,float16,31,0.009375999992092451
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,64,4,64,0,1,float16,float16,31,0.009509333098928133
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,64,4,64,128,1,float16,fp8,31,0.007626666376988093
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,64,4,64,0,1,float16,fp8,31,0.006666666517655055
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,64,4,64,128,1,float16,float16,63,0.010911999891201654
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,64,4,64,0,1,float16,float16,63,0.011551999797423681
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,64,4,64,0,1,float16,fp8,63,0.008570666735370954
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,64,4,64,128,1,float16,float16,127,0.011744000017642975
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,64,4,64,0,1,float16,float16,127,0.011296000331640244
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,64,4,64,128,1,float16,fp8,127,0.007461333026488622
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,64,4,64,0,1,float16,fp8,127,0.007978666573762894
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,64,4,64,128,1,float16,float16,255,0.011152000476916632
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,64,4,64,0,1,float16,float16,255,0.011274666835864386
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,64,4,64,128,1,float16,fp8,255,0.008432000254591307
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,64,4,64,0,1,float16,fp8,255,0.009418666362762451
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,64,4,64,128,1,float16,float16,511,0.012096000214417776
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,64,4,64,0,1,float16,float16,511,0.011744000017642975
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,64,4,64,128,1,float16,fp8,511,0.008074666683872541
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,64,4,64,0,1,float16,float16,2047,0.01937599976857503
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,64,4,64,0,1,float16,fp8,511,0.009455999980370203
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,64,4,64,128,1,float16,float16,1023,0.011823999385039011
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,64,4,64,0,1,float16,float16,1023,0.011813333878914515
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,64,4,64,128,1,float16,fp8,1023,0.008458666503429413
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,64,4,64,0,1,float16,fp8,1023,0.01022933361430963
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,64,4,64,128,1,float16,float16,2047,0.016762666404247284
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,64,4,64,128,1,float16,fp8,2047,0.008234666660428047
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,64,4,64,0,1,float16,fp8,2047,0.012053333222866058
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,64,4,64,128,1,float16,float16,4095,0.017509333789348602
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,64,4,64,0,1,float16,float16,4095,0.021013334393501282
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,64,4,64,128,1,float16,fp8,4095,0.0086666668454806
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,64,4,64,0,1,float16,fp8,4095,0.014090667168299357
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,64,4,64,128,1,float16,float16,8191,0.020469332734743755
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,64,4,64,0,1,float16,float16,8191,0.029093332588672638
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,64,4,64,128,1,float16,fp8,8191,0.008320000022649765
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,64,4,64,0,1,float16,fp8,8191,0.01937599976857503
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,64,4,64,128,1,float16,float16,16383,0.024010665714740753
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,64,4,64,0,1,float16,float16,16383,0.03950933367013931
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,64,4,64,0,1,float16,fp8,32767,0.04398400088151296
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,64,8,64,128,1,float16,float16,1,0.009354666496316591
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,64,4,64,128,1,float16,fp8,16383,0.008896000062425932
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,64,4,64,0,1,float16,fp8,16383,0.02644266684850057
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,64,4,64,128,1,float16,float16,32767,0.029546665648619335
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,64,4,64,0,1,float16,float16,32767,0.06225599845250448
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,64,4,64,128,1,float16,fp8,32767,0.008634666601816813
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,64,8,64,0,1,float16,float16,1,0.00877333308259646
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,64,8,64,128,1,float16,fp8,1,0.007466666400432587
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,64,8,64,0,1,float16,float16,7,0.008938666433095932
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,64,8,64,0,1,float16,fp8,1,0.008416000132759413
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,64,8,64,128,1,float16,float16,3,0.00914666677514712
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,64,8,64,0,1,float16,float16,3,0.009408000235756239
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,64,8,64,128,1,float16,fp8,3,0.00997866690158844
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,64,8,64,0,1,float16,fp8,3,0.008218666538596153
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,64,8,64,128,1,float16,float16,7,0.008938666433095932
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,64,8,64,128,1,float16,fp8,7,0.007578666632374127
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,64,8,64,0,1,float16,fp8,7,0.006058666855096817
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,64,8,64,128,1,float16,float16,15,0.009695999945203463
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,64,8,64,0,1,float16,float16,15,0.009397333487868309
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,64,8,64,128,1,float16,fp8,15,0.009354666496316591
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,64,8,64,0,1,float16,fp8,15,0.011242666592200598
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,64,8,64,128,1,float16,float16,31,0.01258133351802826
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,64,8,64,0,1,float16,float16,31,0.009770666559537252
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,64,8,64,128,1,float16,float16,127,0.012234666695197424
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,64,8,64,128,1,float16,fp8,31,0.008458666503429413
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,64,8,64,0,1,float16,fp8,31,0.006224000205596288
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,64,8,64,0,1,float16,fp8,127,0.008778666456540426
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,64,8,64,128,1,float16,float16,63,0.01192533348997434
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,64,8,64,0,1,float16,float16,63,0.011941333611806234
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,64,8,64,128,1,float16,fp8,63,0.009503999724984169
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,64,8,64,0,1,float16,fp8,63,0.012762666990359625
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,64,8,64,0,1,float16,float16,127,0.011648000528415045
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,64,8,64,128,1,float16,fp8,127,0.00960533320903778
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,64,8,64,128,1,float16,float16,255,0.011834666132926941
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,64,8,64,0,1,float16,float16,255,0.01259200026591619
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,64,8,64,128,1,float16,fp8,255,0.009621333330869675
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,64,8,64,0,1,float16,fp8,255,0.009450666606426239
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,64,8,64,128,1,float16,float16,511,0.012954667210578918
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,64,8,64,0,1,float16,float16,511,0.012282667060693106
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,64,8,64,128,1,float16,fp8,511,0.009648000200589498
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,64,8,64,0,1,float16,fp8,511,0.008080000057816505
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,64,8,64,128,1,float16,float16,1023,0.017397332936525345
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,64,8,64,0,1,float16,float16,1023,0.018922666708628338
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,64,8,64,128,1,float16,fp8,1023,0.007957333077987036
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,64,8,64,0,1,float16,fp8,1023,0.009541333342591921
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,64,8,64,128,1,float16,float16,2047,0.020058666666348774
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,64,8,64,0,1,float16,float16,2047,0.024266667664051056
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,64,8,64,128,1,float16,fp8,2047,0.00847999999920527
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,64,8,64,0,1,float16,fp8,2047,0.010992000500361124
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,64,8,64,128,1,float16,float16,4095,0.023914667467276256
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,64,8,64,0,1,float16,float16,4095,0.033002667129039764
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,64,8,64,128,1,float16,fp8,4095,0.008341333518425623
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,64,8,64,0,1,float16,fp8,4095,0.01461333284775416
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,64,8,64,128,1,float16,float16,8191,0.02828799933195114
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,64,8,64,0,1,float16,float16,8191,0.04513066510359446
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,64,8,64,128,1,float16,fp8,8191,0.008703999842206636
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,64,8,64,0,1,float16,fp8,8191,0.025797332326571148
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,64,8,64,128,1,float16,float16,16383,0.03385599950949351
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,64,8,64,0,1,float16,float16,16383,0.06809600194295247
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,64,8,64,128,1,float16,fp8,16383,0.009279999881982803
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,64,8,64,0,1,float16,fp8,16383,0.03234133372704188
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,64,8,64,128,1,float16,float16,32767,0.03576533248027166
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,64,8,64,0,1,float16,float16,32767,0.0926026701927185
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,64,8,64,128,1,float16,fp8,32767,0.009253333633144697
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,64,8,64,0,1,float16,fp8,32767,0.05286400020122528
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,64,1,64,128,1,float16,float16,1,0.07975466549396515
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,64,1,64,0,1,float16,float16,1,0.07996266583601634
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,64,1,64,128,1,float16,fp8,1,0.0804799993832906
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,64,1,64,0,1,float16,fp8,1,0.0805920014778773
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,64,1,64,128,1,float16,float16,3,0.08030933141708374
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,64,1,64,0,1,float16,float16,3,0.0802293320496877
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,64,1,64,128,1,float16,fp8,3,0.08060266574223836
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,64,1,64,0,1,float16,fp8,3,0.08002133170763652
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,64,1,64,128,1,float16,float16,7,0.08320533235867818
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,64,1,64,0,1,float16,float16,7,0.08362666765848796
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,64,1,64,128,1,float16,fp8,7,0.08373333017031352
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,64,1,64,0,1,float16,fp8,7,0.08374933401743571
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,64,1,64,128,1,float16,float16,15,0.08661333719889323
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,64,1,64,0,1,float16,float16,15,0.08646933237711589
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,64,1,64,128,1,float16,fp8,15,0.08685333530108134
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,64,1,64,0,1,float16,fp8,15,0.0860640009244283
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,64,1,64,128,1,float16,float16,31,0.10914132992426555
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,64,1,64,0,1,float16,float16,31,0.10938666264216106
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,64,1,64,128,1,float16,fp8,31,0.110042671362559
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,64,1,64,0,1,float16,fp8,31,0.11011733611424764
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,64,1,64,128,1,float16,float16,63,0.11077333490053813
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,64,1,64,0,1,float16,float16,63,0.11060800155003865
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,64,1,64,128,1,float16,fp8,63,0.11149866382280986
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,64,1,64,0,1,float16,fp8,63,0.11078400413195293
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,64,1,64,128,1,float16,float16,127,0.11190399527549744
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,64,1,64,0,1,float16,float16,127,0.11214933792750041
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,64,1,64,128,1,float16,fp8,127,0.11211199561754863
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,64,1,64,0,1,float16,fp8,127,0.1120693286259969
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,64,1,64,128,1,float16,float16,255,0.11258133252461751
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,64,1,64,0,1,float16,float16,255,0.1359999974568685
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,64,1,64,128,1,float16,fp8,255,0.11195733149846394
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,64,1,64,0,1,float16,fp8,255,0.1345866620540619
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,64,1,64,128,1,float16,float16,511,0.11284266908963521
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,64,1,64,0,1,float16,float16,511,0.20997333526611328
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,64,1,64,128,1,float16,fp8,511,0.11204800009727478
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,64,1,64,0,1,float16,fp8,511,0.21191465854644775
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,64,1,64,128,1,float16,float16,1023,0.11307733257611592
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,64,1,64,0,1,float16,float16,1023,0.3573066790898641
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,64,1,64,128,1,float16,fp8,1023,0.11247467001279195
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,64,1,64,0,1,float16,fp8,1023,0.3636159896850586
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,64,1,64,128,1,float16,float16,2047,0.112527996301651
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,64,1,64,0,1,float16,float16,2047,0.6573760112126669
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,64,1,64,128,1,float16,fp8,2047,0.11222400267918904
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,64,1,64,0,1,float16,fp8,2047,0.6642080148061117
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,64,2,64,128,1,float16,float16,1,0.01918399954835574
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,64,2,64,0,1,float16,float16,1,0.018719999740521114
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,64,2,64,128,1,float16,fp8,1,0.009904000287254652
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,64,2,64,0,1,float16,fp8,1,0.008896000062425932
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,64,2,64,128,1,float16,float16,3,0.01842133328318596
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,64,2,64,0,1,float16,float16,3,0.018816000471512478
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,64,2,64,128,1,float16,fp8,3,0.009839999799927076
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,64,2,64,0,1,float16,fp8,3,0.009514666472872099
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,64,2,64,128,1,float16,float16,7,0.019178666174411774
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,64,2,64,0,1,float16,float16,7,0.018735999862353008
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,64,2,64,128,1,float16,fp8,7,0.009658666948477427
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,64,2,64,0,1,float16,fp8,7,0.009392000113924345
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,64,2,64,128,1,float16,float16,15,0.018239999810854595
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,64,2,64,0,1,float16,float16,15,0.01868266612291336
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,64,2,64,128,1,float16,fp8,15,0.009754666437705358
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,64,2,64,0,1,float16,fp8,15,0.009568000212311745
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,64,2,64,0,1,float16,float16,63,0.01833600054184596
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,64,2,64,128,1,float16,float16,31,0.019018666197856266
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,64,2,64,0,1,float16,float16,31,0.020703999946514767
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,64,2,64,128,1,float16,fp8,31,0.009578666960199675
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,64,2,64,0,1,float16,fp8,31,0.009935999910036722
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,64,2,64,128,1,float16,fp8,127,0.010549332946538925
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,64,2,64,128,1,float16,float16,63,0.018063999712467194
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,64,2,64,128,1,float16,fp8,63,0.009898666913310686
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,64,2,64,0,1,float16,fp8,63,0.009546666716535887
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,64,2,64,128,1,float16,float16,127,0.018272000054518383
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,64,2,64,0,1,float16,float16,127,0.018986667195955913
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,64,2,64,0,1,float16,fp8,127,0.010378666842977205
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,64,2,64,128,1,float16,float16,255,0.033557333052158356
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,64,2,64,0,1,float16,float16,255,0.0335413341720899
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,64,2,64,128,1,float16,fp8,255,0.017674667139848072
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,64,2,64,0,1,float16,fp8,255,0.019914666811625164
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,64,2,64,128,1,float16,float16,511,0.04451199869314829
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,64,2,64,0,1,float16,float16,511,0.05082666873931885
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,64,2,64,128,1,float16,fp8,511,0.018133333573738735
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,64,2,64,0,1,float16,fp8,511,0.029077333708604176
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,64,2,64,128,1,float16,float16,1023,0.04461333155632019
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,64,2,64,0,1,float16,float16,1023,0.06147199869155884
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,64,4,64,128,1,float16,float16,1,0.02513599892457326
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,64,2,64,128,1,float16,fp8,1023,0.018079999834299088
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,64,2,64,0,1,float16,fp8,1023,0.0364479993780454
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,64,2,64,128,1,float16,float16,2047,0.0468800018231074
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,64,2,64,0,1,float16,float16,2047,0.08391466736793518
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,64,2,64,128,1,float16,fp8,2047,0.018181333939234417
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,64,2,64,0,1,float16,fp8,2047,0.053642665346463524
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,64,4,64,0,1,float16,float16,1,0.024853333830833435
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,64,4,64,128,1,float16,fp8,1,0.008885333314538002
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,64,4,64,0,1,float16,fp8,1,0.009717333440979322
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,64,4,64,128,1,float16,float16,3,0.025055999557177227
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,64,4,64,0,1,float16,float16,3,0.025066666305065155
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,64,4,64,128,1,float16,fp8,3,0.013066666821638743
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,64,4,64,128,1,float16,float16,15,0.025072000920772552
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,64,4,64,0,1,float16,fp8,3,0.009679999823371569
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,64,4,64,128,1,float16,float16,7,0.02481599897146225
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,64,4,64,0,1,float16,float16,7,0.02492800106604894
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,64,4,64,128,1,float16,fp8,7,0.010314666976531347
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,64,4,64,0,1,float16,fp8,7,0.00938666673998038
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,64,4,64,0,1,float16,float16,15,0.02514133354028066
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,64,4,64,128,1,float16,fp8,15,0.01240533341964086
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,64,4,64,0,1,float16,fp8,15,0.009685333197315535
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,64,4,64,128,1,float16,float16,31,0.025242666403452556
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,64,4,64,128,1,float16,fp8,63,0.012453333785136541
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,64,4,64,0,1,float16,fp8,63,0.00979200005531311
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,64,4,64,0,1,float16,float16,31,0.024773334463437397
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,64,4,64,128,1,float16,fp8,31,0.009519999846816063
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,64,4,64,128,1,float16,fp8,127,0.010346666599313417
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,64,4,64,0,1,float16,fp8,31,0.008874666566650072
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,64,4,64,128,1,float16,float16,255,0.044906665881474815
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,64,4,64,128,1,float16,float16,63,0.02439466615517934
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,64,4,64,0,1,float16,float16,63,0.024735999604066212
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,64,4,64,128,1,float16,float16,127,0.02493866781393687
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,64,4,64,0,1,float16,float16,127,0.02498133232196172
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,64,4,64,0,1,float16,fp8,127,0.010341333225369453
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,64,4,64,0,1,float16,float16,255,0.0450133333603541
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,64,4,64,128,1,float16,fp8,255,0.016490666816631954
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,64,4,64,0,1,float16,fp8,255,0.01931200052301089
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,64,4,64,128,1,float16,float16,511,0.04510400195916494
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,64,4,64,0,1,float16,float16,511,0.054010664423306785
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,64,4,64,0,1,float16,fp8,1023,0.03505066782236099
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,64,4,64,128,1,float16,fp8,511,0.01669866715868314
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,64,4,64,0,1,float16,fp8,511,0.024656000236670177
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,64,4,64,128,1,float16,fp8,2047,0.016469333320856094
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,64,4,64,128,1,float16,float16,1023,0.04497066636880239
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,64,4,64,0,1,float16,float16,1023,0.06791999936103821
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,64,4,64,128,1,float16,fp8,1023,0.01674666628241539
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,64,4,64,128,1,float16,float16,2047,0.04739200075467428
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,64,4,64,0,1,float16,float16,2047,0.09298666318257649
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,64,4,64,0,1,float16,fp8,2047,0.05680533250172933
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,64,8,64,128,1,float16,float16,1,0.042175998290379844
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,64,8,64,0,1,float16,float16,1,0.04155199974775314
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,64,8,64,128,1,float16,fp8,1,0.01201066623131434
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,64,8,64,0,1,float16,fp8,1,0.01210133358836174
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,64,8,64,128,1,float16,float16,3,0.04155199974775314
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,64,8,64,0,1,float16,fp8,7,0.012096000214417776
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,64,8,64,0,1,float16,float16,3,0.04162666698296865
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,64,8,64,128,1,float16,fp8,3,0.012272000312805176
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,64,8,64,128,1,float16,fp8,15,0.01209066684047381
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,64,8,64,0,1,float16,fp8,3,0.012015999605258306
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,64,8,64,128,1,float16,float16,7,0.04197333256403605
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,64,8,64,0,1,float16,float16,7,0.04171200096607208
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,64,8,64,128,1,float16,fp8,7,0.011994666109482447
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,64,8,64,128,1,float16,float16,15,0.04147200038035711
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,64,8,64,0,1,float16,float16,15,0.041637333730856575
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,64,8,64,0,1,float16,fp8,15,0.012229333321253458
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,64,8,64,128,1,float16,float16,31,0.04166933397452036
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,64,8,64,0,1,float16,float16,31,0.0421013335386912
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,64,8,64,128,1,float16,fp8,31,0.012272000312805176
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,64,8,64,128,1,float16,float16,127,0.04102933406829834
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,64,8,64,0,1,float16,fp8,31,0.012240000069141388
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,64,8,64,128,1,float16,float16,63,0.040965333580970764
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,64,8,64,0,1,float16,float16,63,0.04067733387152354
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,64,8,64,128,1,float16,fp8,63,0.012005332857370377
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,64,8,64,0,1,float16,fp8,63,0.012037333101034164
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,64,8,64,0,1,float16,float16,127,0.04166933397452036
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,64,8,64,128,1,float16,fp8,127,0.014389333625634512
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,64,8,64,0,1,float16,fp8,127,0.014138666292031607
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,64,8,64,128,1,float16,float16,255,0.042954668402671814
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,64,8,64,0,1,float16,float16,255,0.04265599946180979
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,64,8,64,128,1,float16,fp8,255,0.013855999956528345
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,64,8,64,0,1,float16,fp8,255,0.01833600054184596
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,64,8,64,0,1,float16,float16,1023,0.07765333354473114
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,64,8,64,128,1,float16,float16,511,0.04340800146261851
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,64,8,64,0,1,float16,float16,511,0.055455997586250305
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,64,8,64,128,1,float16,fp8,511,0.01440000037352244
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,64,8,64,0,1,float16,fp8,511,0.026677332818508148
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,64,8,64,128,1,float16,float16,1023,0.04322133461634318
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,64,8,64,128,1,float16,fp8,2047,0.014335999886194864
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,64,8,64,128,1,float16,fp8,1023,0.013845333208640417
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,64,8,64,0,1,float16,fp8,1023,0.044394666949907936
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,64,8,64,128,1,float16,float16,2047,0.04530666768550873
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,64,8,64,0,1,float16,float16,2047,0.12434132893880208
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,64,8,64,0,1,float16,fp8,2047,0.0768746683994929
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,64,1,64,128,1,float16,float16,1,0.15307199954986572
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,64,1,64,0,1,float16,float16,1,0.15330666303634644
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,64,1,64,128,1,float16,fp8,1,0.15438933173815408
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,64,1,64,0,1,float16,fp8,1,0.15411200126012167
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,64,1,64,128,1,float16,float16,3,0.1532960037390391
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,64,1,64,0,1,float16,float16,3,0.15330666303634644
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,64,1,64,128,1,float16,fp8,3,0.15422933300336203
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,64,1,64,0,1,float16,fp8,3,0.15432533621788025
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,64,1,64,128,1,float16,float16,7,0.15993066628774008
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,64,1,64,0,1,float16,float16,7,0.1602079967657725
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,64,1,64,128,1,float16,fp8,7,0.16119999686876932
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,64,1,64,0,1,float16,fp8,7,0.16100266575813293
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,64,1,64,128,1,float16,float16,15,0.16612799962361655
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,64,1,64,0,1,float16,float16,15,0.16616533199946085
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,64,1,64,128,1,float16,fp8,15,0.1668000022570292
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,64,1,64,0,1,float16,fp8,15,0.16687466700871786
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,64,1,64,128,1,float16,float16,31,0.21191465854644775
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,64,1,64,0,1,float16,float16,31,0.21203200022379556
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,64,1,64,128,1,float16,fp8,31,0.2132320006688436
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,64,1,64,0,1,float16,fp8,31,0.21341866254806519
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,64,1,64,0,1,float16,float16,127,0.21709332863489786
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,64,1,64,128,1,float16,float16,63,0.2139893372853597
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,64,1,64,0,1,float16,float16,63,0.21412267287572226
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,64,1,64,128,1,float16,fp8,63,0.21597866217295328
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,64,1,64,0,1,float16,fp8,63,0.216154674688975
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,64,1,64,0,1,float16,fp8,255,0.26295467217763263
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,64,1,64,128,1,float16,float16,127,0.21709332863489786
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,64,1,64,128,1,float16,fp8,127,0.21738133827845255
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,64,1,64,0,1,float16,fp8,127,0.2172213395436605
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,64,1,64,128,1,float16,float16,255,0.2180160085360209
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,64,1,64,0,1,float16,float16,255,0.2648693323135376
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,64,1,64,128,1,float16,fp8,255,0.2171786626180013
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,64,1,64,128,1,float16,float16,511,0.2190613349278768
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,64,1,64,0,1,float16,float16,511,0.41012267271677655
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,64,1,64,128,1,float16,fp8,511,0.2172586719195048
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,64,2,64,0,1,float16,float16,3,0.030799999833106995
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,64,1,64,0,1,float16,fp8,511,0.4145706494649251
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,64,2,64,128,1,float16,float16,1,0.030394665896892548
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,64,2,64,0,1,float16,float16,1,0.031061333914597828
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,64,2,64,128,1,float16,fp8,1,0.015024000157912573
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,64,2,64,0,1,float16,float16,7,0.03048533449570338
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,64,2,64,0,1,float16,fp8,1,0.015274666249752045
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,64,2,64,128,1,float16,float16,3,0.03030933439731598
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,64,2,64,128,1,float16,fp8,3,0.015237333873907724
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,64,2,64,0,1,float16,fp8,3,0.015146666516860327
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,64,2,64,128,1,float16,float16,7,0.030746666093667347
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,64,2,64,128,1,float16,fp8,7,0.015360000232855478
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,64,2,64,0,1,float16,fp8,7,0.01522133375207583
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,64,2,64,128,1,float16,fp8,31,0.015530666957298914
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,64,2,64,128,1,float16,float16,15,0.03046400099992752
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,64,2,64,0,1,float16,float16,15,0.030506665507952373
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,64,2,64,128,1,float16,fp8,15,0.015247999380032221
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,64,2,64,0,1,float16,fp8,15,0.015418666104475657
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,64,2,64,128,1,float16,float16,31,0.030645333230495453
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,64,2,64,0,1,float16,float16,31,0.030847998956839245
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,64,2,64,0,1,float16,fp8,31,0.015184000134468079
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,64,2,64,128,1,float16,float16,63,0.030805334448814392
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,64,2,64,0,1,float16,fp8,127,0.017008000363906223
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,64,2,64,0,1,float16,float16,63,0.030224000414212544
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,64,2,64,128,1,float16,fp8,63,0.015141333142916361
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,64,2,64,128,1,float16,fp8,255,0.024959998826185863
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,64,2,64,0,1,float16,fp8,63,0.015119999647140503
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,64,2,64,128,1,float16,float16,127,0.03038399914900462
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,64,2,64,0,1,float16,float16,127,0.0305226668715477
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,64,2,64,128,1,float16,fp8,127,0.017162666966517765
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,64,2,64,128,1,float16,float16,255,0.05619733532269796
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,64,2,64,0,1,float16,float16,255,0.05503466725349426
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,64,2,64,0,1,float16,fp8,255,0.02867199977238973
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,64,2,64,128,1,float16,float16,511,0.05494933327039083
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,64,2,64,0,1,float16,float16,511,0.06656000018119812
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,64,2,64,128,1,float16,fp8,511,0.02513066679239273
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,64,2,64,0,1,float16,fp8,511,0.036677333215872444
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,64,4,64,128,1,float16,float16,1,0.043237333496411644
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,64,4,64,0,1,float16,float16,1,0.043824002146720886
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,64,4,64,128,1,float16,fp8,1,0.013754667093356451
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,64,4,64,0,1,float16,fp8,1,0.013536000003417334
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,64,4,64,128,1,float16,float16,3,0.04252266883850098
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,64,4,64,128,1,float16,fp8,7,0.01357866699496905
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,64,4,64,0,1,float16,float16,3,0.04271999994913737
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,64,4,64,128,1,float16,fp8,3,0.013503999759753546
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,64,4,64,0,1,float16,fp8,3,0.013354666531085968
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,64,4,64,128,1,float16,float16,7,0.042949333786964417
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,64,4,64,0,1,float16,float16,7,0.043391997615496315
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,64,4,64,0,1,float16,fp8,7,0.01357866699496905
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,64,4,64,128,1,float16,float16,15,0.04256533086299896
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,64,4,64,0,1,float16,float16,15,0.0429013321797053
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,64,4,64,128,1,float16,float16,63,0.04164266586303711
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,64,4,64,128,1,float16,fp8,15,0.01350933313369751
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,64,4,64,0,1,float16,fp8,15,0.013365333278973898
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,64,4,64,128,1,float16,float16,31,0.043493335445721946
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,64,4,64,0,1,float16,float16,31,0.04317333300908407
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,64,4,64,128,1,float16,fp8,31,0.01394133393963178
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,64,4,64,0,1,float16,fp8,31,0.013669333110253016
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,64,4,64,0,1,float16,float16,63,0.04228800038496653
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,64,4,64,128,1,float16,fp8,63,0.013280000537633896
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,64,4,64,0,1,float16,fp8,63,0.013327999661366144
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,64,4,64,128,1,float16,float16,127,0.0425546665986379
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,64,4,64,0,1,float16,float16,127,0.04203199843565623
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,64,4,64,128,1,float16,fp8,127,0.01653333380818367
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,64,4,64,0,1,float16,fp8,127,0.01637866720557213
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,64,4,64,128,1,float16,float16,255,0.044549331068992615
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,64,4,64,0,1,float16,float16,255,0.04322133461634318
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,64,4,64,128,1,float16,fp8,255,0.01613866661985715
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,64,4,64,0,1,float16,fp8,255,0.021488000949223835
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,64,4,64,128,1,float16,float16,511,0.04423466821511587
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,64,4,64,0,1,float16,float16,511,0.057722667853037514
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,64,4,64,128,1,float16,fp8,511,0.0162773331006368
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,64,4,64,0,1,float16,fp8,511,0.032138665517171226
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,64,8,64,128,1,float16,float16,1,0.0747626672188441
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,64,8,64,0,1,float16,float16,1,0.07432533303896587
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,64,8,64,128,1,float16,fp8,1,0.019909333437681198
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,64,8,64,0,1,float16,fp8,1,0.019968000551064808
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,64,8,64,128,1,float16,float16,3,0.07484266658624013
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,64,8,64,128,1,float16,fp8,7,0.01993600030740102
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,64,8,64,0,1,float16,float16,3,0.07534933090209961
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,64,8,64,128,1,float16,float16,15,0.07489599784215291
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,64,8,64,0,1,float16,float16,15,0.07480533421039581
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,64,8,64,128,1,float16,fp8,3,0.019968000551064808
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,64,8,64,128,1,float16,fp8,15,0.02022933339079221
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,64,8,64,0,1,float16,fp8,3,0.019909333437681198
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,64,8,64,128,1,float16,float16,7,0.07487466434637706
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,64,8,64,0,1,float16,float16,7,0.07421333094437917
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,64,8,64,0,1,float16,fp8,7,0.020021333048741024
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,64,8,64,0,1,float16,fp8,15,0.019920000185569126
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,64,8,64,128,1,float16,float16,31,0.07499733567237854
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,64,8,64,0,1,float16,float16,31,0.07441066702206929
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,64,8,64,128,1,float16,fp8,31,0.02022933339079221
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,64,8,64,0,1,float16,fp8,31,0.0198186660806338
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,64,8,64,128,1,float16,float16,63,0.07377600173155467
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,64,8,64,0,1,float16,float16,63,0.07361599802970886
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,64,8,64,128,1,float16,fp8,63,0.019727999965349834
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,64,8,64,0,1,float16,fp8,63,0.019850666324297588
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,64,8,64,128,1,float16,float16,127,0.07426666716734569
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,64,8,64,0,1,float16,float16,127,0.07425599793593089
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,64,8,64,128,1,float16,fp8,127,0.023914667467276256
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,64,8,64,0,1,float16,fp8,127,0.023498666783173878
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,64,8,64,128,1,float16,float16,255,0.07934933404127757
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,64,8,64,0,1,float16,float16,255,0.07912000020345052
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,64,8,64,128,1,float16,fp8,255,0.02363733450571696
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,64,8,64,0,1,float16,fp8,255,0.032058666149775185
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,64,8,64,128,1,float16,float16,511,0.07973333199818929
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,64,8,64,0,1,float16,float16,511,0.09918399651845296
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,64,8,64,128,1,float16,fp8,511,0.024069334069887798
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,64,8,64,0,1,float16,fp8,511,0.04965866605440775
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,64,1,64,128,1,float16,float16,1,0.2987946669260661
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,64,1,64,0,1,float16,float16,1,0.29892800251642865
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,64,1,64,128,1,float16,fp8,1,0.30111465851465863
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,64,1,64,0,1,float16,fp8,1,0.301253338654836
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,64,1,64,128,1,float16,float16,3,0.29954665899276733
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,64,1,64,0,1,float16,float16,3,0.2999253273010254
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,64,1,64,128,1,float16,fp8,3,0.30169065793355304
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,64,1,64,0,1,float16,fp8,3,0.3015146652857463
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,64,1,64,128,1,float16,float16,7,0.31293867031733197
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,64,1,64,0,1,float16,float16,7,0.31311466296513873
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,64,1,64,128,1,float16,fp8,7,0.31565332412719727
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,64,1,64,0,1,float16,fp8,7,0.31565332412719727
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,64,1,64,128,1,float16,float16,15,0.3256853421529134
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,64,1,64,0,1,float16,float16,15,0.3259146610895793
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,64,1,64,128,1,float16,fp8,15,0.3278026580810547
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,64,1,64,0,1,float16,fp8,15,0.3277440071105957
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,64,1,64,128,1,float16,float16,31,0.41656001408894855
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,64,1,64,0,1,float16,float16,31,0.4167040189107259
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,64,1,64,128,1,float16,fp8,31,0.4193973143895467
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,64,1,64,128,1,float16,float16,127,0.42709867159525555
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,64,1,64,0,1,float16,fp8,31,0.4195466836293538
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,64,1,64,128,1,float16,float16,63,0.4211626847585042
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,64,1,64,0,1,float16,float16,63,0.4214666684468587
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,64,1,64,128,1,float16,fp8,63,0.4257066647211711
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,64,1,64,0,1,float16,fp8,63,0.4257386525472005
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,64,1,64,128,1,float16,fp8,255,0.4278186559677124
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,64,1,64,0,1,float16,fp8,255,0.5189919869105021
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,64,1,64,0,1,float16,float16,127,0.42742931842803955
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,64,1,64,128,1,float16,fp8,127,0.42770131429036456
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,64,1,64,0,1,float16,fp8,127,0.427621324857076
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,64,1,64,128,1,float16,float16,255,0.4286239941914876
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,64,2,64,128,1,float16,float16,3,0.05400000015894572
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,64,1,64,0,1,float16,float16,255,0.5225973526636759
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,64,2,64,128,1,float16,float16,1,0.05329066514968872
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,64,2,64,0,1,float16,float16,1,0.05349333087603251
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,64,2,64,128,1,float16,fp8,1,0.02145066608985265
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,64,2,64,0,1,float16,fp8,1,0.020666666328907013
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,64,2,64,0,1,float16,float16,3,0.05320533116658529
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,64,2,64,128,1,float16,fp8,3,0.02109866589307785
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,64,2,64,0,1,float16,fp8,3,0.020874666670958202
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,64,2,64,128,1,float16,fp8,15,0.02107200026512146
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,64,2,64,128,1,float16,float16,7,0.05305066704750061
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,64,2,64,0,1,float16,float16,7,0.05337599913279215
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,64,2,64,128,1,float16,fp8,7,0.020928000410397846
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,64,2,64,0,1,float16,fp8,7,0.021040000021457672
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,64,2,64,128,1,float16,float16,15,0.05322133501370748
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,64,2,64,0,1,float16,float16,15,0.05333866675694784
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,64,2,64,0,1,float16,fp8,15,0.021141332884629566
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,64,2,64,128,1,float16,float16,31,0.05354666709899902
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,64,2,64,0,1,float16,float16,31,0.0531626691420873
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,64,2,64,128,1,float16,fp8,31,0.021162666380405426
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,64,2,64,0,1,float16,fp8,63,0.020981334149837494
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,64,2,64,0,1,float16,fp8,31,0.02143999934196472
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,64,2,64,128,1,float16,float16,63,0.052469333012898765
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,64,2,64,0,1,float16,float16,63,0.05261866748332977
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,64,2,64,128,1,float16,fp8,63,0.020981334149837494
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,64,2,64,128,1,float16,float16,127,0.05261866748332977
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,64,2,64,0,1,float16,float16,127,0.052789335449536644
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,64,2,64,128,1,float16,fp8,127,0.024725332856178284
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,64,2,64,0,1,float16,fp8,127,0.025125332176685333
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,64,2,64,128,1,float16,float16,255,0.05481599768002828
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,64,2,64,0,1,float16,float16,255,0.05471999943256378
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,64,2,64,128,1,float16,fp8,255,0.02465066562096278
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,64,2,64,0,1,float16,fp8,255,0.031146667897701263
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,64,4,64,0,1,float16,float16,1,0.07762133578459422
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,64,4,64,128,1,float16,float16,1,0.07703466713428497
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,64,4,64,128,1,float16,fp8,1,0.02290133386850357
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,64,4,64,0,1,float16,fp8,1,0.023050665855407715
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,64,4,64,128,1,float16,float16,3,0.07729066908359528
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,64,4,64,0,1,float16,float16,3,0.07734933495521545
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,64,4,64,128,1,float16,fp8,3,0.023013333479563396
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,64,4,64,0,1,float16,fp8,3,0.022698665658632915
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,64,4,64,128,1,float16,float16,7,0.0775679995616277
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,64,4,64,0,1,float16,float16,15,0.07765866816043854
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,64,4,64,0,1,float16,float16,7,0.07726933558781941
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,64,4,64,128,1,float16,fp8,7,0.02386666586001714
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,64,4,64,0,1,float16,fp8,7,0.023232000569502514
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,64,4,64,128,1,float16,float16,15,0.07692266503969829
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,64,4,64,128,1,float16,fp8,15,0.022944000860055287
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,64,4,64,0,1,float16,fp8,15,0.022704000274340313
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,64,4,64,128,1,float16,float16,31,0.07740800082683563
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,64,4,64,0,1,float16,float16,31,0.07760000228881836
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,64,4,64,128,1,float16,fp8,31,0.022970666488011677
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,64,4,64,0,1,float16,fp8,31,0.02295999974012375
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,64,4,64,128,1,float16,float16,63,0.07598933577537537
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,64,4,64,0,1,float16,float16,63,0.07545066873232524
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,64,4,64,128,1,float16,fp8,63,0.022650666534900665
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,64,4,64,0,1,float16,fp8,63,0.022592000663280487
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,64,4,64,128,1,float16,float16,127,0.07723199824492137
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,64,4,64,0,1,float16,float16,127,0.07714133461316426
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,64,4,64,128,1,float16,fp8,127,0.02752000093460083
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,64,4,64,0,1,float16,fp8,127,0.027797333896160126
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,64,4,64,128,1,float16,float16,255,0.08268266419569652
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,64,4,64,0,1,float16,float16,255,0.08072533210118611
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,64,4,64,128,1,float16,fp8,255,0.02775466690460841
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,64,4,64,0,1,float16,fp8,255,0.03904533386230469
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,64,8,64,128,1,float16,fp8,3,0.034559999903043113
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,64,8,64,128,1,float16,float16,1,0.13863999644915262
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,64,8,64,0,1,float16,float16,1,0.139055997133255
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,64,8,64,128,1,float16,fp8,1,0.03442133218050003
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,64,8,64,0,1,float16,fp8,1,0.034645333886146545
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,64,8,64,128,1,float16,float16,3,0.1397173305352529
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,64,8,64,0,1,float16,fp8,7,0.03442133218050003
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,64,8,64,0,1,float16,float16,3,0.13797866304715475
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,64,8,64,0,1,float16,fp8,3,0.034602666894594826
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,64,8,64,128,1,float16,float16,7,0.1397546629110972
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,64,8,64,0,1,float16,float16,7,0.13889066378275552
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,64,8,64,128,1,float16,float16,31,0.13897599776585898
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,64,8,64,128,1,float16,fp8,7,0.03443733354409536
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,64,8,64,128,1,float16,float16,15,0.13934933145840964
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,64,8,64,0,1,float16,fp8,31,0.03478399912516276
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,64,8,64,0,1,float16,float16,15,0.1394613285859426
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,64,8,64,128,1,float16,fp8,15,0.03444266567627589
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,64,8,64,0,1,float16,fp8,15,0.03401066611210505
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,64,8,64,0,1,float16,float16,31,0.13988799850145975
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,64,8,64,128,1,float16,fp8,31,0.0345920001467069
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,64,8,64,128,1,float16,float16,63,0.13917332887649536
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,64,8,64,0,1,float16,float16,63,0.1392586628595988
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,64,8,64,128,1,float16,fp8,63,0.03445333242416382
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,64,8,64,0,1,float16,fp8,63,0.034688000877698265
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,64,8,64,0,1,float16,float16,255,0.14721600214640299
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,64,8,64,128,1,float16,float16,127,0.14087999860445657
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,64,8,64,0,1,float16,float16,127,0.14039466778437296
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,64,8,64,128,1,float16,fp8,127,0.04359466830889384
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,64,8,64,0,1,float16,fp8,127,0.04420266548792521
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,64,1,64,0,1,float16,fp8,1,0.014538666854302088
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,64,8,64,128,1,float16,float16,255,0.14763200283050537
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,64,8,64,128,1,float16,fp8,255,0.044106667240460716
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,64,8,64,0,1,float16,fp8,255,0.061146666606267296
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,64,1,64,128,1,float16,float16,1,0.014645333091417948
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,64,1,64,0,1,float16,float16,1,0.014815999815861383
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,64,1,64,128,1,float16,fp8,1,0.014554666976133982
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,64,1,64,128,1,float16,float16,3,0.01463466634353002
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,64,1,64,0,1,float16,float16,3,0.014720000326633453
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,64,1,64,128,1,float16,fp8,3,0.014853333433469137
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,64,1,64,0,1,float16,fp8,3,0.015333333363135656
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,64,1,64,128,1,float16,float16,7,0.01552533358335495
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,64,1,64,0,1,float16,float16,7,0.014890667051076889
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,64,1,64,128,1,float16,fp8,7,0.015029333531856537
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,64,1,64,0,1,float16,fp8,7,0.014746667196353277
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,64,1,64,128,1,float16,float16,15,0.014949332922697067
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,64,1,64,0,1,float16,float16,15,0.015008000036080679
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,64,1,64,128,1,float16,fp8,15,0.01509333277742068
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,64,1,64,0,1,float16,fp8,15,0.014959999670584997
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,64,1,64,128,1,float16,float16,31,0.01648533344268799
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,64,1,64,0,1,float16,float16,31,0.01623999948302905
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,64,1,64,128,1,float16,fp8,31,0.01624533285697301
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,64,1,64,0,1,float16,fp8,31,0.01589866727590561
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,64,1,64,128,1,float16,float16,63,0.019573333362738293
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,64,1,64,0,1,float16,float16,63,0.01945066700379054
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,64,1,64,128,1,float16,fp8,63,0.019717333217461903
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,64,1,64,0,1,float16,fp8,63,0.01982933282852173
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,64,1,64,128,1,float16,float16,127,0.019632000476121902
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,64,1,64,0,1,float16,float16,127,0.020031999796628952
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,64,1,64,128,1,float16,fp8,127,0.019541333119074505
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,64,1,64,0,1,float16,fp8,127,0.019578666736682255
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,64,1,64,128,1,float16,fp8,511,0.019637333850065868
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,64,1,64,128,1,float16,float16,255,0.01961600035429001
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,64,1,64,0,1,float16,float16,255,0.02012266715367635
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,64,1,64,128,1,float16,fp8,255,0.0198186660806338
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,64,1,64,0,1,float16,fp8,255,0.019695999721686046
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,64,1,64,128,1,float16,float16,511,0.019850666324297588
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,64,1,64,0,1,float16,float16,511,0.023578666150569916
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,64,1,64,0,1,float16,fp8,511,0.0229120006163915
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,64,1,64,128,1,float16,float16,1023,0.01969066634774208
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,64,1,64,0,1,float16,float16,1023,0.03310399999221166
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,64,1,64,128,1,float16,fp8,1023,0.019632000476121902
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,64,1,64,0,1,float16,fp8,1023,0.03341866781314214
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,64,1,64,128,1,float16,float16,2047,0.019738666713237762
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,64,1,64,0,1,float16,float16,2047,0.05409066875775655
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,64,1,64,128,1,float16,fp8,2047,0.019823999454577763
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,64,1,64,0,1,float16,fp8,2047,0.05580799778302511
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,64,1,64,128,1,float16,float16,4095,0.019802667200565338
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,64,1,64,0,1,float16,float16,4095,0.0913759966691335
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,64,1,64,128,1,float16,fp8,4095,0.01958400011062622
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,64,1,64,0,1,float16,fp8,4095,0.09357333183288574
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,64,1,64,128,1,float16,float16,8191,0.01978133370478948
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,64,1,64,0,1,float16,float16,8191,0.1691840092341105
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,64,1,64,128,1,float16,fp8,8191,0.019754666835069656
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,64,1,64,0,1,float16,fp8,8191,0.17269333203633627
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,64,1,64,128,1,float16,float16,16383,0.019786667078733444
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,64,1,64,0,1,float16,float16,16383,0.32498133182525635
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,64,1,64,128,1,float16,fp8,16383,0.019648000597953796
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,64,1,64,0,1,float16,fp8,16383,0.3330026666323344
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,64,1,64,128,1,float16,float16,32767,0.01977066695690155
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,64,1,64,0,1,float16,float16,32767,0.81932266553243
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,64,1,64,128,1,float16,fp8,32767,0.019626667102177937
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,64,1,64,0,1,float16,fp8,32767,0.7593812942504883
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,64,2,64,128,1,float16,float16,1,0.014645333091417948
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,64,2,64,0,1,float16,float16,1,0.01461333284775416
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,64,2,64,128,1,float16,fp8,1,0.009354666496316591
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,64,2,64,0,1,float16,fp8,1,0.011893333246310553
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,64,2,64,128,1,float16,float16,3,0.014773332824309668
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,64,2,64,0,1,float16,float16,3,0.01492799942692121
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,64,2,64,128,1,float16,fp8,3,0.010399999717871347
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,64,2,64,0,1,float16,fp8,3,0.009317333499590555
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,64,2,64,128,1,float16,float16,7,0.014938666174809137
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,64,2,64,0,1,float16,float16,7,0.014933332800865173
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,64,2,64,128,1,float16,fp8,7,0.00980266680320104
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,64,2,64,0,1,float16,fp8,7,0.012400000045696894
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,64,2,64,128,1,float16,float16,15,0.015061333775520325
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,64,2,64,0,1,float16,float16,15,0.01504533365368843
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,64,2,64,128,1,float16,fp8,15,0.010224000240365664
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,64,2,64,0,1,float16,fp8,15,0.00927466650803884
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,64,2,64,128,1,float16,float16,31,0.016127999871969223
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,64,2,64,0,1,float16,float16,31,0.016074666132529575
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,64,2,64,128,1,float16,fp8,31,0.009455999980370203
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,64,2,64,0,1,float16,fp8,31,0.012703999876976013
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,64,2,64,128,1,float16,float16,63,0.019530666371186573
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,64,2,64,0,1,float16,float16,63,0.01947733387351036
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,64,2,64,128,1,float16,fp8,63,0.010069333637754122
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,64,2,64,0,1,float16,float16,255,0.019925333559513092
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,64,2,64,0,1,float16,fp8,63,0.009301333377758661
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,64,2,64,128,1,float16,float16,127,0.01979200045267741
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,64,2,64,0,1,float16,float16,127,0.01960533360640208
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,64,2,64,128,1,float16,fp8,127,0.009701333319147428
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,64,2,64,0,1,float16,fp8,127,0.012170666207869848
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,64,2,64,128,1,float16,float16,255,0.019850666324297588
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,64,2,64,128,1,float16,fp8,255,0.010458666831254959
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,64,2,64,0,1,float16,fp8,255,0.011402666568756104
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,64,2,64,128,1,float16,float16,511,0.013760000467300415
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,64,2,64,0,1,float16,float16,511,0.01394133393963178
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,64,2,64,128,1,float16,fp8,511,0.010431999961535135
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,64,2,64,0,1,float16,fp8,511,0.012005332857370377
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,64,2,64,128,1,float16,float16,1023,0.014607999473810196
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,64,2,64,0,1,float16,float16,1023,0.014256000518798828
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,64,2,64,128,1,float16,fp8,1023,0.010431999961535135
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,64,2,64,0,1,float16,fp8,1023,0.012442667037248611
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,64,2,64,0,1,float16,float16,4095,0.024149333437283833
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,64,2,64,128,1,float16,fp8,4095,0.010837333897749582
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,64,2,64,128,1,float16,float16,2047,0.019093333433071773
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,64,2,64,0,1,float16,float16,2047,0.021738665799299877
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,64,2,64,128,1,float16,fp8,2047,0.010928000013033548
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,64,2,64,0,1,float16,fp8,2047,0.01573333392540614
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,64,2,64,128,1,float16,float16,4095,0.020474666108687718
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,64,2,64,0,1,float16,fp8,4095,0.018698666244745255
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,64,2,64,128,1,float16,float16,8191,0.023760000864664715
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,64,2,64,0,1,float16,float16,8191,0.03419733295838038
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,64,2,64,128,1,float16,fp8,8191,0.010911999891201654
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,64,2,64,0,1,float16,fp8,8191,0.0276853342851003
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,64,2,64,128,1,float16,float16,16383,0.02759466568628947
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,64,2,64,0,1,float16,float16,16383,0.04730133215586344
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,64,2,64,128,1,float16,fp8,16383,0.012047999848922094
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,64,2,64,0,1,float16,fp8,16383,0.04302933315436045
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,64,2,64,128,1,float16,float16,32767,0.034261333445707955
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,64,2,64,0,1,float16,float16,32767,0.07547200222810109
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,64,2,64,128,1,float16,fp8,32767,0.010650667051474253
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,64,2,64,0,1,float16,fp8,32767,0.06015466650327047
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,64,4,64,128,1,float16,float16,1,0.01479999969402949
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,64,4,64,0,1,float16,float16,1,0.014698666830857595
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,64,4,64,128,1,float16,fp8,1,0.012538666526476542
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,64,4,64,0,1,float16,fp8,1,0.009322666873534521
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,64,4,64,128,1,float16,float16,3,0.014933332800865173
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,64,4,64,0,1,float16,float16,3,0.014762666076421738
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,64,4,64,128,1,float16,fp8,3,0.008634666601816813
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,64,4,64,0,1,float16,fp8,3,0.010170666500926018
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,64,4,64,128,1,float16,float16,7,0.015018666783968607
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,64,4,64,0,1,float16,float16,7,0.015253332753976187
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,64,4,64,128,1,float16,fp8,7,0.009375999992092451
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,64,4,64,0,1,float16,fp8,7,0.009930666536092758
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,64,4,64,128,1,float16,float16,15,0.01509333277742068
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,64,4,64,0,1,float16,float16,15,0.014959999670584997
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,64,4,64,128,1,float16,fp8,15,0.00895999992887179
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,64,4,64,0,1,float16,fp8,15,0.0099093330403169
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,64,4,64,128,1,float16,float16,31,0.016303999970356624
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,64,4,64,0,1,float16,float16,31,0.016629333297411602
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,64,4,64,128,1,float16,fp8,31,0.009178666397929192
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,64,4,64,0,1,float16,fp8,31,0.009322666873534521
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,64,4,64,128,1,float16,float16,63,0.019530666371186573
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,64,4,64,0,1,float16,float16,63,0.01937066639463107
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,64,4,64,128,1,float16,fp8,63,0.011098666737476984
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,64,4,64,0,1,float16,fp8,63,0.00980266680320104
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,64,4,64,128,1,float16,float16,127,0.01978133370478948
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,64,4,64,0,1,float16,float16,127,0.020154666155576706
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,64,4,64,128,1,float16,fp8,127,0.009706666693091393
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,64,4,64,0,1,float16,fp8,127,0.01007466639081637
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,64,4,64,128,1,float16,float16,255,0.012853333105643591
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,64,4,64,0,1,float16,float16,255,0.012351999680201212
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,64,4,64,128,1,float16,fp8,255,0.009509333098928133
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,64,4,64,0,1,float16,fp8,255,0.012965332716703415
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,64,4,64,128,1,float16,float16,511,0.012506666282812754
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,64,4,64,0,1,float16,float16,511,0.013466666142145792
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,64,4,64,128,1,float16,fp8,511,0.00972800018886725
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,64,4,64,0,1,float16,fp8,511,0.009957333405812582
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,64,4,64,128,1,float16,float16,1023,0.017397332936525345
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,64,4,64,0,1,float16,float16,1023,0.018170667191346485
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,64,4,64,128,1,float16,fp8,1023,0.00933333362142245
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,64,4,64,0,1,float16,fp8,1023,0.01121066634853681
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,64,4,64,128,1,float16,float16,2047,0.020031999796628952
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,64,4,64,0,1,float16,float16,2047,0.026005332668622334
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,64,4,64,128,1,float16,fp8,2047,0.009989333028594652
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,64,4,64,0,1,float16,fp8,2047,0.014021333307027817
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,64,4,64,128,1,float16,float16,4095,0.024735999604066212
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,64,4,64,0,1,float16,fp8,8191,0.02914133419593175
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,64,4,64,0,1,float16,float16,4095,0.0336053321758906
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,64,4,64,128,1,float16,fp8,4095,0.00902399979531765
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,64,4,64,0,1,float16,fp8,4095,0.018346666047970455
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,64,4,64,128,1,float16,float16,8191,0.028463999430338543
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,64,4,64,0,1,float16,float16,8191,0.045978665351867676
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,64,4,64,128,1,float16,fp8,8191,0.009541333342591921
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,64,4,64,128,1,float16,float16,16383,0.03496533383925756
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,64,4,64,0,1,float16,float16,16383,0.06892266869544983
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,64,4,64,128,1,float16,fp8,16383,0.00973866693675518
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,64,4,64,0,1,float16,fp8,16383,0.03946666667858759
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,64,4,64,128,1,float16,float16,32767,0.0360959991812706
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,64,4,64,0,1,float16,float16,32767,0.09331199526786804
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,64,4,64,128,1,float16,fp8,32767,0.012202666451533636
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,64,4,64,0,1,float16,fp8,32767,0.06714666883150737
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,64,8,64,128,1,float16,float16,1,0.011258666714032492
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,64,8,64,0,1,float16,float16,1,0.01102399950226148
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,64,8,64,128,1,float16,fp8,1,0.008890666688481966
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,64,8,64,0,1,float16,fp8,1,0.009029333169261614
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,64,8,64,128,1,float16,float16,3,0.011354666203260422
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,64,8,64,0,1,float16,float16,3,0.010960000256697336
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,64,8,64,128,1,float16,fp8,3,0.013978666315476099
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,64,8,64,0,1,float16,fp8,3,0.009461333354314169
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,64,8,64,128,1,float16,float16,7,0.011312000453472137
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,64,8,64,0,1,float16,float16,7,0.38226131598154706
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,64,8,64,128,1,float16,fp8,7,0.009797333429257074
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,64,8,64,0,1,float16,fp8,7,0.009354666496316591
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,64,8,64,128,1,float16,float16,15,0.011450666934251785
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,64,8,64,0,1,float16,float16,15,0.010847999403874079
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,64,8,64,128,1,float16,fp8,15,0.012506666282812754
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,64,8,64,0,1,float16,fp8,15,0.009317333499590555
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,64,8,64,128,1,float16,float16,31,0.011328000575304031
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,64,8,64,0,1,float16,float16,31,0.011098666737476984
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,64,8,64,128,1,float16,fp8,31,0.009098666409651438
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,64,8,64,0,1,float16,fp8,31,0.008703999842206636
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,64,8,64,128,1,float16,float16,63,0.011258666714032492
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,64,8,64,0,1,float16,float16,63,0.010821333775917688
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,64,8,64,128,1,float16,fp8,63,0.012442667037248611
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,64,8,64,0,1,float16,fp8,63,0.009850666547815004
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,64,8,64,128,1,float16,float16,127,0.011391999820868174
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,64,8,64,0,1,float16,float16,127,0.01119999960064888
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,64,8,64,128,1,float16,fp8,127,0.009226666763424873
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,64,8,64,0,1,float16,fp8,127,0.009152000149091085
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,64,8,64,128,1,float16,float16,255,0.013354666531085968
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,64,8,64,0,1,float16,float16,255,0.012997332960367203
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,64,8,64,128,1,float16,fp8,255,0.012639999389648438
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,64,8,64,0,1,float16,fp8,255,0.009685333197315535
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,64,8,64,128,1,float16,float16,511,0.017509333789348602
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,64,8,64,0,1,float16,float16,511,0.018165333817402523
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,64,8,64,128,1,float16,fp8,511,0.009130666653315226
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,64,8,64,0,1,float16,fp8,511,0.009418666362762451
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,64,8,64,128,1,float16,float16,1023,0.022405333817005157
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,64,8,64,0,1,float16,float16,1023,0.025813333690166473
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,64,8,64,128,1,float16,fp8,1023,0.012117333710193634
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,64,8,64,0,1,float16,fp8,1023,0.011557333171367645
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,64,8,64,128,1,float16,float16,2047,0.027914665639400482
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,64,8,64,0,1,float16,float16,2047,0.03664533297220866
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,64,8,64,128,1,float16,fp8,2047,0.009285333255926767
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,64,8,64,0,1,float16,fp8,2047,0.017738666385412216
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,64,8,64,128,1,float16,float16,4095,0.03484266748030981
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,64,8,64,0,1,float16,float16,4095,0.05353599786758423
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,64,8,64,128,1,float16,fp8,4095,0.009423999736706415
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,64,8,64,0,1,float16,fp8,4095,0.022463999688625336
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,64,8,64,128,1,float16,float16,8191,0.036490666369597115
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,64,8,64,0,1,float16,float16,8191,0.06782400111357371
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,64,8,64,128,1,float16,fp8,8191,0.009402666861812273
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,64,8,64,0,1,float16,fp8,8191,0.030426666140556335
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,64,8,64,128,1,float16,float16,16383,0.03479466587305069
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,64,8,64,0,1,float16,float16,16383,0.08993599812189738
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,64,8,64,128,1,float16,fp8,16383,0.009690666571259499
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,64,8,64,0,1,float16,fp8,16383,0.047925333182017006
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,64,8,64,128,1,float16,float16,32767,0.0344106654326121
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,64,8,64,0,1,float16,float16,32767,0.1328266660372416
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,64,8,64,128,1,float16,fp8,32767,0.009434666484594345
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,64,8,64,0,1,float16,fp8,32767,0.0811359981695811
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,64,1,64,128,1,float16,float16,1,0.5897599856058756
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,64,1,64,0,1,float16,float16,1,0.5903626680374146
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,64,1,64,128,1,float16,fp8,1,0.5946613152821859
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,64,1,64,128,1,float16,float16,7,0.6187893152236938
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,64,1,64,0,1,float16,fp8,1,0.5942293405532837
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,64,1,64,128,1,float16,float16,3,0.5918293396631876
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,64,1,64,0,1,float16,float16,3,0.5915306806564331
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,64,1,64,128,1,float16,fp8,3,0.5947039922078451
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,64,1,64,0,1,float16,fp8,3,0.5940746863683065
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,64,1,64,0,1,float16,float16,7,0.6191360155741373
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,64,1,64,128,1,float16,fp8,7,0.6250079870223999
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,64,1,64,0,1,float16,fp8,7,0.624725341796875
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,64,1,64,128,1,float16,float16,15,0.645146648089091
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,64,1,64,0,1,float16,float16,15,0.6451093355814616
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,64,1,64,128,1,float16,fp8,15,0.6489173173904419
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,64,1,64,0,1,float16,fp8,15,0.6488053401311239
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,64,1,64,128,1,float16,float16,31,0.8307519753774008
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,64,1,64,0,1,float16,float16,31,0.8269813060760498
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,64,1,64,128,1,float16,fp8,31,0.8328426678975424
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,64,1,64,0,1,float16,fp8,31,0.8327253659566244
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,64,1,64,128,1,float16,float16,63,0.8363359769185384
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,64,1,64,0,1,float16,float16,63,0.836143970489502
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,64,1,64,128,1,float16,fp8,63,0.8446133136749268
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,64,1,64,0,1,float16,fp8,63,0.8449119726816813
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,64,1,64,128,1,float16,float16,127,0.8555839856465658
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,64,1,64,0,1,float16,float16,127,0.8547893365224203
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,64,1,64,128,1,float16,fp8,127,0.84934401512146
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,64,1,64,0,1,float16,fp8,127,0.8488266468048096
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,64,2,64,128,1,float16,float16,1,0.0969599982102712
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,64,2,64,0,1,float16,float16,1,0.09711466232935588
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,64,2,64,128,1,float16,fp8,1,0.03702933341264725
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,64,2,64,128,1,float16,float16,7,0.09688533345858256
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,64,2,64,0,1,float16,fp8,1,0.03696533292531967
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,64,2,64,128,1,float16,float16,3,0.09770666559537251
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,64,2,64,0,1,float16,float16,3,0.09769599636395772
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,64,2,64,128,1,float16,fp8,3,0.037151999771595
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,64,2,64,0,1,float16,fp8,3,0.03694933404525121
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,64,2,64,0,1,float16,float16,7,0.09716799855232239
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,64,2,64,128,1,float16,fp8,7,0.03682666768630346
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,64,2,64,0,1,float16,fp8,7,0.036831999818483986
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,64,2,64,128,1,float16,float16,15,0.0977226694424947
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,64,2,64,0,1,float16,float16,15,0.09815466403961182
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,64,2,64,128,1,float16,fp8,15,0.03721600025892258
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,64,2,64,0,1,float16,fp8,15,0.03716266651948293
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,64,2,64,128,1,float16,float16,31,0.09748799602190654
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,64,2,64,0,1,float16,float16,31,0.09704533219337463
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,64,2,64,128,1,float16,fp8,31,0.03699733316898346
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,64,2,64,0,1,float16,fp8,31,0.03701333453257879
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,64,2,64,128,1,float16,float16,63,0.09648533662160237
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,64,2,64,0,1,float16,float16,63,0.09689066807428996
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,64,2,64,128,1,float16,fp8,63,0.03693866729736328
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,64,2,64,0,1,float16,fp8,63,0.03690666705369949
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,64,2,64,128,1,float16,float16,127,0.09730133414268494
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,64,2,64,0,1,float16,float16,127,0.09752000371615092
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,64,2,64,128,1,float16,fp8,127,0.045082668463389076
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,64,2,64,0,1,float16,fp8,127,0.04488533238569895
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,64,4,64,128,1,float16,float16,1,0.14389333128929138
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,64,4,64,0,1,float16,float16,1,0.14486400286356607
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,64,4,64,128,1,float16,fp8,1,0.041663999358812966
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,64,4,64,0,1,float16,fp8,1,0.04156800111134847
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,64,4,64,128,1,float16,float16,3,0.14406399925549826
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,64,4,64,0,1,float16,float16,3,0.14365333318710327
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,64,4,64,128,1,float16,fp8,3,0.041450666884581246
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,64,4,64,0,1,float16,fp8,3,0.04142933338880539
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,64,4,64,128,1,float16,float16,7,0.1439786652723948
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,64,4,64,0,1,float16,float16,7,0.1451520025730133
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,64,4,64,128,1,float16,fp8,7,0.04164266586303711
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,64,4,64,0,1,float16,fp8,7,0.04186133543650309
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,64,4,64,128,1,float16,float16,15,0.14385066429773966
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,64,4,64,0,1,float16,float16,15,0.14400532841682434
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,64,4,64,128,1,float16,fp8,15,0.04137066751718521
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,64,4,64,0,1,float16,fp8,15,0.04164266586303711
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,64,4,64,128,1,float16,float16,31,0.1453333298365275
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,64,4,64,0,1,float16,float16,31,0.14548800388971964
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,64,4,64,128,1,float16,fp8,31,0.041834667325019836
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,64,4,64,128,1,float16,float16,63,0.143477330605189
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,64,4,64,0,1,float16,fp8,31,0.04174399872620901
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,64,4,64,0,1,float16,float16,63,0.14381866653760275
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,64,4,64,128,1,float16,fp8,63,0.041008000572522484
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,64,4,64,0,1,float16,fp8,63,0.041333332657814026
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,64,4,64,128,1,float16,float16,127,0.1462399959564209
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,64,4,64,0,1,float16,float16,127,0.14737066626548767
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,64,4,64,128,1,float16,fp8,127,0.058330665032068886
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,64,4,64,0,1,float16,fp8,127,0.058042665322621666
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,64,8,64,0,1,float16,fp8,1,0.0717386653025945
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,64,8,64,128,1,float16,float16,3,0.27001599470774335
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,64,8,64,0,1,float16,float16,3,0.269978662331899
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,64,8,64,128,1,float16,fp8,3,0.07201066613197327
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,64,8,64,128,1,float16,float16,1,0.26763200759887695
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,64,8,64,0,1,float16,float16,1,0.26844267050425213
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,64,8,64,128,1,float16,fp8,1,0.07166400055090587
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,64,8,64,0,1,float16,fp8,3,0.0717439999183019
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,64,8,64,128,1,float16,float16,7,0.2704319953918457
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,64,8,64,0,1,float16,float16,7,0.2701440056165059
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,64,8,64,128,1,float16,fp8,7,0.07166400055090587
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,64,8,64,0,1,float16,fp8,7,0.07193600138028462
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,64,8,64,128,1,float16,float16,15,0.26822400093078613
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,64,8,64,0,1,float16,float16,15,0.2695680061976115
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,64,8,64,128,1,float16,fp8,15,0.07166933516661327
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,64,8,64,0,1,float16,fp8,15,0.07172800103823344
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,64,8,64,128,1,float16,float16,31,0.27395200729370117
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,64,8,64,0,1,float16,float16,31,0.2744479974110921
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,64,8,64,128,1,float16,fp8,31,0.07169599831104279
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,64,8,64,0,1,float16,fp8,31,0.07188266515731812
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,64,8,64,128,1,float16,float16,63,0.27213333050409955
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,64,8,64,0,1,float16,float16,63,0.27407999833424884
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,64,8,64,128,1,float16,fp8,63,0.07115733126799266
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,64,8,64,0,1,float16,fp8,63,0.07144000132878621
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,64,1,64,128,1,float16,float16,1,1.172976016998291
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,64,8,64,128,1,float16,float16,127,0.27621867259343463
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,64,8,64,0,1,float16,float16,127,0.27582399050394696
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,64,8,64,128,1,float16,fp8,127,0.08492799599965413
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,64,1,64,0,1,float16,float16,3,1.1767466862996419
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,64,8,64,0,1,float16,fp8,127,0.08470400174458821
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,64,1,64,0,1,float16,float16,1,1.1726453304290771
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,64,1,64,128,1,float16,fp8,1,1.1827999750773113
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,64,1,64,0,1,float16,fp8,1,1.1847999890645344
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,64,1,64,128,1,float16,float16,3,1.1769866943359375
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,64,1,64,128,1,float16,fp8,3,1.184122641881307
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,64,1,64,0,1,float16,fp8,3,1.1861546834309895
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,64,1,64,128,1,float16,float16,7,1.238037347793579
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,64,1,64,0,1,float16,float16,7,1.2370773156483967
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,64,1,64,128,1,float16,fp8,7,1.244266668955485
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,64,1,64,0,1,float16,fp8,7,1.2450933456420898
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,64,1,64,128,1,float16,float16,15,1.3028426965077717
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,64,1,64,0,1,float16,float16,15,1.302496035893758
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,64,1,64,128,1,float16,fp8,15,1.3007786273956299
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,64,1,64,0,1,float16,fp8,15,1.299295981725057
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,64,1,64,128,1,float16,float16,31,1.6696213086446126
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,64,1,64,0,1,float16,float16,31,1.6699892679850261
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,64,1,64,128,1,float16,fp8,31,1.6693065961201985
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,64,1,64,0,1,float16,fp8,31,1.6712212562561035
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,64,1,64,128,1,float16,float16,63,1.7034932772318523
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,64,1,64,0,1,float16,float16,63,1.703503926595052
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,64,1,64,128,1,float16,fp8,63,1.7059574127197266
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,64,1,64,0,1,float16,fp8,63,1.7068212827046711
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,64,2,64,128,1,float16,float16,1,0.18781866629918417
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,64,2,64,0,1,float16,float16,1,0.18733867009480795
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,64,2,64,128,1,float16,fp8,1,0.08658132950464885
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,64,2,64,0,1,float16,fp8,1,0.08582400282224019
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,64,2,64,128,1,float16,float16,3,0.18758400281270346
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,64,2,64,0,1,float16,float16,3,0.18556799491246542
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,64,2,64,128,1,float16,fp8,3,0.08551466464996338
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,64,2,64,0,1,float16,fp8,3,0.0865066647529602
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,64,2,64,128,1,float16,float16,7,0.18769599994023642
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,64,2,64,0,1,float16,float16,7,0.1874879995981852
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,64,2,64,128,1,float16,fp8,7,0.08644800384839375
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,64,2,64,0,1,float16,fp8,7,0.08589333295822144
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,64,2,64,128,1,float16,float16,15,0.18829333782196045
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,64,2,64,0,1,float16,fp8,31,0.08591999610265096
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,64,2,64,0,1,float16,float16,15,0.18645334243774414
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,64,2,64,128,1,float16,fp8,15,0.08592533071835835
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,64,2,64,0,1,float16,fp8,15,0.0867680013179779
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,64,2,64,128,1,float16,float16,31,0.189520001411438
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,64,2,64,0,1,float16,float16,31,0.18879467248916626
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,64,2,64,128,1,float16,fp8,31,0.08642666538556416
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,64,2,64,128,1,float16,float16,63,0.18687466780344644
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,64,2,64,0,1,float16,float16,63,0.186463991800944
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,64,2,64,128,1,float16,fp8,63,0.08590933680534363
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,64,2,64,0,1,float16,fp8,63,0.0855466624101003
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,64,4,64,128,1,float16,float16,1,0.28285332520802814
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,64,4,64,0,1,float16,float16,1,0.2836853265762329
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,64,4,64,128,1,float16,fp8,1,0.09117333094278972
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,64,4,64,0,1,float16,fp8,1,0.09082133571306865
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,64,4,64,128,1,float16,float16,3,0.28364266951878864
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,64,4,64,0,1,float16,float16,3,0.28167466322580975
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,64,4,64,128,1,float16,fp8,3,0.0902453362941742
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,64,4,64,0,1,float16,fp8,3,0.09132799506187439
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,64,4,64,128,1,float16,float16,7,0.28435200452804565
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,64,4,64,0,1,float16,float16,7,0.2850933273633321
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,64,4,64,128,1,float16,fp8,7,0.09040533502896626
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,64,4,64,0,1,float16,fp8,7,0.09112000465393066
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,64,4,64,128,1,float16,float16,15,0.2850559949874878
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,64,4,64,0,1,float16,float16,15,0.2840320070584615
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,64,4,64,128,1,float16,fp8,15,0.08959466218948364
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,64,4,64,0,1,float16,fp8,15,0.09098666906356812
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,64,4,64,128,1,float16,float16,31,0.28697067499160767
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,64,4,64,0,1,float16,float16,31,0.2877333362897237
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,64,4,64,128,1,float16,fp8,31,0.09057066837946574
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,64,4,64,0,1,float16,fp8,31,0.09087999661763509
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,64,4,64,128,1,float16,float16,63,0.284170667330424
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,64,4,64,0,1,float16,float16,63,0.2826240062713623
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,64,4,64,128,1,float16,fp8,63,0.08921600381533305
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,64,4,64,0,1,float16,fp8,63,0.09030399719874065
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,64,8,64,128,1,float16,float16,1,0.5382879972457886
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,64,8,64,0,1,float16,float16,1,0.5361706813176473
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,64,8,64,128,1,float16,fp8,1,0.13698133826255798
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,64,8,64,0,1,float16,fp8,1,0.13699199755986533
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,64,8,64,128,1,float16,float16,3,0.5341386795043945
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,64,8,64,128,1,float16,fp8,3,0.13539733489354452
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,64,8,64,0,1,float16,float16,3,0.5379786491394043
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,64,8,64,0,1,float16,fp8,3,0.1366986632347107
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,64,8,64,128,1,float16,float16,7,0.5405866702397665
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,64,8,64,0,1,float16,float16,7,0.5397546688715616
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,64,8,64,128,1,float16,fp8,7,0.13698666294415793
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,64,8,64,0,1,float16,fp8,7,0.1365653375784556
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,64,8,64,128,1,float16,float16,15,0.5384746789932251
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,64,8,64,0,1,float16,float16,15,0.5382239818572998
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,64,8,64,128,1,float16,fp8,15,0.13714133699735007
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,64,8,64,0,1,float16,fp8,15,0.136543999115626
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,64,8,64,128,1,float16,float16,31,0.5467626651128134
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,64,8,64,0,1,float16,float16,31,0.5460160175959269
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,64,8,64,128,1,float16,fp8,31,0.13660800457000732
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,64,8,64,0,1,float16,fp8,31,0.13660266995429993
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,64,8,64,128,1,float16,float16,63,0.5372746785481771
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,64,8,64,0,1,float16,float16,63,0.537061333656311
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,64,8,64,128,1,float16,fp8,63,0.13657066226005554
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,64,8,64,0,1,float16,fp8,63,0.13612266381581625
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,64,1,64,128,1,float16,float16,1,0.01441066712141037
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,64,1,64,0,1,float16,float16,1,0.014570667097965876
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,64,1,64,128,1,float16,fp8,1,0.014783999572197596
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,64,1,64,0,1,float16,fp8,1,0.014773332824309668
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,64,1,64,128,1,float16,float16,3,0.01479999969402949
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,64,1,64,0,1,float16,float16,3,0.014677333335081736
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,64,1,64,128,1,float16,fp8,3,0.015109332899252573
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,64,1,64,0,1,float16,fp8,3,0.014767999450365702
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,64,1,64,128,1,float16,float16,7,0.01492799942692121
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,64,1,64,0,1,float16,float16,7,0.015066667149464289
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,64,1,64,128,1,float16,fp8,7,0.015082667271296183
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,64,1,64,0,1,float16,fp8,7,0.015130666395028433
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,64,1,64,128,1,float16,float16,15,0.015568000574906668
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,64,1,64,0,1,float16,float16,15,0.015471999843915304
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,64,1,64,128,1,float16,fp8,15,0.015658666690190632
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,64,1,64,0,1,float16,fp8,15,0.01551466683546702
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,64,1,64,128,1,float16,float16,31,0.01870399961868922
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,64,1,64,0,1,float16,float16,31,0.018986667195955913
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,64,1,64,128,1,float16,fp8,31,0.01905599981546402
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,64,1,64,0,1,float16,fp8,31,0.019237333287795384
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,64,1,64,128,1,float16,float16,63,0.019082666685183842
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,64,1,64,0,1,float16,fp8,127,0.019333332777023315
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,64,1,64,0,1,float16,float16,63,0.01918399954835574
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,64,1,64,128,1,float16,fp8,63,0.01942933350801468
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,64,1,64,0,1,float16,fp8,63,0.019445333629846573
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,64,1,64,128,1,float16,float16,127,0.01921066641807556
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,64,1,64,0,1,float16,float16,127,0.01940800001223882
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,64,1,64,128,1,float16,fp8,127,0.019226666539907455
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,64,1,64,128,1,float16,float16,255,0.01937066639463107
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,64,1,64,0,1,float16,float16,255,0.022602667411168415
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,64,1,64,128,1,float16,fp8,255,0.019167999426523846
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,64,1,64,0,1,float16,fp8,255,0.02311466634273529
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,64,1,64,128,1,float16,float16,511,0.019424000134070713
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,64,1,64,0,1,float16,float16,511,0.032229334115982056
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,64,1,64,0,1,float16,fp8,1023,0.054330666859944664
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,64,1,64,128,1,float16,fp8,511,0.01942933350801468
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,64,1,64,0,1,float16,fp8,511,0.03315199911594391
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,64,1,64,128,1,float16,float16,1023,0.019424000134070713
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,64,1,64,0,1,float16,float16,1023,0.052154665191968284
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,64,1,64,0,1,float16,float16,4095,0.1704639991124471
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,64,1,64,128,1,float16,fp8,1023,0.019215999792019527
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,64,1,64,128,1,float16,float16,2047,0.019445333629846573
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,64,1,64,128,1,float16,float16,8191,0.01937066639463107
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,64,1,64,0,1,float16,float16,2047,0.09173867106437683
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,64,1,64,128,1,float16,fp8,2047,0.0198186660806338
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,64,1,64,0,1,float16,fp8,2047,0.09471999605496724
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,64,1,64,128,1,float16,float16,4095,0.01960533360640208
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,64,1,64,128,1,float16,fp8,4095,0.019498666127522785
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,64,1,64,0,1,float16,fp8,4095,0.1766773263613383
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,64,1,64,0,1,float16,float16,8191,0.3321546713511149
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,64,1,64,128,1,float16,fp8,8191,0.01926400015751521
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,64,1,64,0,1,float16,fp8,8191,0.34060800075531006
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,64,1,64,128,1,float16,float16,16383,0.019866666446129482
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,64,1,64,0,1,float16,float16,16383,0.8008106549580892
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,64,1,64,128,1,float16,fp8,16383,0.01939733326435089
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,64,2,64,128,1,float16,float16,1,0.014912000546852747
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,64,1,64,0,1,float16,fp8,16383,0.7824693520863851
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,64,2,64,0,1,float16,float16,1,0.014767999450365702
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,64,2,64,128,1,float16,float16,7,0.014954666296641031
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,64,2,64,128,1,float16,fp8,1,0.009375999992092451
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,64,2,64,0,1,float16,fp8,1,0.00938666673998038
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,64,2,64,128,1,float16,float16,3,0.015157333264748255
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,64,2,64,0,1,float16,float16,3,0.014831999937693277
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,64,2,64,128,1,float16,fp8,3,0.009685333197315535
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,64,2,64,0,1,float16,fp8,3,0.009898666913310686
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,64,2,64,0,1,float16,fp8,15,0.00960533320903778
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,64,2,64,0,1,float16,float16,7,0.015098666151364645
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,64,2,64,128,1,float16,fp8,7,0.009557333464423815
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,64,2,64,0,1,float16,fp8,7,0.009375999992092451
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,64,2,64,128,1,float16,float16,15,0.015365333606799444
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,64,2,64,0,1,float16,fp8,31,0.009082666908701261
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,64,2,64,0,1,float16,float16,15,0.016069332758585613
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,64,2,64,128,1,float16,fp8,15,0.011850666254758835
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,64,2,64,128,1,float16,float16,31,0.019205333044131596
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,64,2,64,0,1,float16,float16,31,0.018965333700180054
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,64,2,64,128,1,float16,fp8,31,0.009397333487868309
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,64,2,64,128,1,float16,float16,63,0.01903466631968816
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,64,2,64,0,1,float16,float16,63,0.019050666441520054
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,64,2,64,128,1,float16,fp8,63,0.009306666751702627
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,64,2,64,0,1,float16,fp8,63,0.012682666381200155
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,64,2,64,128,1,float16,float16,127,0.019546666493018467
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,64,2,64,0,1,float16,float16,127,0.01934933289885521
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,64,2,64,128,1,float16,fp8,127,0.009984000275532404
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,64,2,64,0,1,float16,fp8,127,0.009999999776482582
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,64,2,64,128,1,float16,float16,255,0.013679999858140945
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,64,2,64,0,1,float16,float16,255,0.014245333770910898
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,64,2,64,128,1,float16,fp8,255,0.010640000303586325
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,64,2,64,0,1,float16,fp8,255,0.012719999998807907
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,64,2,64,128,1,float16,float16,511,0.013909333695967993
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,64,2,64,0,1,float16,float16,511,0.014650666465361914
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,64,2,64,128,1,float16,fp8,511,0.010437333335479101
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,64,2,64,0,1,float16,fp8,511,0.01146666705608368
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,64,2,64,128,1,float16,float16,1023,0.019093333433071773
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,64,2,64,0,1,float16,float16,1023,0.02075733368595441
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,64,2,64,128,1,float16,fp8,1023,0.010858666151762009
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,64,2,64,0,1,float16,fp8,1023,0.0145066666106383
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,64,2,64,128,1,float16,float16,2047,0.022858666876951855
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,64,2,64,0,1,float16,float16,2047,0.028933333853880566
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,64,2,64,128,1,float16,fp8,2047,0.01146666705608368
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,64,2,64,0,1,float16,fp8,2047,0.018186666071414948
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,64,2,64,128,1,float16,float16,4095,0.02770666778087616
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,64,2,64,0,1,float16,float16,4095,0.03886933376391729
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,64,2,64,128,1,float16,fp8,4095,0.011055999745925268
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,64,2,64,0,1,float16,fp8,4095,0.028783999383449554
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,64,2,64,128,1,float16,float16,8191,0.032618666688601174
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,64,2,64,0,1,float16,float16,8191,0.054666668176651
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,64,2,64,0,1,float16,fp8,16383,0.06181333462397257
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,64,2,64,128,1,float16,fp8,8191,0.011237333218256632
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,64,2,64,0,1,float16,fp8,8191,0.036943999429543815
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,64,4,64,0,1,float16,fp8,1,0.00960533320903778
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,64,2,64,128,1,float16,float16,16383,0.04040000090996424
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,64,2,64,0,1,float16,float16,16383,0.08230400085449219
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,64,2,64,128,1,float16,fp8,16383,0.011722666521867117
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,64,4,64,128,1,float16,float16,1,0.01110400011142095
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,64,4,64,0,1,float16,float16,1,0.01137599969903628
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,64,4,64,128,1,float16,fp8,1,0.012608000387748083
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,64,4,64,128,1,float16,float16,3,0.01099733387430509
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,64,4,64,0,1,float16,float16,3,0.010970667004585266
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,64,4,64,128,1,float16,fp8,3,0.00985599992175897
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,64,4,64,0,1,float16,fp8,3,0.009098666409651438
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,64,4,64,128,1,float16,float16,7,0.010901333143313726
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,64,4,64,0,1,float16,fp8,15,0.009098666409651438
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,64,4,64,0,1,float16,float16,7,0.010938666760921478
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,64,4,64,128,1,float16,fp8,7,0.010346666599313417
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,64,4,64,0,1,float16,fp8,7,0.012485332787036896
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,64,4,64,128,1,float16,float16,15,0.010933333386977514
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,64,4,64,128,1,float16,float16,63,0.012293333808581034
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,64,4,64,0,1,float16,float16,15,0.011029332876205444
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,64,4,64,128,1,float16,fp8,15,0.009759999811649323
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,64,4,64,128,1,float16,float16,31,0.012981332838535309
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,64,4,64,0,1,float16,float16,31,0.011087999989589056
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,64,4,64,128,1,float16,fp8,31,0.009679999823371569
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,64,4,64,0,1,float16,fp8,31,0.010181333248813948
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,64,4,64,0,1,float16,float16,63,0.011002667248249054
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,64,4,64,128,1,float16,fp8,63,0.012613333761692047
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,64,4,64,0,1,float16,fp8,63,0.010175999874869982
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,64,4,64,128,1,float16,fp8,255,0.012080000092585882
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,64,4,64,128,1,float16,float16,127,0.011221333096424738
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,64,4,64,0,1,float16,float16,127,0.010837333897749582
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,64,4,64,128,1,float16,fp8,127,0.009077333534757296
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,64,4,64,0,1,float16,fp8,127,0.009663999701539675
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,64,4,64,128,1,float16,float16,255,0.01320533330241839
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,64,4,64,0,1,float16,float16,255,0.013056000073750814
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,64,4,64,0,1,float16,fp8,255,0.010122666756312052
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,64,4,64,128,1,float16,float16,511,0.01823466643691063
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,64,4,64,0,1,float16,float16,511,0.01836799954374631
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,64,4,64,128,1,float16,fp8,511,0.009312000125646591
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,64,4,64,0,1,float16,fp8,511,0.010399999717871347
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,64,4,64,128,1,float16,float16,1023,0.02271466702222824
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,64,4,64,0,1,float16,float16,1023,0.02571200082699458
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,64,4,64,128,1,float16,fp8,1023,0.009632000078757605
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,64,4,64,0,1,float16,fp8,1023,0.013770667215188345
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,64,4,64,128,1,float16,float16,2047,0.028768000503381092
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,64,4,64,0,1,float16,float16,2047,0.03733866661787033
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,64,4,64,128,1,float16,fp8,2047,0.009935999910036722
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,64,4,64,0,1,float16,fp8,2047,0.020762667059898376
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,64,4,64,128,1,float16,float16,4095,0.035360001027584076
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,64,4,64,0,1,float16,float16,4095,0.05320000151793162
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,64,4,64,128,1,float16,fp8,4095,0.0164533331990242
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,64,4,64,0,1,float16,fp8,4095,0.0268053337931633
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,64,4,64,128,1,float16,float16,8191,0.03594133257865906
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,64,4,64,0,1,float16,float16,8191,0.06824000179767609
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,64,4,64,128,1,float16,fp8,8191,0.009952000031868616
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,64,4,64,0,1,float16,fp8,8191,0.036490666369597115
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,64,4,64,128,1,float16,float16,16383,0.03526933242877325
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,64,4,64,0,1,float16,float16,16383,0.08970666925112407
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,64,4,64,128,1,float16,fp8,16383,0.01032533310353756
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,64,4,64,0,1,float16,fp8,16383,0.05858133236567179
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,64,8,64,128,1,float16,float16,1,0.01249066616098086
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,64,8,64,0,1,float16,float16,1,0.011610666910807291
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,64,8,64,128,1,float16,fp8,1,0.008858666444818178
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,64,8,64,0,1,float16,fp8,1,0.012015999605258306
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,64,8,64,128,1,float16,float16,3,0.011391999820868174
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,64,8,64,0,1,float16,float16,3,0.011328000575304031
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,64,8,64,128,1,float16,fp8,3,0.011642667154471079
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,64,8,64,0,1,float16,fp8,3,0.012837332983811697
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,64,8,64,128,1,float16,float16,7,0.011333333949247995
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,64,8,64,0,1,float16,float16,7,0.011221333096424738
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,64,8,64,128,1,float16,fp8,7,0.009408000235756239
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,64,8,64,0,1,float16,fp8,7,0.009296000003814697
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,64,8,64,128,1,float16,float16,15,0.011450666934251785
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,64,8,64,0,1,float16,float16,15,0.011541333049535751
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,64,8,64,128,1,float16,fp8,15,0.009941333283980688
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,64,8,64,0,1,float16,fp8,15,0.012826666235923767
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,64,8,64,128,1,float16,float16,31,0.011349332829316458
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,64,8,64,0,1,float16,float16,31,0.01119999960064888
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,64,8,64,128,1,float16,fp8,31,0.00898133342464765
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,64,8,64,0,1,float16,fp8,31,0.009530666594703993
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,64,8,64,128,1,float16,float16,63,0.011349332829316458
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,64,8,64,0,1,float16,float16,63,0.011413333316644033
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,64,8,64,128,1,float16,fp8,63,0.00938666673998038
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,64,8,64,0,1,float16,fp8,63,0.012304000556468964
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,64,8,64,128,1,float16,fp8,255,0.009685333197315535
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,64,8,64,128,1,float16,float16,127,0.011194666226704916
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,64,8,64,0,1,float16,float16,127,0.011061333119869232
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,64,8,64,128,1,float16,fp8,127,0.009408000235756239
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,64,8,64,0,1,float16,fp8,127,0.009898666913310686
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,64,8,64,128,1,float16,float16,255,0.01930133377512296
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,64,8,64,0,1,float16,float16,255,0.018992000569899876
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,64,8,64,0,1,float16,float16,1023,0.04102399945259094
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,64,8,64,0,1,float16,fp8,255,0.012410666793584824
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,64,8,64,128,1,float16,float16,511,0.023919999599456787
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,64,8,64,0,1,float16,float16,511,0.026122666895389557
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,64,8,64,128,1,float16,fp8,511,0.009205333267649015
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,64,8,64,0,1,float16,fp8,511,0.010581333190202713
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,64,8,64,128,1,float16,float16,1023,0.03379733363787333
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,64,8,64,128,1,float16,fp8,1023,0.010314666976531347
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,64,8,64,0,1,float16,fp8,1023,0.017535999417304993
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,64,8,64,128,1,float16,float16,2047,0.03621333340803782
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,64,8,64,0,1,float16,float16,2047,0.05193066596984863
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,64,8,64,128,1,float16,fp8,2047,0.009850666547815004
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,64,8,64,0,1,float16,fp8,2047,0.021290667355060577
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,64,8,64,128,1,float16,float16,4095,0.03549333413441976
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,64,8,64,0,1,float16,float16,4095,0.06705600023269653
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,64,8,64,128,1,float16,fp8,4095,0.01007466639081637
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,64,8,64,0,1,float16,fp8,4095,0.02957333376010259
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,64,8,64,128,1,float16,float16,8191,0.03643200049797694
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,64,8,64,0,1,float16,float16,8191,0.08915199836095174
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,64,8,64,128,1,float16,fp8,8191,0.009717333440979322
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,64,1,128,0,1,float16,float16,1,0.02459733436505
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,64,8,64,0,1,float16,fp8,8191,0.04704533517360687
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,64,8,64,128,1,float16,float16,16383,0.03552533437808355
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,64,8,64,0,1,float16,float16,16383,0.13266133268674216
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,64,8,64,128,1,float16,fp8,16383,0.010234666367371878
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,64,8,64,0,1,float16,fp8,16383,0.07991999884446462
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,64,1,128,0,1,float16,fp8,1,0.02478400121132533
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,64,1,128,0,1,float16,float16,3,0.02496533344189326
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,64,1,128,0,1,float16,fp8,3,0.025759999950726826
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,64,1,128,0,1,float16,float16,7,0.025888000925381977
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,64,1,128,0,1,float16,fp8,7,0.02601066728432973
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,64,1,128,0,1,float16,float16,15,0.03230933348337809
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,64,1,128,0,1,float16,fp8,15,0.03233066697915395
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,64,1,128,0,1,float16,float16,255,0.05829333265622457
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,64,1,128,0,1,float16,float16,31,0.032416000962257385
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,64,1,128,0,1,float16,fp8,31,0.03233066697915395
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,64,1,128,0,1,float16,float16,63,0.032773333291212715
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,64,1,128,0,1,float16,fp8,63,0.03271999955177307
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,64,1,128,0,1,float16,float16,127,0.03875199953715006
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,64,1,128,0,1,float16,fp8,127,0.03881600002447764
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,64,1,128,0,1,float16,fp8,255,0.05871466795603434
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,64,1,128,0,1,float16,float16,511,0.0972160001595815
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,64,1,128,0,1,float16,fp8,511,0.09816533327102661
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,64,1,128,0,1,float16,float16,1023,0.17482666174570718
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,64,1,128,0,1,float16,fp8,1023,0.17669334014256796
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,64,1,128,0,1,float16,float16,2047,0.3296479980150859
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,64,1,128,0,1,float16,fp8,2047,0.33378668626149494
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,64,1,128,0,1,float16,float16,4095,0.6510879993438721
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,64,1,128,0,1,float16,fp8,4095,0.6491146485010783
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,64,1,128,0,1,float16,float16,8191,1.290053367614746
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,64,1,128,0,1,float16,fp8,8191,1.2968053023020427
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,64,2,128,0,1,float16,float16,1,0.01357866699496905
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,64,2,128,0,1,float16,fp8,1,0.010133333504199982
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,64,2,128,0,1,float16,float16,3,0.01403733342885971
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,64,2,128,0,1,float16,fp8,3,0.011600000162919363
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,64,2,128,0,1,float16,float16,7,0.013199999928474426
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,64,2,128,0,1,float16,float16,31,0.013306666165590286
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,64,2,128,0,1,float16,fp8,63,0.010858666151762009
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,64,2,128,0,1,float16,fp8,7,0.010960000256697336
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,64,2,128,0,1,float16,fp8,31,0.010634666929642359
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,64,2,128,0,1,float16,float16,15,0.01340266689658165
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,64,2,128,0,1,float16,float16,63,0.013637332866589228
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,64,2,128,0,1,float16,fp8,15,0.010826667149861654
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,64,2,128,0,1,float16,float16,127,0.012938667088747025
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,64,2,128,0,1,float16,fp8,255,0.013088000317414602
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,64,2,128,0,1,float16,float16,1023,0.03578133384386698
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,64,2,128,0,1,float16,fp8,1023,0.02717333287000656
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,64,2,128,0,1,float16,float16,2047,0.05462933580080668
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,64,2,128,0,1,float16,float16,255,0.017103999853134155
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,64,2,128,0,1,float16,fp8,2047,0.03789333254098892
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,64,2,128,0,1,float16,fp8,127,0.014122666170199713
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,64,2,128,0,1,float16,float16,4095,0.08613333106040955
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,64,2,128,0,1,float16,fp8,4095,0.06285866598288219
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,64,2,128,0,1,float16,float16,8191,0.10980266332626343
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,64,2,128,0,1,float16,float16,511,0.02426133304834366
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,64,2,128,0,1,float16,fp8,511,0.014826666563749313
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,64,2,128,0,1,float16,fp8,8191,0.08286400139331818
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,64,4,128,0,1,float16,float16,1,0.011125333607196808
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,64,4,128,0,1,float16,float16,31,0.011055999745925268
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,64,4,128,0,1,float16,float16,7,0.01128000020980835
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,64,4,128,0,1,float16,fp8,1,0.00854399986565113
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,64,4,128,0,1,float16,fp8,7,0.007477333148320516
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,64,4,128,0,1,float16,float16,3,0.011509332805871964
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,64,4,128,0,1,float16,fp8,63,0.00860799973209699
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,64,4,128,0,1,float16,fp8,3,0.007690666864315669
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,64,4,128,0,1,float16,float16,15,0.011429333438475927
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,64,4,128,0,1,float16,float16,63,0.011242666592200598
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,64,4,128,0,1,float16,float16,127,0.011541333049535751
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,64,4,128,0,1,float16,fp8,31,0.007280000175038974
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,64,4,128,0,1,float16,float16,1023,0.04804799954096476
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,64,4,128,0,1,float16,fp8,15,0.007749333356817563
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,64,4,128,0,1,float16,fp8,255,0.011066666493813196
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,64,4,128,0,1,float16,float16,511,0.029264000554879505
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,64,4,128,0,1,float16,fp8,2047,0.0349386657277743
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,64,4,128,0,1,float16,fp8,1023,0.02696000039577484
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,64,4,128,0,1,float16,float16,2047,0.06691733499368031
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,64,4,128,0,1,float16,fp8,127,0.007957333077987036
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,64,4,128,0,1,float16,float16,255,0.022405333817005157
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,64,4,128,0,1,float16,fp8,511,0.018506667266289394
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,64,4,128,0,1,float16,fp8,8191,0.07407466570536296
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,64,4,128,0,1,float16,fp8,4095,0.050800000627835594
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,64,4,128,0,1,float16,float16,4095,0.0846720039844513
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,64,4,128,0,1,float16,float16,8191,0.12079999844233195
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,64,8,128,0,1,float16,fp8,1,0.006815999746322632
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,64,8,128,0,1,float16,fp8,7,0.00706666645904382
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,64,8,128,0,1,float16,fp8,3,0.0069386667261521024
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,64,8,128,0,1,float16,fp8,31,0.007040000210205714
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,64,8,128,0,1,float16,fp8,63,0.0069333333522081375
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,64,8,128,0,1,float16,fp8,15,0.006954666847983996
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,64,8,128,0,1,float16,fp8,127,0.007818666597207388
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,64,8,128,0,1,float16,float16,1,0.016469333320856094
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,64,8,128,0,1,float16,float16,3,0.016309333344300587
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,64,8,128,0,1,float16,float16,255,0.03065066784620285
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,64,8,128,0,1,float16,float16,31,0.01598400001724561
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,64,8,128,0,1,float16,float16,511,0.04741866886615753
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,64,8,128,0,1,float16,fp8,511,0.021386665602525074
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,64,8,128,0,1,float16,float16,63,0.01590399940808614
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,64,8,128,0,1,float16,float16,1023,0.062133332093556724
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,64,8,128,0,1,float16,float16,15,0.01589866727590561
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,64,8,128,0,1,float16,float16,2047,0.08321066697438557
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,64,8,128,0,1,float16,float16,7,0.015802666544914246
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,64,8,128,0,1,float16,fp8,2047,0.04018666595220566
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,64,8,128,0,1,float16,float16,4095,0.11911466717720032
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,64,8,128,0,1,float16,float16,127,0.016085332880417507
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,64,8,128,0,1,float16,fp8,255,0.015333333363135656
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,64,8,128,0,1,float16,float16,8191,0.19075733423233032
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,64,8,128,0,1,float16,fp8,1023,0.02716800073782603
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,64,1,128,0,1,float16,float16,1,0.008698666468262672
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,64,1,128,0,1,float16,fp8,1,0.00878399983048439
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,64,1,128,0,1,float16,float16,3,0.008943999807039896
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,64,8,128,0,1,float16,fp8,4095,0.058543999989827476
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,64,1,128,0,1,float16,fp8,3,0.00891733355820179
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,64,8,128,0,1,float16,fp8,8191,0.09886399904886882
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,64,1,128,0,1,float16,float16,7,0.008869333192706108
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,64,1,128,0,1,float16,fp8,7,0.009205333267649015
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,64,1,128,0,1,float16,float16,15,0.008874666566650072
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,64,1,128,0,1,float16,fp8,15,0.009045333291093508
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,64,1,128,0,1,float16,float16,31,0.009733333562811216
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,64,1,128,0,1,float16,fp8,31,0.011264000087976456
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,64,1,128,0,1,float16,float16,63,0.011109333485364914
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,64,1,128,0,1,float16,fp8,63,0.011141333729028702
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,64,1,128,0,1,float16,float16,127,0.011322667201360067
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,64,1,128,0,1,float16,fp8,127,0.011616000284751257
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,64,1,128,0,1,float16,float16,255,0.011359999577204386
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,64,1,128,0,1,float16,fp8,255,0.011317333827416102
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,64,1,128,0,1,float16,float16,511,0.01618133361140887
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,64,1,128,0,1,float16,fp8,511,0.01578666642308235
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,64,1,128,0,1,float16,float16,1023,0.027823999524116516
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,64,1,128,0,1,float16,fp8,1023,0.027221334477265675
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,64,1,128,0,1,float16,float16,2047,0.035114665826161705
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,64,1,128,0,1,float16,fp8,2047,0.03469866762558619
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,64,1,128,0,1,float16,float16,4095,0.04836266736189524
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,64,1,128,0,1,float16,fp8,4095,0.047557334105173744
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,64,1,128,0,1,float16,float16,8191,0.07604800164699554
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,64,1,128,0,1,float16,fp8,8191,0.07425066828727722
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,64,1,128,0,1,float16,float16,16383,0.12943999965985617
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,64,1,128,0,1,float16,fp8,16383,0.12661866346995035
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,64,1,128,0,1,float16,float16,32767,0.23750933011372885
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,64,1,128,0,1,float16,fp8,32767,0.23245332638422647
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,64,2,128,0,1,float16,float16,1,0.0086666668454806
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,64,2,128,0,1,float16,fp8,1,0.008858666444818178
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,64,2,128,0,1,float16,float16,3,0.00890666681031386
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,64,2,128,0,1,float16,float16,7,0.00874133345981439
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,64,2,128,0,1,float16,fp8,7,0.008922666932145754
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,64,2,128,0,1,float16,float16,31,0.009589333087205887
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,64,2,128,0,1,float16,fp8,31,0.008832000195980072
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,64,2,128,0,1,float16,float16,63,0.011178666104873022
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,64,2,128,0,1,float16,fp8,63,0.00933333362142245
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,64,2,128,0,1,float16,float16,127,0.01129066695769628
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,64,2,128,0,1,float16,fp8,127,0.010154666379094124
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,64,2,128,0,1,float16,fp8,255,0.012309333930412928
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,64,2,128,0,1,float16,fp8,3,0.008912000184257826
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,64,2,128,0,1,float16,float16,1023,0.027722666660944622
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,64,2,128,0,1,float16,float16,511,0.015909332782030106
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,64,2,128,0,1,float16,float16,15,0.008976000050703684
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,64,2,128,0,1,float16,fp8,15,0.00927466650803884
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,64,2,128,0,1,float16,fp8,511,0.012191999703645706
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,64,2,128,0,1,float16,fp8,1023,0.014111999422311783
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,64,2,128,0,1,float16,float16,2047,0.03497066597143809
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,64,2,128,0,1,float16,fp8,2047,0.016063999384641647
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,64,2,128,0,1,float16,float16,255,0.011418666690587997
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,64,2,128,0,1,float16,fp8,16383,0.026154667139053345
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,64,2,128,0,1,float16,float16,4095,0.018085333208243053
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,64,2,128,0,1,float16,fp8,4095,0.018133333573738735
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,64,2,128,0,1,float16,float16,8191,0.021957332889238994
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,64,2,128,0,1,float16,fp8,8191,0.022543999056021374
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,64,2,128,0,1,float16,float16,16383,0.024362665911515553
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,64,2,128,0,1,float16,float16,32767,0.030576000610987347
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,64,2,128,0,1,float16,fp8,32767,0.03334933271010717
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,64,4,128,0,1,float16,float16,1,0.008869333192706108
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,64,4,128,0,1,float16,fp8,1,0.00749333327015241
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,64,4,128,0,1,float16,float16,3,0.009119999905427298
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,64,4,128,0,1,float16,fp8,3,0.006784000123540561
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,64,4,128,0,1,float16,float16,7,0.00878399983048439
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,64,4,128,0,1,float16,fp8,7,0.007173333317041397
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,64,4,128,0,1,float16,float16,15,0.008762666955590248
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,64,4,128,0,1,float16,fp8,15,0.006735999758044879
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,64,4,128,0,1,float16,float16,31,0.009690666571259499
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,64,4,128,0,1,float16,fp8,31,0.007135999699433644
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,64,4,128,0,1,float16,float16,63,0.011477333803971609
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,64,4,128,0,1,float16,fp8,63,0.006704000135262807
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,64,4,128,0,1,float16,float16,127,0.011125333607196808
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,64,4,128,0,1,float16,fp8,127,0.007743999982873599
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,64,4,128,0,1,float16,float16,255,0.011136000355084738
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,64,4,128,0,1,float16,fp8,255,0.009338666374484697
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,64,4,128,0,1,float16,float16,511,0.016016000260909397
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,64,4,128,0,1,float16,fp8,511,0.010112000008424124
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,64,4,128,0,1,float16,float16,1023,0.027808000644048054
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,64,4,128,0,1,float16,fp8,1023,0.010629333555698395
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,64,4,128,0,1,float16,float16,2047,0.014762666076421738
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,64,4,128,0,1,float16,fp8,2047,0.011519999553759893
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,64,4,128,0,1,float16,float16,4095,0.015429332852363586
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,64,4,128,0,1,float16,fp8,4095,0.013162666310866674
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,64,4,128,0,1,float16,float16,8191,0.01941866676012675
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,64,4,128,0,1,float16,fp8,8191,0.016016000260909397
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,64,4,128,0,1,float16,float16,16383,0.021738665799299877
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,64,4,128,0,1,float16,fp8,16383,0.019173332800467808
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,64,4,128,0,1,float16,float16,32767,0.03460799902677536
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,64,4,128,0,1,float16,fp8,32767,0.02659733345111211
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,64,8,128,0,1,float16,float16,1,0.008623999853928884
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,64,8,128,0,1,float16,fp8,1,0.0058613332609335584
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,64,8,128,0,1,float16,float16,3,0.00878399983048439
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,64,8,128,0,1,float16,fp8,3,0.006053333481152852
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,64,8,128,0,1,float16,float16,63,0.011183999478816986
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,64,8,128,0,1,float16,float16,7,0.008863999818762144
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,64,8,128,0,1,float16,fp8,7,0.0069333333522081375
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,64,8,128,0,1,float16,float16,15,0.008703999842206636
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,64,8,128,0,1,float16,fp8,15,0.006117333347598712
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,64,8,128,0,1,float16,float16,31,0.009477333476146063
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,64,8,128,0,1,float16,fp8,31,0.00578666664659977
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,64,8,128,0,1,float16,float16,511,0.01589866727590561
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,64,8,128,0,1,float16,fp8,511,0.008127999802430471
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,64,8,128,0,1,float16,float16,1023,0.013157332936922709
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,64,8,128,0,1,float16,fp8,63,0.006095999851822853
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,64,8,128,0,1,float16,float16,127,0.01129066695769628
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,64,8,128,0,1,float16,fp8,127,0.006720000257094701
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,64,8,128,0,1,float16,float16,4095,0.015919999529918034
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,64,8,128,0,1,float16,float16,255,0.011301333705584208
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,64,8,128,0,1,float16,fp8,255,0.007765333478649457
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,64,8,128,0,1,float16,fp8,1023,0.00860799973209699
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,64,8,128,0,1,float16,float16,2047,0.01482133318980535
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,64,8,128,0,1,float16,fp8,2047,0.00960533320903778
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,64,8,128,0,1,float16,fp8,4095,0.01098666712641716
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,64,8,128,0,1,float16,float16,8191,0.025727999707063038
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,64,8,128,0,1,float16,fp8,8191,0.014789332946141561
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,64,8,128,0,1,float16,float16,16383,0.029792000850041706
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,64,8,128,0,1,float16,fp8,16383,0.017759999881188076
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,64,8,128,0,1,float16,float16,32767,0.04875733455022176
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,64,1,128,0,1,float16,float16,1,0.008767999708652496
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,64,8,128,0,1,float16,fp8,32767,0.02496533344189326
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,64,1,128,0,1,float16,fp8,1,0.009018666421373686
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,64,1,128,0,1,float16,float16,3,0.009093333035707474
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,64,1,128,0,1,float16,fp8,3,0.009749333063761393
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,64,1,128,0,1,float16,float16,7,0.008821333448092142
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,64,1,128,0,1,float16,fp8,7,0.008922666932145754
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,64,1,128,0,1,float16,float16,15,0.00916800027092298
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,64,1,128,0,1,float16,fp8,15,0.00921066664159298
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,64,1,128,0,1,float16,float16,31,0.009754666437705358
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,64,1,128,0,1,float16,fp8,31,0.009919999788204828
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,64,1,128,0,1,float16,float16,63,0.011648000528415045
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,64,1,128,0,1,float16,fp8,63,0.011525332927703857
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,64,1,128,0,1,float16,float16,127,0.011503999431928
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,64,1,128,0,1,float16,fp8,127,0.011648000528415045
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,64,1,128,0,1,float16,float16,255,0.011850666254758835
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,64,1,128,0,1,float16,fp8,255,0.011509332805871964
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,64,1,128,0,1,float16,float16,511,0.025861332813898723
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,64,1,128,0,1,float16,fp8,511,0.026015999416510265
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,64,1,128,0,1,float16,float16,1023,0.030181333422660828
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,64,1,128,0,1,float16,fp8,1023,0.030373332401116688
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,64,1,128,0,1,float16,float16,2047,0.04058666775623957
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,64,1,128,0,1,float16,fp8,2047,0.04058666775623957
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,64,1,128,0,1,float16,float16,4095,0.06112533311049143
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,64,1,128,0,1,float16,fp8,4095,0.06028266747792562
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,64,1,128,0,1,float16,float16,8191,0.10169600447018941
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,64,1,128,0,1,float16,fp8,8191,0.09992532928784688
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,64,1,128,0,1,float16,float16,16383,0.1824959913889567
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,64,1,128,0,1,float16,fp8,16383,0.17920533816019693
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,64,2,128,0,1,float16,float16,3,0.00897066667675972
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,64,1,128,0,1,float16,float16,32767,0.34381866455078125
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,64,2,128,0,1,float16,float16,1,0.008869333192706108
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,64,2,128,0,1,float16,fp8,1,0.009029333169261614
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,64,2,128,0,1,float16,fp8,15,0.009514666472872099
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,64,1,128,0,1,float16,fp8,32767,0.33771733442942303
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,64,2,128,0,1,float16,fp8,3,0.009114666531483332
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,64,2,128,0,1,float16,float16,7,0.008890666688481966
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,64,2,128,0,1,float16,fp8,7,0.009514666472872099
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,64,2,128,0,1,float16,float16,15,0.009082666908701261
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,64,2,128,0,1,float16,float16,31,0.009749333063761393
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,64,2,128,0,1,float16,fp8,31,0.009514666472872099
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,64,2,128,0,1,float16,float16,63,0.011567999919255575
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,64,2,128,0,1,float16,fp8,63,0.009674666449427605
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,64,2,128,0,1,float16,float16,127,0.011648000528415045
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,64,2,128,0,1,float16,fp8,127,0.010186666622757912
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,64,2,128,0,1,float16,float16,255,0.011776000261306763
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,64,2,128,0,1,float16,fp8,255,0.012527999778588613
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,64,2,128,0,1,float16,float16,511,0.02603200078010559
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,64,2,128,0,1,float16,fp8,511,0.01302933320403099
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,64,2,128,0,1,float16,float16,1023,0.030165334542592365
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,64,2,128,0,1,float16,fp8,1023,0.014479999740918478
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,64,2,128,0,1,float16,float16,2047,0.017994667092959087
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,64,2,128,0,1,float16,fp8,2047,0.015493333339691162
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,64,2,128,0,1,float16,float16,4095,0.019146667172511418
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,64,2,128,0,1,float16,fp8,4095,0.017583999782800674
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,64,2,128,0,1,float16,float16,8191,0.02298133323589961
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,64,2,128,0,1,float16,fp8,8191,0.021856000026067097
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,64,2,128,0,1,float16,float16,16383,0.025146665672461193
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,64,2,128,0,1,float16,fp8,16383,0.026314665873845417
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,64,2,128,0,1,float16,float16,32767,0.04445866743723551
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,64,2,128,0,1,float16,fp8,32767,0.039077334105968475
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,64,4,128,0,1,float16,float16,1,0.009072000160813332
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,64,4,128,0,1,float16,fp8,1,0.006826666494210561
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,64,4,128,0,1,float16,float16,3,0.00898133342464765
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,64,4,128,0,1,float16,fp8,3,0.007157333195209503
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,64,4,128,0,1,float16,float16,7,0.00898133342464765
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,64,4,128,0,1,float16,fp8,7,0.007205333560705185
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,64,4,128,0,1,float16,float16,15,0.009365333244204521
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,64,4,128,0,1,float16,fp8,15,0.007071999832987785
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,64,4,128,0,1,float16,float16,31,0.009919999788204828
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,64,4,128,0,1,float16,fp8,31,0.007098666702707608
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,64,4,128,0,1,float16,float16,63,0.011472000430027643
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,64,4,128,0,1,float16,fp8,63,0.00721066693464915
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,64,4,128,0,1,float16,float16,127,0.011584000041087469
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,64,4,128,0,1,float16,fp8,127,0.00790933333337307
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,64,4,128,0,1,float16,float16,255,0.011770666887362799
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,64,4,128,0,1,float16,fp8,255,0.009589333087205887
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,64,4,128,0,1,float16,float16,511,0.02613866577545802
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,64,4,128,0,1,float16,fp8,511,0.009829333052039146
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,64,4,128,0,1,float16,float16,1023,0.013712000101804733
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,64,4,128,0,1,float16,fp8,1023,0.010389333590865135
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,64,4,128,0,1,float16,float16,2047,0.015791999797026317
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,64,4,128,0,1,float16,fp8,2047,0.011765333513418833
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,64,4,128,0,1,float16,float16,4095,0.016821333517630894
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,64,4,128,0,1,float16,fp8,4095,0.013376000026861826
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,64,4,128,0,1,float16,float16,8191,0.02657066782315572
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,64,4,128,0,1,float16,fp8,8191,0.01803733284274737
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,64,4,128,0,1,float16,float16,16383,0.030879999200503033
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,64,4,128,0,1,float16,fp8,16383,0.02160533269246419
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,64,8,128,0,1,float16,float16,7,0.008938666433095932
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,64,4,128,0,1,float16,float16,32767,0.049728001157442726
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,64,4,128,0,1,float16,fp8,32767,0.0322080006202062
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,64,8,128,0,1,float16,float16,1,0.009306666751702627
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,64,8,128,0,1,float16,fp8,1,0.006159999718268712
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,64,8,128,0,1,float16,float16,3,0.00901333304742972
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,64,8,128,0,1,float16,fp8,3,0.005989333614706993
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,64,8,128,0,1,float16,fp8,7,0.006106666599710782
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,64,8,128,0,1,float16,float16,15,0.00919999989370505
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,64,8,128,0,1,float16,fp8,15,0.006181333214044571
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,64,8,128,0,1,float16,float16,31,0.010064000263810158
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,64,8,128,0,1,float16,fp8,31,0.006266666576266289
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,64,8,128,0,1,float16,float16,63,0.011285333583752314
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,64,8,128,0,1,float16,fp8,63,0.006197333335876465
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,64,8,128,0,1,float16,float16,127,0.011365332951148352
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,64,8,128,0,1,float16,fp8,127,0.0064319999267657595
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,64,8,128,0,1,float16,float16,255,0.011717333147923151
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,64,8,128,0,1,float16,fp8,255,0.008069333309928576
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,64,8,128,0,1,float16,float16,511,0.012842666357755661
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,64,8,128,0,1,float16,fp8,511,0.008432000254591307
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,64,8,128,0,1,float16,float16,1023,0.013306666165590286
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,64,8,128,0,1,float16,fp8,1023,0.009103999783595404
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,64,8,128,0,1,float16,float16,2047,0.0200853335360686
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,64,8,128,0,1,float16,fp8,2047,0.010693332801262537
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,64,8,128,0,1,float16,float16,4095,0.023391999304294586
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,64,8,128,0,1,float16,fp8,4095,0.013104000439246496
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,64,8,128,0,1,float16,float16,8191,0.034789333740870156
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,64,8,128,0,1,float16,fp8,8191,0.017743999759356182
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,64,8,128,0,1,float16,float16,16383,0.050373335679372154
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,64,8,128,0,1,float16,fp8,16383,0.029002666473388672
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,64,8,128,0,1,float16,float16,32767,0.07854400078455608
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,64,8,128,0,1,float16,fp8,32767,0.04043200115362803
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,64,1,128,0,1,float16,float16,1,0.04211199780305227
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,64,1,128,0,1,float16,fp8,1,0.043040002385775246
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,64,1,128,0,1,float16,float16,3,0.04371733466784159
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,64,1,128,0,1,float16,fp8,15,0.057034666339556374
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,64,1,128,0,1,float16,fp8,3,0.044533332188924156
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,64,1,128,0,1,float16,float16,7,0.04513066510359446
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,64,1,128,0,1,float16,fp8,7,0.04607999821503957
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,64,1,128,0,1,float16,float16,15,0.05745600163936615
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,64,1,128,0,1,float16,float16,31,0.05770133435726166
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,64,1,128,0,1,float16,fp8,31,0.05739733576774597
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,64,1,128,0,1,float16,float16,63,0.058058664202690125
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,64,1,128,0,1,float16,fp8,63,0.05821866790453593
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,64,1,128,0,1,float16,float16,127,0.07023466626803081
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,64,1,128,0,1,float16,fp8,127,0.0703893353541692
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,64,1,128,0,1,float16,float16,255,0.10883200168609619
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,64,1,128,0,1,float16,fp8,255,0.10900800426801045
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,64,1,128,0,1,float16,float16,511,0.1860533356666565
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,64,1,128,0,1,float16,fp8,511,0.18636800845464072
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,64,1,128,0,1,float16,float16,1023,0.3381386597951253
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,64,1,128,0,1,float16,fp8,1023,0.33955732981363934
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,64,1,128,0,1,float16,float16,2047,0.6477866570154825
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,64,2,128,0,1,float16,fp8,3,0.009253333633144697
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,64,1,128,0,1,float16,fp8,2047,0.6470400094985962
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,64,2,128,0,1,float16,float16,1,0.012981332838535309
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,64,2,128,0,1,float16,fp8,1,0.00943999985853831
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,64,1,128,0,1,float16,float16,4095,1.2616746425628662
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,64,1,128,0,1,float16,fp8,4095,1.2705972989400227
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,64,2,128,0,1,float16,float16,3,0.013530666629473368
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,64,2,128,0,1,float16,float16,7,0.01314666618903478
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,64,2,128,0,1,float16,fp8,7,0.009306666751702627
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,64,2,128,0,1,float16,float16,15,0.012986666212479273
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,64,2,128,0,1,float16,fp8,15,0.009392000113924345
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,64,2,128,0,1,float16,float16,31,0.013114667187134424
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,64,2,128,0,1,float16,fp8,31,0.009610666582981745
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,64,2,128,0,1,float16,float16,63,0.013349333157142004
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,64,2,128,0,1,float16,fp8,63,0.009589333087205887
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,64,2,128,0,1,float16,float16,127,0.012960000584522883
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,64,2,128,0,1,float16,fp8,127,0.009984000275532404
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,64,2,128,0,1,float16,float16,255,0.02630399912595749
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,64,2,128,0,1,float16,fp8,255,0.013568000247081121
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,64,2,128,0,1,float16,float16,511,0.035274667044480644
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,64,2,128,0,1,float16,fp8,511,0.024826665719350178
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,64,2,128,0,1,float16,float16,1023,0.058677335580190025
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,64,2,128,0,1,float16,fp8,1023,0.044453332821528115
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,64,2,128,0,1,float16,float16,2047,0.08366400003433228
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,64,2,128,0,1,float16,fp8,2047,0.060133333007494606
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,64,2,128,0,1,float16,float16,4095,0.10774399836858113
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,64,4,128,0,1,float16,float16,1,0.01674666628241539
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,64,2,128,0,1,float16,fp8,4095,0.08101333181063335
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,64,4,128,0,1,float16,fp8,1,0.008122666428486506
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,64,4,128,0,1,float16,float16,3,0.016447999825080235
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,64,4,128,0,1,float16,fp8,3,0.008240000034372011
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,64,4,128,0,1,float16,float16,7,0.016095999628305435
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,64,4,128,0,1,float16,fp8,7,0.007813333223263422
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,64,4,128,0,1,float16,float16,15,0.016336000214020412
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,64,4,128,0,1,float16,fp8,15,0.007962666451931
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,64,4,128,0,1,float16,float16,31,0.016037333756685257
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,64,4,128,0,1,float16,fp8,31,0.008063999935984612
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,64,4,128,0,1,float16,float16,63,0.016373333831628162
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,64,4,128,0,1,float16,fp8,63,0.008080000057816505
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,64,4,128,0,1,float16,float16,127,0.016229332735141117
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,64,4,128,0,1,float16,fp8,127,0.008746666833758354
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,64,4,128,0,1,float16,float16,255,0.03182933231194814
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,64,4,128,0,1,float16,float16,2047,0.08461333314577739
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,64,4,128,0,1,float16,fp8,255,0.018250666558742523
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,64,4,128,0,1,float16,float16,511,0.04885333279768626
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,64,4,128,0,1,float16,fp8,511,0.02608533451954524
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,64,4,128,0,1,float16,float16,1023,0.06323733429114024
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,64,8,128,0,1,float16,fp8,1,0.010784000158309937
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,64,4,128,0,1,float16,fp8,1023,0.03384533276160558
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,64,4,128,0,1,float16,fp8,2047,0.0505920002857844
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,64,4,128,0,1,float16,float16,4095,0.12158933281898499
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,64,4,128,0,1,float16,fp8,4095,0.07211199899514516
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,64,8,128,0,1,float16,float16,1,0.026485333840052288
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,64,8,128,0,1,float16,float16,3,0.02587199956178665
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,64,8,128,0,1,float16,fp8,3,0.010847999403874079
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,64,8,128,0,1,float16,float16,7,0.025957333544890087
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,64,8,128,0,1,float16,fp8,7,0.010992000500361124
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,64,8,128,0,1,float16,float16,15,0.025813333690166473
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,64,8,128,0,1,float16,fp8,15,0.010640000303586325
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,64,8,128,0,1,float16,float16,31,0.025786665578683216
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,64,8,128,0,1,float16,fp8,31,0.0107893335322539
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,64,8,128,0,1,float16,float16,63,0.025583999852339428
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,64,8,128,0,1,float16,fp8,63,0.010832000523805618
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,64,8,128,0,1,float16,float16,127,0.026000000536441803
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,64,8,128,0,1,float16,fp8,127,0.012949333836634954
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,64,8,128,0,1,float16,float16,255,0.05048533280690511
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,64,8,128,0,1,float16,fp8,255,0.021344001094500225
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,64,8,128,0,1,float16,float16,511,0.06525866687297821
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,64,8,128,0,1,float16,fp8,511,0.027248000105222065
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,64,8,128,0,1,float16,float16,1023,0.08160533507664998
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,64,8,128,0,1,float16,fp8,1023,0.040821333726247154
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,64,8,128,0,1,float16,float16,2047,0.12101866801579793
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,64,8,128,0,1,float16,fp8,2047,0.059077332417170204
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,64,8,128,0,1,float16,float16,4095,0.19210133949915567
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,64,8,128,0,1,float16,fp8,4095,0.09824533263842265
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,64,1,128,0,1,float16,float16,1,0.00847999999920527
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,64,1,128,0,1,float16,fp8,1,0.008474666625261307
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,64,1,128,0,1,float16,float16,3,0.008506666868925095
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,64,1,128,0,1,float16,fp8,3,0.00873066671192646
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,64,1,128,0,1,float16,float16,7,0.0084906667470932
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,64,1,128,0,1,float16,fp8,7,0.008736000085870424
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,64,1,128,0,1,float16,float16,15,0.009178666397929192
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,64,1,128,0,1,float16,fp8,15,0.009183999771873156
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,64,1,128,0,1,float16,float16,31,0.01110400011142095
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,64,1,128,0,1,float16,fp8,31,0.011007999380429586
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,64,1,128,0,1,float16,float16,63,0.011141333729028702
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,64,1,128,0,1,float16,fp8,63,0.01099733387430509
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,64,1,128,0,1,float16,float16,127,0.011370666325092316
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,64,1,128,0,1,float16,fp8,127,0.01109333336353302
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,64,1,128,0,1,float16,float16,255,0.012789333860079447
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,64,1,128,0,1,float16,fp8,255,0.012997332960367203
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,64,1,128,0,1,float16,float16,511,0.017952000101407368
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,64,1,128,0,1,float16,fp8,511,0.01807466646035512
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,64,1,128,0,1,float16,float16,1023,0.02788266787926356
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,64,1,128,0,1,float16,fp8,1023,0.028362666567166645
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,64,1,128,0,1,float16,float16,2047,0.04775466521581014
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,64,1,128,0,1,float16,fp8,2047,0.048672000567118325
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,64,1,128,0,1,float16,float16,4095,0.08738666772842407
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,64,1,128,0,1,float16,fp8,16383,0.32703999678293866
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,64,1,128,0,1,float16,fp8,4095,0.08875200152397156
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,64,1,128,0,1,float16,float16,8191,0.16642666856447855
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,64,1,128,0,1,float16,fp8,8191,0.16872533162434897
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,64,1,128,0,1,float16,float16,16383,0.32465600967407227
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,64,2,128,0,1,float16,float16,1,0.008496000121037165
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,64,2,128,0,1,float16,fp8,1,0.009322666873534521
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,64,1,128,0,1,float16,float16,32767,0.8329439957936605
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,64,2,128,0,1,float16,float16,3,0.008602666358153025
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,64,1,128,0,1,float16,fp8,32767,0.772437334060669
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,64,2,128,0,1,float16,fp8,3,0.009413333609700203
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,64,2,128,0,1,float16,float16,7,0.008645333349704742
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,64,2,128,0,1,float16,fp8,7,0.009466666728258133
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,64,2,128,0,1,float16,float16,15,0.009365333244204521
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,64,2,128,0,1,float16,fp8,15,0.009712000067035357
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,64,2,128,0,1,float16,float16,31,0.010890666395425797
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,64,2,128,0,1,float16,fp8,31,0.009445333232482275
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,64,2,128,0,1,float16,float16,63,0.010965333630641302
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,64,2,128,0,1,float16,fp8,63,0.00943999985853831
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,64,2,128,0,1,float16,float16,127,0.011546666423479715
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,64,2,128,0,1,float16,fp8,127,0.010266666611035665
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,64,2,128,0,1,float16,float16,255,0.01313599944114685
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,64,2,128,0,1,float16,fp8,255,0.012533333152532578
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,64,2,128,0,1,float16,float16,511,0.01794133335351944
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,64,2,128,0,1,float16,fp8,511,0.013162666310866674
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,64,2,128,0,1,float16,float16,1023,0.015471999843915304
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,64,2,128,0,1,float16,fp8,1023,0.013776000589132309
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,64,2,128,0,1,float16,float16,2047,0.019343999524911244
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,64,2,128,0,1,float16,fp8,2047,0.015573333948850632
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,64,2,128,0,1,float16,float16,4095,0.020954666038354237
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,64,2,128,0,1,float16,fp8,4095,0.018186666071414948
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,64,2,128,0,1,float16,float16,8191,0.03303466737270355
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,64,2,128,0,1,float16,fp8,8191,0.025040000677108765
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,64,2,128,0,1,float16,float16,16383,0.039077334105968475
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,64,2,128,0,1,float16,fp8,16383,0.031093334158261616
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,64,2,128,0,1,float16,float16,32767,0.0631573349237442
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,64,2,128,0,1,float16,fp8,32767,0.054560000697771706
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,64,4,128,0,1,float16,fp8,7,0.007120000198483467
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,64,4,128,0,1,float16,float16,1,0.0085333331177632
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,64,4,128,0,1,float16,fp8,1,0.007061333085099856
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,64,4,128,0,1,float16,float16,3,0.008458666503429413
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,64,4,128,0,1,float16,fp8,3,0.007104000076651573
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,64,4,128,0,1,float16,float16,7,0.008458666503429413
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,64,4,128,0,1,float16,float16,15,0.009482666850090027
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,64,4,128,0,1,float16,fp8,15,0.007135999699433644
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,64,4,128,0,1,float16,float16,31,0.010837333897749582
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,64,4,128,0,1,float16,fp8,31,0.007482666522264481
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,64,4,128,0,1,float16,float16,63,0.011296000331640244
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,64,4,128,0,1,float16,fp8,63,0.007402666533986728
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,64,4,128,0,1,float16,float16,127,0.011258666714032492
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,64,4,128,0,1,float16,fp8,127,0.007802666475375493
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,64,4,128,0,1,float16,float16,255,0.012975999464591345
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,64,4,128,0,1,float16,fp8,255,0.00943999985853831
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,64,4,128,0,1,float16,float16,511,0.012965332716703415
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,64,4,128,0,1,float16,fp8,511,0.009829333052039146
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,64,4,128,0,1,float16,float16,1023,0.013818666338920593
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,64,4,128,0,1,float16,fp8,1023,0.01062400018175443
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,64,4,128,0,1,float16,float16,2047,0.02176533391078313
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,64,4,128,0,1,float16,fp8,2047,0.012997332960367203
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,64,4,128,0,1,float16,float16,4095,0.024901332954565685
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,64,4,128,0,1,float16,fp8,4095,0.015301333119471868
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,64,4,128,0,1,float16,float16,8191,0.0365226666132609
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,64,4,128,0,1,float16,fp8,8191,0.021322667598724365
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,64,4,128,0,1,float16,float16,16383,0.05337599913279215
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,64,4,128,0,1,float16,fp8,16383,0.035605333745479584
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,64,8,128,0,1,float16,fp8,3,0.006277333324154218
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,64,8,128,0,1,float16,float16,1,0.0084906667470932
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,64,4,128,0,1,float16,float16,32767,0.0806826651096344
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,64,4,128,0,1,float16,fp8,32767,0.05044800043106079
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,64,8,128,0,1,float16,fp8,1,0.006165333092212677
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,64,8,128,0,1,float16,float16,3,0.00854399986565113
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,64,8,128,0,1,float16,float16,7,0.00891733355820179
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,64,8,128,0,1,float16,fp8,7,0.006037333359320958
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,64,8,128,0,1,float16,float16,15,0.009423999736706415
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,64,8,128,0,1,float16,fp8,15,0.006197333335876465
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,64,8,128,0,1,float16,float16,31,0.010960000256697336
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,64,8,128,0,1,float16,fp8,31,0.005941333249211311
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,64,8,128,0,1,float16,float16,63,0.01109333336353302
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,64,8,128,0,1,float16,fp8,63,0.006058666855096817
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,64,8,128,0,1,float16,float16,127,0.011594666788975397
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,64,8,128,0,1,float16,fp8,127,0.006618666773041089
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,64,8,128,0,1,float16,float16,255,0.012576000144084295
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,64,8,128,0,1,float16,fp8,255,0.008165333420038223
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,64,8,128,0,1,float16,float16,511,0.013125333935022354
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,64,8,128,0,1,float16,fp8,511,0.008346666892369589
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,64,8,128,0,1,float16,float16,1023,0.019274666905403137
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,64,8,128,0,1,float16,fp8,1023,0.00983466642598311
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,64,8,128,0,1,float16,float16,2047,0.028565332293510437
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,64,8,128,0,1,float16,fp8,2047,0.012410666793584824
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,64,8,128,0,1,float16,float16,4095,0.040074666341145836
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,64,8,128,0,1,float16,fp8,4095,0.020143999407688778
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,64,8,128,0,1,float16,float16,8191,0.058058664202690125
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,64,8,128,0,1,float16,fp8,8191,0.02502399931351344
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,64,8,128,0,1,float16,float16,32767,0.12352533141771953
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,64,8,128,0,1,float16,float16,16383,0.08613866567611694
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,64,8,128,0,1,float16,fp8,16383,0.04342400034268697
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,64,1,128,0,1,float16,float16,1,0.07889066636562347
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,64,8,128,0,1,float16,fp8,32767,0.06402666866779327
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,64,1,128,0,1,float16,fp8,1,0.08045866588751475
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,64,1,128,0,1,float16,fp8,15,0.1088853379090627
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,64,1,128,0,1,float16,float16,3,0.08186666667461395
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,64,1,128,0,1,float16,fp8,3,0.08331199983755748
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,64,1,128,0,1,float16,float16,7,0.08548266688982646
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,64,1,128,0,1,float16,fp8,63,0.11030399799346924
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,64,1,128,0,1,float16,fp8,7,0.08673600355784099
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,64,1,128,0,1,float16,float16,255,0.209279994169871
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,64,1,128,0,1,float16,float16,15,0.10948266585667928
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,64,1,128,0,1,float16,fp8,255,0.20945066213607788
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,64,1,128,0,1,float16,float16,31,0.10968533158302307
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,64,1,128,0,1,float16,fp8,31,0.10883733630180359
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,64,1,128,0,1,float16,float16,63,0.11055466532707214
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,64,1,128,0,1,float16,float16,127,0.1344000001748403
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,64,1,128,0,1,float16,fp8,127,0.13406933347384134
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,64,1,128,0,1,float16,float16,511,0.35945598284403485
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,64,1,128,0,1,float16,fp8,511,0.3603946765263875
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,64,1,128,0,1,float16,fp8,2047,1.2669973373413086
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,64,1,128,0,1,float16,float16,1023,0.6639039913813273
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,64,2,128,0,1,float16,fp8,3,0.010634666929642359
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,64,1,128,0,1,float16,fp8,1023,0.65938667456309
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,64,2,128,0,1,float16,float16,1,0.020448000480731327
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,64,2,128,0,1,float16,fp8,1,0.0102186668664217
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,64,1,128,0,1,float16,float16,2047,1.2636213302612305
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,64,2,128,0,1,float16,float16,3,0.020442667106787365
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,64,2,128,0,1,float16,float16,7,0.02042666698495547
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,64,2,128,0,1,float16,float16,63,0.020362666497627895
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,64,2,128,0,1,float16,fp8,7,0.01009599988659223
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,64,2,128,0,1,float16,float16,127,0.02027200038234393
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,64,2,128,0,1,float16,float16,15,0.0207893339296182
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,64,2,128,0,1,float16,fp8,15,0.010298666854699453
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,64,2,128,0,1,float16,float16,31,0.020506666352351505
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,64,2,128,0,1,float16,fp8,511,0.0433599998553594
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,64,2,128,0,1,float16,fp8,31,0.010309333602587381
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,64,2,128,0,1,float16,fp8,63,0.01032533310353756
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,64,2,128,0,1,float16,fp8,127,0.011328000575304031
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,64,2,128,0,1,float16,float16,255,0.039349332451820374
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,64,2,128,0,1,float16,fp8,255,0.023365333676338196
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,64,2,128,0,1,float16,float16,511,0.06105599800745646
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,64,2,128,0,1,float16,float16,1023,0.07898666461308797
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,64,2,128,0,1,float16,fp8,1023,0.06195733447869619
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,64,2,128,0,1,float16,float16,2047,0.10965333382288615
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,64,2,128,0,1,float16,fp8,2047,0.08089066545168559
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,64,4,128,0,1,float16,float16,1,0.026869334280490875
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,64,4,128,0,1,float16,fp8,1,0.012693333129088083
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,64,4,128,0,1,float16,fp8,15,0.012757333616415659
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,64,4,128,0,1,float16,float16,3,0.026949333647886913
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,64,4,128,0,1,float16,fp8,3,0.013013333082199097
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,64,4,128,0,1,float16,float16,7,0.027050666511058807
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,64,4,128,0,1,float16,fp8,7,0.012741333494583765
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,64,4,128,0,1,float16,float16,15,0.02737066646416982
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,64,4,128,0,1,float16,float16,31,0.026911998788515728
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,64,4,128,0,1,float16,fp8,31,0.012741333494583765
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,64,4,128,0,1,float16,float16,63,0.026650667190551758
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,64,4,128,0,1,float16,fp8,63,0.012773333738247553
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,64,4,128,0,1,float16,float16,127,0.027429332335789997
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,64,4,128,0,1,float16,fp8,127,0.015317333241303762
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,64,4,128,0,1,float16,float16,255,0.052341332038243614
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,64,4,128,0,1,float16,fp8,255,0.025989333788553875
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,64,4,128,0,1,float16,float16,2047,0.12378666798273723
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,64,4,128,0,1,float16,float16,511,0.06649066507816315
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,64,4,128,0,1,float16,fp8,511,0.035717333356539406
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,64,4,128,0,1,float16,float16,1023,0.08416533470153809
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,64,4,128,0,1,float16,fp8,1023,0.051269332567850746
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,64,4,128,0,1,float16,fp8,2047,0.07450133562088013
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,64,8,128,0,1,float16,float16,1,0.045882667104403176
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,64,8,128,0,1,float16,fp8,1,0.015263999501864115
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,64,8,128,0,1,float16,float16,3,0.0461760014295578
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,64,8,128,0,1,float16,fp8,3,0.014949332922697067
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,64,8,128,0,1,float16,fp8,31,0.015189333508412043
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,64,8,128,0,1,float16,float16,7,0.04571733375390371
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,64,8,128,0,1,float16,fp8,63,0.014965333044528961
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,64,8,128,0,1,float16,fp8,7,0.015247999380032221
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,64,8,128,0,1,float16,float16,15,0.04584000011285146
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,64,8,128,0,1,float16,fp8,15,0.015210667004187902
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,64,8,128,0,1,float16,float16,31,0.045893331368764244
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,64,8,128,0,1,float16,float16,63,0.045797333121299744
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,64,8,128,0,1,float16,float16,127,0.04693333307902018
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,64,8,128,0,1,float16,fp8,127,0.018581333259741466
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,64,8,128,0,1,float16,float16,255,0.054042667150497437
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,64,8,128,0,1,float16,fp8,255,0.024634666740894318
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,64,8,128,0,1,float16,float16,2047,0.17843733231226602
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,64,8,128,0,1,float16,float16,511,0.07098666826883952
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,64,8,128,0,1,float16,fp8,511,0.03664000084002813
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,64,8,128,0,1,float16,float16,1023,0.10566932956377666
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,64,8,128,0,1,float16,fp8,1023,0.05571199953556061
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,64,8,128,0,1,float16,fp8,2047,0.0955573320388794
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,64,1,128,0,1,float16,float16,1,0.1514346698919932
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,64,1,128,0,1,float16,fp8,1,0.1553600033124288
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,64,1,128,0,1,float16,float16,3,0.15829333662986755
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,64,1,128,0,1,float16,fp8,3,0.1612106661001841
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,64,1,128,0,1,float16,float16,7,0.16595199704170227
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,64,1,128,0,1,float16,fp8,7,0.16900267203648886
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,64,1,128,0,1,float16,float16,15,0.21284266312917074
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,64,1,128,0,1,float16,fp8,15,0.21188799540201822
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,64,1,128,0,1,float16,float16,31,0.21319466829299927
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,64,1,128,0,1,float16,fp8,31,0.2121760050455729
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,64,1,128,0,1,float16,fp8,127,0.2622986634572347
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,64,1,128,0,1,float16,float16,63,0.21545066436131796
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,64,1,128,0,1,float16,fp8,63,0.21522667010625204
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,64,1,128,0,1,float16,float16,127,0.2628213365872701
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,64,1,128,0,1,float16,float16,255,0.41014401117960614
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,64,1,128,0,1,float16,fp8,255,0.4108853340148926
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,64,2,128,0,1,float16,float16,1,0.03480533262093862
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,64,1,128,0,1,float16,float16,511,0.7133119901021322
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,64,2,128,0,1,float16,fp8,1,0.017322666943073273
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,64,1,128,0,1,float16,fp8,511,0.707530657450358
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,64,2,128,0,1,float16,float16,3,0.03515733281771342
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,64,2,128,0,1,float16,fp8,3,0.017125333348910015
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,64,2,128,0,1,float16,float16,7,0.03465066601832708
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,64,2,128,0,1,float16,fp8,7,0.017349333812793095
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,64,2,128,0,1,float16,float16,15,0.03522666543722153
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,64,2,128,0,1,float16,fp8,15,0.0173333336909612
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,64,2,128,0,1,float16,float16,31,0.034490667283535004
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,64,2,128,0,1,float16,fp8,31,0.017488000293572743
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,64,2,128,0,1,float16,float16,63,0.034586665530999504
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,64,2,128,0,1,float16,fp8,63,0.017173333714405697
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,64,2,128,0,1,float16,float16,127,0.03454933315515518
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,64,2,128,0,1,float16,fp8,127,0.01959466685851415
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,64,2,128,0,1,float16,float16,255,0.06799999872843425
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,64,2,128,0,1,float16,fp8,255,0.0469760000705719
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,64,2,128,0,1,float16,float16,511,0.08418132861455281
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,64,2,128,0,1,float16,fp8,511,0.06492800017197926
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,64,4,128,0,1,float16,float16,1,0.04717333118120829
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,64,4,128,0,1,float16,fp8,1,0.01817600056529045
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,64,4,128,0,1,float16,float16,3,0.047397335370381675
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,64,4,128,0,1,float16,fp8,3,0.01829333355029424
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,64,4,128,0,1,float16,float16,7,0.0481333335240682
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,64,4,128,0,1,float16,fp8,7,0.01838933303952217
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,64,4,128,0,1,float16,float16,15,0.04706133405367533
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,64,4,128,0,1,float16,fp8,15,0.018645333747069042
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,64,4,128,0,1,float16,float16,31,0.04734399914741516
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,64,4,128,0,1,float16,fp8,31,0.018272000054518383
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,64,4,128,0,1,float16,fp8,255,0.032085334261258446
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,64,4,128,0,1,float16,float16,63,0.04679466784000397
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,64,4,128,0,1,float16,fp8,63,0.01825599993268649
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,64,8,128,0,1,float16,float16,1,0.08354133367538452
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,64,4,128,0,1,float16,float16,127,0.04870399832725525
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,64,4,128,0,1,float16,fp8,127,0.023002666731675465
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,64,4,128,0,1,float16,float16,255,0.0573226660490036
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,64,4,128,0,1,float16,float16,511,0.07467199862003326
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,64,4,128,0,1,float16,fp8,511,0.04753066599369049
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,64,8,128,0,1,float16,fp8,1,0.026154667139053345
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,64,8,128,0,1,float16,float16,3,0.08347200353940327
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,64,8,128,0,1,float16,fp8,3,0.026181332767009735
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,64,8,128,0,1,float16,float16,7,0.08437866965929668
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,64,8,128,0,1,float16,fp8,7,0.02628266563018163
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,64,8,128,0,1,float16,float16,15,0.0836906631787618
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,64,8,128,0,1,float16,fp8,15,0.02607999990383784
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,64,8,128,0,1,float16,float16,31,0.08385599652926128
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,64,8,128,0,1,float16,fp8,31,0.026122666895389557
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,64,8,128,0,1,float16,float16,63,0.0831413318713506
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,64,8,128,0,1,float16,fp8,63,0.026101333399613697
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,64,8,128,0,1,float16,float16,127,0.08628267049789429
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,64,1,128,0,1,float16,float16,1,0.2959946592648824
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,64,8,128,0,1,float16,fp8,127,0.0330079992612203
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,64,8,128,0,1,float16,float16,255,0.09797333677609761
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,64,8,128,0,1,float16,fp8,255,0.04656533400217692
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,64,8,128,0,1,float16,float16,511,0.13090133666992188
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,64,8,128,0,1,float16,fp8,511,0.06551466882228851
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,64,1,128,0,1,float16,fp8,1,0.30396799246470135
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,64,1,128,0,1,float16,float16,3,0.3111413319905599
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,64,1,128,0,1,float16,fp8,3,0.3171839912732442
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,64,1,128,0,1,float16,float16,7,0.326581339041392
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,64,1,128,0,1,float16,fp8,7,0.33192533254623413
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,64,1,128,0,1,float16,float16,15,0.42371201515197754
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,64,1,128,0,1,float16,fp8,15,0.4187146822611491
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,64,1,128,0,1,float16,float16,31,0.4211039940516154
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,64,1,128,0,1,float16,fp8,31,0.4192053476969401
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,64,1,128,0,1,float16,float16,63,0.42526400089263916
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,64,1,128,0,1,float16,fp8,63,0.4248480002085368
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,64,2,128,0,1,float16,float16,1,0.06322666505972545
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,64,1,128,0,1,float16,float16,127,0.5230773289998373
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,64,1,128,0,1,float16,fp8,127,0.5185226599375407
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,64,1,128,0,1,float16,float16,255,0.8236479759216309
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,64,1,128,0,1,float16,fp8,255,0.8171626726786295
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,64,2,128,0,1,float16,fp8,1,0.030799999833106995
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,64,2,128,0,1,float16,float16,3,0.06400000055631001
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,64,2,128,0,1,float16,fp8,3,0.03216533362865448
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,64,2,128,0,1,float16,float16,7,0.06347199777762096
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,64,2,128,0,1,float16,fp8,7,0.030597334106763203
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,64,2,128,0,1,float16,float16,15,0.06331199904282887
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,64,2,128,0,1,float16,fp8,15,0.030453334252039593
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,64,2,128,0,1,float16,float16,31,0.06306133170922597
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,64,2,128,0,1,float16,fp8,31,0.030495998760064442
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,64,2,128,0,1,float16,float16,255,0.07494933406511943
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,64,2,128,0,1,float16,float16,63,0.06453866759936015
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,64,2,128,0,1,float16,fp8,63,0.032261334359645844
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,64,2,128,0,1,float16,float16,127,0.06483733157316844
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,64,2,128,0,1,float16,fp8,127,0.038586666186650596
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,64,4,128,0,1,float16,float16,1,0.08716799815495808
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,64,2,128,0,1,float16,fp8,255,0.059088001648585
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,64,4,128,0,1,float16,fp8,1,0.03437866767247518
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,64,4,128,0,1,float16,float16,3,0.08760000268618266
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,64,4,128,0,1,float16,fp8,3,0.03356266766786575
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,64,4,128,0,1,float16,fp8,15,0.03446933378775915
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,64,4,128,0,1,float16,float16,7,0.0876639982064565
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,64,4,128,0,1,float16,fp8,7,0.03435199956099192
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,64,4,128,0,1,float16,float16,63,0.08768533666928609
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,64,4,128,0,1,float16,float16,15,0.08727467060089111
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,64,4,128,0,1,float16,float16,31,0.0883893370628357
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,64,4,128,0,1,float16,fp8,31,0.0330826664964358
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,64,4,128,0,1,float16,fp8,63,0.03426666557788849
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,64,4,128,0,1,float16,float16,127,0.09019733468691508
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,64,4,128,0,1,float16,fp8,127,0.050111999114354454
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,64,4,128,0,1,float16,float16,255,0.1033066709836324
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,64,4,128,0,1,float16,fp8,255,0.062021334966023765
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,64,8,128,0,1,float16,float16,1,0.15798933307329813
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,64,8,128,0,1,float16,fp8,1,0.055344000458717346
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,64,8,128,0,1,float16,fp8,3,0.05619200070699056
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,64,8,128,0,1,float16,float16,3,0.15925332903862
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,64,8,128,0,1,float16,float16,7,0.1572533349196116
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,64,8,128,0,1,float16,fp8,7,0.0557226687669754
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,64,8,128,0,1,float16,float16,15,0.15915733575820923
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,64,8,128,0,1,float16,fp8,15,0.05522666871547699
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,64,8,128,0,1,float16,float16,31,0.16012266278266907
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,64,8,128,0,1,float16,fp8,31,0.05643199880917867
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,64,8,128,0,1,float16,fp8,63,0.055434669057528176
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,64,8,128,0,1,float16,float16,63,0.1612266699473063
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,64,8,128,0,1,float16,float16,127,0.1606613298257192
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,64,8,128,0,1,float16,fp8,127,0.06607466439406078
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,64,8,128,0,1,float16,float16,255,0.18479466438293457
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,64,1,128,0,1,float16,float16,1,0.013690666606028875
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,64,8,128,0,1,float16,fp8,255,0.0839413305123647
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,64,1,128,0,1,float16,fp8,1,0.0138026662170887
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,64,1,128,0,1,float16,float16,3,0.014287999520699183
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,64,1,128,0,1,float16,fp8,3,0.014005333185195923
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,64,1,128,0,1,float16,float16,7,0.013967999567588171
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,64,1,128,0,1,float16,fp8,7,0.01413333291808764
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,64,1,128,0,1,float16,float16,15,0.014842666685581207
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,64,1,128,0,1,float16,fp8,15,0.01522133375207583
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,64,1,128,0,1,float16,float16,31,0.018266666680574417
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,64,1,128,0,1,float16,fp8,31,0.0183999997874101
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,64,1,128,0,1,float16,fp8,63,0.0184906671444575
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,64,1,128,0,1,float16,float16,63,0.018522666146357853
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,64,1,128,0,1,float16,float16,127,0.018816000471512478
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,64,1,128,0,1,float16,fp8,1023,0.05180799961090088
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,64,1,128,0,1,float16,float16,1023,0.05271466573079427
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,64,1,128,0,1,float16,fp8,127,0.018917333334684372
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,64,1,128,0,1,float16,fp8,2047,0.09227733810742696
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,64,1,128,0,1,float16,float16,255,0.02199999988079071
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,64,1,128,0,1,float16,fp8,255,0.022250667214393616
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,64,1,128,0,1,float16,float16,511,0.031930667658646904
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,64,1,128,0,1,float16,fp8,511,0.032127998769283295
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,64,1,128,0,1,float16,float16,2047,0.0905013382434845
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,64,1,128,0,1,float16,float16,4095,0.16810667514801025
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,64,1,128,0,1,float16,fp8,4095,0.16983999808629355
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,64,1,128,0,1,float16,float16,8191,0.3243573307991028
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,64,1,128,0,1,float16,fp8,8191,0.3253706693649292
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,64,1,128,0,1,float16,float16,16383,0.6534133354822794
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,64,1,128,0,1,float16,fp8,16383,0.6365333398183187
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,64,2,128,0,1,float16,float16,1,0.013306666165590286
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,64,2,128,0,1,float16,fp8,1,0.009301333377758661
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,64,2,128,0,1,float16,float16,3,0.013786666095256805
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,64,2,128,0,1,float16,fp8,3,0.008858666444818178
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,64,2,128,0,1,float16,float16,7,0.013925333817799887
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,64,1,128,0,1,float16,float16,32767,1.688528060913086
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,64,2,128,0,1,float16,fp8,7,0.008943999807039896
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,64,2,128,0,1,float16,float16,15,0.014890667051076889
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,64,1,128,0,1,float16,fp8,32767,1.6887413660685222
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,64,2,128,0,1,float16,fp8,15,0.009141333401203156
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,64,2,128,0,1,float16,float16,31,0.01826133330663045
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,64,2,128,0,1,float16,fp8,31,0.009392000113924345
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,64,2,128,0,1,float16,fp8,255,0.012282667060693106
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,64,2,128,0,1,float16,float16,63,0.018592000007629395
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,64,2,128,0,1,float16,fp8,63,0.00903466654320558
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,64,2,128,0,1,float16,float16,127,0.018538666268189747
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,64,2,128,0,1,float16,fp8,127,0.009786666681369146
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,64,2,128,0,1,float16,float16,255,0.022090665996074677
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,64,2,128,0,1,float16,float16,511,0.014912000546852747
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,64,2,128,0,1,float16,fp8,511,0.012346666306257248
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,64,2,128,0,1,float16,float16,1023,0.01598400001724561
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,64,2,128,0,1,float16,fp8,1023,0.013845333208640417
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,64,2,128,0,1,float16,float16,2047,0.02498133232196172
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,64,2,128,0,1,float16,fp8,2047,0.017530667285124462
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,64,2,128,0,1,float16,float16,4095,0.02906133234500885
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,64,2,128,0,1,float16,fp8,4095,0.020901332298914593
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,64,2,128,0,1,float16,float16,8191,0.044821331898371376
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,64,2,128,0,1,float16,fp8,8191,0.03552533437808355
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,64,2,128,0,1,float16,fp8,32767,0.08224533498287201
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,64,2,128,0,1,float16,float16,16383,0.06558399895826976
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,64,2,128,0,1,float16,fp8,16383,0.045797333121299744
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,64,4,128,0,1,float16,float16,1,0.013397333522637686
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,64,2,128,0,1,float16,float16,32767,0.1023573378721873
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,64,4,128,0,1,float16,float16,15,0.015024000157912573
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,64,4,128,0,1,float16,fp8,1,0.00706666645904382
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,64,4,128,0,1,float16,float16,3,0.013850666582584381
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,64,4,128,0,1,float16,fp8,3,0.007087999954819679
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,64,4,128,0,1,float16,float16,7,0.013797332843144735
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,64,4,128,0,1,float16,fp8,7,0.007360000163316727
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,64,4,128,0,1,float16,fp8,15,0.00706666645904382
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,64,4,128,0,1,float16,float16,31,0.018378666291634243
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,64,4,128,0,1,float16,fp8,31,0.007376000285148621
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,64,4,128,0,1,float16,fp8,255,0.00927466650803884
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,64,4,128,0,1,float16,float16,63,0.018965333700180054
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,64,4,128,0,1,float16,fp8,63,0.007221333061655362
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,64,4,128,0,1,float16,float16,127,0.018800000349680584
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,64,4,128,0,1,float16,fp8,127,0.007925333455204964
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,64,4,128,0,1,float16,float16,255,0.013125333935022354
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,64,4,128,0,1,float16,float16,511,0.013536000003417334
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,64,4,128,0,1,float16,fp8,511,0.010005333150426546
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,64,4,128,0,1,float16,float16,1023,0.02015999952952067
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,64,4,128,0,1,float16,fp8,1023,0.011663999408483505
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,64,4,128,0,1,float16,float16,2047,0.02882666637500127
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,64,4,128,0,1,float16,fp8,2047,0.015376000354687372
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,64,4,128,0,1,float16,float16,4095,0.04083733260631561
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,64,4,128,0,1,float16,fp8,4095,0.024671999116738636
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,64,4,128,0,1,float16,float16,8191,0.05881066620349884
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,64,4,128,0,1,float16,fp8,8191,0.031930667658646904
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,64,4,128,0,1,float16,float16,16383,0.08738133311271667
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,64,4,128,0,1,float16,fp8,16383,0.054618666569391884
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,64,4,128,0,1,float16,float16,32767,0.1262453297773997
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,64,8,128,0,1,float16,float16,1,0.010474666953086853
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,64,8,128,0,1,float16,fp8,7,0.00620266670982043
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,64,8,128,0,1,float16,fp8,1,0.00613866684337457
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,64,4,128,0,1,float16,fp8,32767,0.07982400059700012
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,64,8,128,0,1,float16,float16,3,0.010954666882753372
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,64,8,128,0,1,float16,fp8,3,0.005957333371043205
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,64,8,128,0,1,float16,float16,7,0.010288000106811523
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,64,8,128,0,1,float16,float16,15,0.010901333143313726
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,64,8,128,0,1,float16,fp8,15,0.006229333579540253
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,64,8,128,0,1,float16,float16,31,0.01080000028014183
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,64,8,128,0,1,float16,fp8,31,0.006261333202322324
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,64,8,128,0,1,float16,float16,63,0.010549332946538925
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,64,8,128,0,1,float16,fp8,63,0.00625599982837836
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,64,8,128,0,1,float16,float16,127,0.010314666976531347
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,64,8,128,0,1,float16,fp8,127,0.006624000146985054
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,64,8,128,0,1,float16,float16,255,0.013781332721312841
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,64,8,128,0,1,float16,fp8,255,0.008240000034372011
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,64,8,128,0,1,float16,float16,511,0.01972266659140587
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,64,8,128,0,1,float16,fp8,511,0.009429333110650381
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,64,8,128,0,1,float16,fp8,4095,0.028197333216667175
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,64,8,128,0,1,float16,float16,1023,0.028229333460330963
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,64,8,128,0,1,float16,fp8,1023,0.01569066693385442
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,64,8,128,0,1,float16,float16,2047,0.04232533276081085
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,64,8,128,0,1,float16,fp8,2047,0.01852799952030182
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,64,8,128,0,1,float16,float16,4095,0.06701333324114482
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,64,8,128,0,1,float16,float16,8191,0.08498133222262065
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,64,8,128,0,1,float16,fp8,8191,0.04013866682847341
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,64,8,128,0,1,float16,float16,16383,0.11973333358764648
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,64,8,128,0,1,float16,fp8,16383,0.05932799975077311
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,64,8,128,0,1,float16,float16,32767,0.19196265935897827
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,64,8,128,0,1,float16,fp8,32767,0.09950400392214458
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,64,1,128,0,1,float16,float16,1,0.5879733165105184
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,64,1,128,0,1,float16,fp8,1,0.6035786469777426
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,64,1,128,0,1,float16,float16,3,0.6192266543706259
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,64,1,128,0,1,float16,float16,15,0.8413386344909668
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,64,1,128,0,1,float16,float16,7,0.6519840161005656
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,64,1,128,0,1,float16,fp8,3,0.6318346659342448
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,64,1,128,0,1,float16,fp8,7,0.6616799831390381
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,64,1,128,0,1,float16,fp8,15,0.8344426949818929
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,64,1,128,0,1,float16,float16,31,0.85099196434021
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,64,1,128,0,1,float16,float16,63,0.8649173577626547
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,64,2,128,0,1,float16,float16,1,0.12053333719571431
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,64,1,128,0,1,float16,float16,127,1.052506685256958
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,64,1,128,0,1,float16,fp8,127,1.0498346487681072
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,64,1,128,0,1,float16,fp8,31,0.8378933270772299
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,64,1,128,0,1,float16,fp8,63,0.8549333413441976
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,64,2,128,0,1,float16,fp8,1,0.08674666285514832
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,64,2,128,0,1,float16,float16,3,0.12071999907493591
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,64,2,128,0,1,float16,fp8,3,0.08750399947166443
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,64,2,128,0,1,float16,float16,7,0.12174399693806966
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,64,2,128,0,1,float16,fp8,7,0.08596266309420268
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,64,2,128,0,1,float16,float16,31,0.12306666374206543
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,64,2,128,0,1,float16,float16,15,0.12174933155377705
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,64,2,128,0,1,float16,float16,63,0.12238933642705281
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,64,2,128,0,1,float16,fp8,15,0.08733333150545756
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,64,2,128,0,1,float16,fp8,31,0.08731733759244283
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,64,2,128,0,1,float16,fp8,63,0.08700799942016602
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,64,2,128,0,1,float16,float16,127,0.12244799733161926
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,64,2,128,0,1,float16,fp8,127,0.0972160001595815
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,64,4,128,0,1,float16,float16,1,0.16680532693862915
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,64,4,128,0,1,float16,fp8,1,0.07961600025494893
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,64,4,128,0,1,float16,float16,3,0.1661066710948944
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,64,4,128,0,1,float16,fp8,3,0.07962666451931
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,64,4,128,0,1,float16,float16,7,0.1672266721725464
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,64,4,128,0,1,float16,fp8,7,0.07981333136558533
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,64,4,128,0,1,float16,float16,15,0.16695467631022134
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,64,4,128,0,1,float16,fp8,15,0.07900266846021016
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,64,4,128,0,1,float16,float16,31,0.1690773367881775
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,64,4,128,0,1,float16,fp8,31,0.07965333263079326
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,64,4,128,0,1,float16,fp8,63,0.07903466622034709
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,64,4,128,0,1,float16,float16,63,0.16638400157292685
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,64,4,128,0,1,float16,float16,127,0.16940265893936157
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,64,4,128,0,1,float16,fp8,127,0.09250133236249287
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,64,8,128,0,1,float16,float16,1,0.3081173300743103
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,64,8,128,0,1,float16,fp8,1,0.10492799679438274
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,64,8,128,0,1,float16,float16,3,0.30990399916966754
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,64,8,128,0,1,float16,fp8,3,0.10539733370145161
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,64,8,128,0,1,float16,float16,7,0.31225067377090454
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,64,8,128,0,1,float16,fp8,7,0.10550399621327718
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,64,8,128,0,1,float16,float16,15,0.3139786720275879
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,64,8,128,0,1,float16,fp8,15,0.10508267084757487
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,64,8,128,0,1,float16,fp8,31,0.10530666510264079
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,64,8,128,0,1,float16,float16,63,0.31242666641871136
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,64,8,128,0,1,float16,fp8,63,0.10501333077748616
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,64,8,128,0,1,float16,float16,31,0.31190399328867596
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,64,8,128,0,1,float16,float16,127,0.3135253389676412
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,64,8,128,0,1,float16,fp8,127,0.12304533521334331
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,64,1,128,0,1,float16,float16,1,1.2373759746551514
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,64,1,128,0,1,float16,fp8,1,1.263317346572876
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,64,1,128,0,1,float16,float16,3,1.2664426962534587
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,64,1,128,0,1,float16,fp8,3,1.2923680146535237
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,64,1,128,0,1,float16,fp8,7,1.357210636138916
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,64,1,128,0,1,float16,float16,15,1.691498597462972
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,64,1,128,0,1,float16,float16,7,1.334671974182129
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,64,1,128,0,1,float16,fp8,15,1.6858933766682942
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,64,1,128,0,1,float16,float16,31,1.7032160758972168
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,64,1,128,0,1,float16,fp8,31,1.6901119550069172
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,64,1,128,0,1,float16,float16,63,1.7239306767781575
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,64,2,128,0,1,float16,float16,1,0.23466666539510092
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,64,1,128,0,1,float16,fp8,63,1.7128693262736003
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,64,2,128,0,1,float16,float16,3,0.2334559957186381
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,64,2,128,0,1,float16,fp8,1,0.17192532618840536
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,64,2,128,0,1,float16,fp8,3,0.1710666616757711
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,64,2,128,0,1,float16,fp8,7,0.17181867361068726
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,64,2,128,0,1,float16,float16,15,0.23470399777094522
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,64,2,128,0,1,float16,float16,7,0.23407999674479166
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,64,2,128,0,1,float16,fp8,15,0.1713493267695109
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,64,2,128,0,1,float16,float16,31,0.23384000857671103
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,64,2,128,0,1,float16,fp8,31,0.17124267419179282
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,64,2,128,0,1,float16,fp8,63,0.17018665870030722
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,64,4,128,0,1,float16,float16,1,0.32612266143163043
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,64,4,128,0,1,float16,fp8,1,0.14899733662605286
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,64,4,128,0,1,float16,float16,3,0.3254080017407735
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,64,2,128,0,1,float16,float16,63,0.23278399308522543
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,64,4,128,0,1,float16,fp8,3,0.148799995581309
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,64,4,128,0,1,float16,fp8,7,0.14841600259145102
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,64,4,128,0,1,float16,float16,7,0.32614932457606
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,64,4,128,0,1,float16,float16,15,0.3251466751098633
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,64,4,128,0,1,float16,fp8,15,0.14884799718856812
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,64,4,128,0,1,float16,float16,31,0.3269439935684204
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,64,4,128,0,1,float16,fp8,31,0.14855466286341348
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,64,4,128,0,1,float16,fp8,63,0.14839466412862143
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,64,8,128,0,1,float16,float16,1,0.6116586526234945
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,64,8,128,0,1,float16,fp8,1,0.2029013236363729
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,64,8,128,0,1,float16,fp8,3,0.2025973399480184
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,64,8,128,0,1,float16,float16,3,0.6133493185043335
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,64,4,128,0,1,float16,float16,63,0.32368000348409015
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,64,8,128,0,1,float16,float16,7,0.6167093515396118
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,64,8,128,0,1,float16,fp8,7,0.20322666565577188
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,64,1,128,0,1,float16,float16,1,0.013717333475748697
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,64,8,128,0,1,float16,float16,15,0.6189759969711304
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,64,8,128,0,1,float16,fp8,15,0.2021920084953308
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,64,1,128,0,1,float16,fp8,1,0.013829333086808523
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,64,1,128,0,1,float16,float16,3,0.014069333672523499
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,64,8,128,0,1,float16,fp8,31,0.20318400859832764
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,64,1,128,0,1,float16,fp8,3,0.014096000542243322
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,64,8,128,0,1,float16,float16,31,0.616592009862264
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,64,1,128,0,1,float16,float16,7,0.01441066712141037
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,64,8,128,0,1,float16,float16,63,0.6101440191268921
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,64,1,128,0,1,float16,fp8,7,0.014538666854302088
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,64,1,128,0,1,float16,float16,15,0.01826133330663045
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,64,1,128,0,1,float16,fp8,15,0.01802666609485944
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,64,1,128,0,1,float16,float16,31,0.017840000490347546
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,64,1,128,0,1,float16,fp8,31,0.01777600000301997
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,64,1,128,0,1,float16,float16,63,0.0180479995906353
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,64,8,128,0,1,float16,fp8,63,0.20152533054351807
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,64,1,128,0,1,float16,float16,511,0.05117866893609365
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,64,1,128,0,1,float16,fp8,63,0.0180479995906353
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,64,1,128,0,1,float16,float16,127,0.02146133283774058
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,64,1,128,0,1,float16,fp8,127,0.021738665799299877
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,64,1,128,0,1,float16,float16,255,0.03178133318821589
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,64,1,128,0,1,float16,fp8,255,0.03158933420976003
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,64,1,128,0,1,float16,fp8,511,0.052416001756985985
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,64,1,128,0,1,float16,float16,1023,0.09213866790135701
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,64,1,128,0,1,float16,fp8,1023,0.09318932890892029
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,64,1,128,0,1,float16,float16,2047,0.17052799463272095
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,64,1,128,0,1,float16,fp8,2047,0.17459199825922647
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,64,1,128,0,1,float16,float16,4095,0.329370657602946
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,64,1,128,0,1,float16,fp8,4095,0.33818666140238446
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,64,1,128,0,1,float16,float16,8191,0.6665279865264893
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,64,1,128,0,1,float16,fp8,8191,0.6656959851582845
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,64,2,128,0,1,float16,float16,1,0.013455999394257864
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,64,2,128,0,1,float16,fp8,1,0.009114666531483332
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,64,2,128,0,1,float16,float16,3,0.014042666802803675
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,64,2,128,0,1,float16,fp8,3,0.009226666763424873
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,64,2,128,0,1,float16,float16,7,0.014479999740918478
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,64,1,128,0,1,float16,float16,16383,1.7003626823425293
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,64,2,128,0,1,float16,fp8,7,0.009130666653315226
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,64,1,128,0,1,float16,fp8,16383,1.7785813013712566
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,64,2,128,0,1,float16,float16,15,0.017770666629076004
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,64,2,128,0,1,float16,fp8,15,0.009189333145817121
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,64,2,128,0,1,float16,float16,31,0.018229333062966663
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,64,2,128,0,1,float16,fp8,31,0.009232000137368837
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,64,2,128,0,1,float16,float16,63,0.01809599995613098
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,64,2,128,0,1,float16,fp8,63,0.009866666669646898
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,64,2,128,0,1,float16,float16,127,0.02143466720978419
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,64,2,128,0,1,float16,fp8,127,0.010149333626031876
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,64,2,128,0,1,float16,float16,255,0.014831999937693277
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,64,2,128,0,1,float16,fp8,255,0.011999999483426413
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,64,2,128,0,1,float16,float16,511,0.015637333194414776
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,64,2,128,0,1,float16,fp8,511,0.012650666137536367
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,64,2,128,0,1,float16,float16,1023,0.024069334069887798
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,64,2,128,0,1,float16,fp8,1023,0.01562133307258288
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,64,2,128,0,1,float16,float16,2047,0.03461333364248276
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,64,2,128,0,1,float16,fp8,2047,0.02629866699377696
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,64,2,128,0,1,float16,float16,4095,0.05074666440486908
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,64,2,128,0,1,float16,fp8,4095,0.03141866624355316
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,64,2,128,0,1,float16,float16,8191,0.07425066828727722
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,64,2,128,0,1,float16,fp8,8191,0.05683733522891998
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,64,2,128,0,1,float16,float16,16383,0.10857066512107849
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,64,4,128,0,1,float16,float16,1,0.010373333469033241
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,64,2,128,0,1,float16,fp8,16383,0.08489066362380981
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,64,4,128,0,1,float16,fp8,1,0.007280000175038974
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,64,4,128,0,1,float16,float16,3,0.010805333654085795
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,64,4,128,0,1,float16,fp8,3,0.007178666690985362
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,64,4,128,0,1,float16,float16,7,0.011317333827416102
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,64,4,128,0,1,float16,fp8,7,0.007109333450595538
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,64,4,128,0,1,float16,float16,15,0.010469333579142889
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,64,4,128,0,1,float16,fp8,15,0.00706666645904382
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,64,4,128,0,1,float16,float16,31,0.011029332876205444
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,64,4,128,0,1,float16,fp8,31,0.007205333560705185
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,64,4,128,0,1,float16,float16,63,0.010847999403874079
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,64,4,128,0,1,float16,fp8,63,0.007743999982873599
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,64,4,128,0,1,float16,float16,127,0.01062400018175443
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,64,4,128,0,1,float16,fp8,127,0.007589333380262057
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,64,4,128,0,1,float16,float16,1023,0.02903466671705246
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,64,4,128,0,1,float16,fp8,1023,0.01974933346112569
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,64,4,128,0,1,float16,float16,255,0.013712000101804733
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,64,4,128,0,1,float16,fp8,255,0.009888000165422758
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,64,4,128,0,1,float16,float16,511,0.02004266654451688
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,64,4,128,0,1,float16,fp8,511,0.011146667102972666
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,64,4,128,0,1,float16,float16,2047,0.042863999803860985
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,64,4,128,0,1,float16,fp8,2047,0.023557332654794056
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,64,4,128,0,1,float16,float16,4095,0.0671253353357315
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,64,4,128,0,1,float16,fp8,4095,0.03496533383925756
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,64,4,128,0,1,float16,float16,8191,0.08550399541854858
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,64,4,128,0,1,float16,fp8,8191,0.05099200208981832
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,64,8,128,0,1,float16,float16,1,0.010735999792814255
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,64,4,128,0,1,float16,float16,16383,0.12123733758926392
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,64,4,128,0,1,float16,fp8,16383,0.07488533357779185
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,64,8,128,0,1,float16,fp8,1,0.007290666922926903
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,64,8,128,0,1,float16,float16,3,0.010970667004585266
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,64,8,128,0,1,float16,fp8,3,0.006186666587988536
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,64,8,128,0,1,float16,float16,7,0.011034666250149408
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,64,8,128,0,1,float16,fp8,7,0.006575999781489372
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,64,8,128,0,1,float16,float16,15,0.010874666273593903
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,64,8,128,0,1,float16,fp8,15,0.006341333190600078
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,64,8,128,0,1,float16,float16,31,0.01071999967098236
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,64,8,128,0,1,float16,fp8,31,0.007285333548982938
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,64,8,128,0,1,float16,float16,63,0.010970667004585266
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,64,8,128,0,1,float16,fp8,63,0.0063146669417619705
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,64,8,128,0,1,float16,float16,127,0.011109333485364914
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,64,8,128,0,1,float16,fp8,127,0.007247999931375186
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,64,8,128,0,1,float16,float16,255,0.021322667598724365
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,64,8,128,0,1,float16,fp8,255,0.00926399976015091
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,64,8,128,0,1,float16,float16,511,0.0284853329261144
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,64,8,128,0,1,float16,fp8,511,0.015957333147525787
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,64,8,128,0,1,float16,float16,1023,0.046207999189694725
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,64,8,128,0,1,float16,fp8,1023,0.02161066730817159
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,64,8,128,0,1,float16,float16,2047,0.06506666541099548
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,64,8,128,0,1,float16,fp8,2047,0.02757866680622101
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,64,8,128,0,1,float16,float16,4095,0.0844586690266927
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,64,8,128,0,1,float16,fp8,4095,0.04043200115362803
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,64,8,128,0,1,float16,float16,8191,0.11886399984359741
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,64,8,128,0,1,float16,fp8,8191,0.06032533446947733
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,32,1,64,128,1,float16,float16,1,0.0138026662170887
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,64,8,128,0,1,float16,fp8,16383,0.09860266248385112
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,64,8,128,0,1,float16,float16,16383,0.18936532735824585
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,32,1,64,0,1,float16,float16,1,0.013994666437307993
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,32,1,64,128,1,float16,fp8,1,0.00797333319981893
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,32,1,64,0,1,float16,fp8,1,0.007882666463653246
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,32,1,64,128,1,float16,float16,3,0.013776000589132309
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,32,1,64,0,1,float16,float16,3,0.014277332772811254
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,32,1,64,128,1,float16,fp8,3,0.007887999837597212
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,32,1,64,0,1,float16,fp8,3,0.007642666498819987
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,32,1,64,128,1,float16,float16,7,0.014271999398867289
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,32,1,64,0,1,float16,float16,7,0.015226667126019796
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,32,1,64,128,1,float16,fp8,7,0.00797333319981893
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,32,1,64,0,1,float16,fp8,7,0.007829333345095316
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,32,1,64,128,1,float16,float16,15,0.01471466695268949
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,32,1,64,0,1,float16,float16,15,0.015263999501864115
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,32,1,64,128,1,float16,fp8,15,0.007834666719039282
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,32,1,64,0,1,float16,fp8,15,0.007658666620651881
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,32,1,64,128,1,float16,float16,31,0.01809599995613098
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,32,1,64,0,1,float16,float16,31,0.01825599993268649
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,32,1,64,128,1,float16,fp8,31,0.007882666463653246
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,32,1,64,0,1,float16,fp8,31,0.00785600021481514
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,32,1,64,128,1,float16,float16,63,0.01815466706951459
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,32,1,64,0,1,float16,float16,63,0.01874133323629697
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,32,1,64,128,1,float16,fp8,63,0.007754666730761528
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,32,1,64,0,1,float16,fp8,63,0.007658666620651881
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,32,1,64,128,1,float16,float16,127,0.01844800015290578
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,32,1,64,0,1,float16,float16,127,0.018613333503405254
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,32,1,64,128,1,float16,fp8,127,0.008778666456540426
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,32,1,64,0,1,float16,fp8,127,0.008757333581646284
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,32,1,64,128,1,float16,float16,255,0.013194666554530462
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,32,1,64,0,1,float16,float16,255,0.013690666606028875
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,32,1,64,128,1,float16,fp8,255,0.00927466650803884
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,32,1,64,0,1,float16,fp8,255,0.010181333248813948
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,32,1,64,128,1,float16,float16,511,0.01339200014869372
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,32,1,64,0,1,float16,float16,511,0.013455999394257864
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,32,1,64,128,1,float16,fp8,511,0.009413333609700203
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,32,1,64,0,1,float16,fp8,511,0.010661333799362183
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,32,1,64,128,1,float16,float16,1023,0.019082666685183842
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,32,1,64,0,1,float16,float16,1023,0.019487999379634857
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,32,1,64,128,1,float16,fp8,1023,0.010005333150426546
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,32,1,64,0,1,float16,fp8,1023,0.013760000467300415
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,32,1,64,128,1,float16,float16,2047,0.02205866575241089
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,32,1,64,0,1,float16,float16,2047,0.02792000025510788
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,32,1,64,128,1,float16,fp8,2047,0.010778666784365972
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,32,1,64,0,1,float16,fp8,2047,0.017386666188637417
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,32,1,64,128,1,float16,float16,4095,0.026965332527955372
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,32,1,64,0,1,float16,float16,4095,0.03854399919509888
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,32,1,64,0,1,float16,float16,8191,0.0544106662273407
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,32,1,64,128,1,float16,fp8,4095,0.010869332899649939
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,32,1,64,0,1,float16,fp8,4095,0.028309332827727
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,32,1,64,128,1,float16,float16,8191,0.032069332897663116
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,32,1,64,128,1,float16,fp8,8191,0.011125333607196808
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,32,1,64,0,1,float16,fp8,8191,0.03628266602754593
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,32,1,64,128,1,float16,float16,16383,0.03935466706752777
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,32,1,64,0,1,float16,float16,16383,0.08307733138402303
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,32,2,64,128,1,float16,float16,1,0.00997866690158844
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,32,1,64,128,1,float16,fp8,16383,0.011349332829316458
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,32,1,64,0,1,float16,fp8,16383,0.06197333335876465
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,32,2,64,0,1,float16,float16,1,0.01055466632048289
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,32,2,64,128,1,float16,fp8,1,0.0064319999267657595
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,32,2,64,0,1,float16,fp8,1,0.006549333532651265
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,32,2,64,128,1,float16,float16,3,0.010293333480755487
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,32,2,64,0,1,float16,float16,3,0.009994666402538618
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,32,2,64,128,1,float16,fp8,3,0.006490666419267654
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,32,2,64,0,1,float16,fp8,3,0.008357333640257517
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,32,2,64,128,1,float16,float16,7,0.010101333260536194
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,32,2,64,0,1,float16,float16,7,0.010682666053374609
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,32,2,64,128,1,float16,fp8,7,0.006480000292261441
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,32,2,64,0,1,float16,fp8,7,0.006570666407545407
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,32,2,64,128,1,float16,float16,15,0.010357333347201347
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,32,2,64,0,1,float16,float16,15,0.010197333370645842
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,32,2,64,128,1,float16,fp8,15,0.006405333057045937
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,32,2,64,0,1,float16,fp8,15,0.008383999889095625
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,32,2,64,128,1,float16,float16,31,0.010015999898314476
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,32,2,64,0,1,float16,float16,31,0.010527999450763067
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,32,2,64,128,1,float16,fp8,31,0.006442666674653689
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,32,2,64,0,1,float16,fp8,31,0.006613333399097125
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,32,2,64,128,1,float16,float16,63,0.010048000141978264
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,32,2,64,0,1,float16,float16,63,0.010149333626031876
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,32,2,64,128,1,float16,fp8,63,0.006533333410819371
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,32,2,64,0,1,float16,fp8,63,0.008527999743819237
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,32,2,64,128,1,float16,float16,127,0.009941333283980688
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,32,2,64,0,1,float16,float16,127,0.010501333822806677
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,32,2,64,0,1,float16,fp8,255,0.009205333267649015
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,32,2,64,128,1,float16,fp8,127,0.007002666592597961
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,32,2,64,0,1,float16,fp8,127,0.007226666435599327
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,32,2,64,128,1,float16,float16,255,0.01219733307758967
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,32,2,64,0,1,float16,float16,255,0.012319999436537424
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,32,2,64,128,1,float16,fp8,255,0.007925333455204964
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,32,2,64,128,1,float16,float16,511,0.01643199970324834
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,32,2,64,0,1,float16,float16,511,0.017610666652520496
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,32,2,64,128,1,float16,fp8,511,0.009072000160813332
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,32,2,64,128,1,float16,float16,2047,0.027269333600997925
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,32,2,64,0,1,float16,fp8,511,0.009578666960199675
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,32,2,64,128,1,float16,float16,1023,0.021349333226680756
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,32,2,64,0,1,float16,float16,1023,0.024735999604066212
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,32,2,64,128,1,float16,fp8,1023,0.008853333070874214
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,32,2,64,0,1,float16,fp8,1023,0.012437333663304647
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,32,2,64,0,1,float16,float16,2047,0.03669866671164831
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,32,2,64,128,1,float16,fp8,2047,0.009632000078757605
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,32,2,64,0,1,float16,fp8,2047,0.0199946661790212
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,32,2,64,128,1,float16,float16,4095,0.03469333300987879
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,32,2,64,0,1,float16,float16,4095,0.0528106689453125
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,32,2,64,128,1,float16,fp8,8191,0.010015999898314476
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,32,2,64,0,1,float16,fp8,8191,0.03532800078392029
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,32,2,64,128,1,float16,fp8,4095,0.009269333134094873
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,32,2,64,0,1,float16,fp8,4095,0.02605866640806198
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,32,2,64,128,1,float16,float16,8191,0.034245334565639496
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,32,2,64,128,1,float16,fp8,16383,0.009989333028594652
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,32,4,64,128,1,float16,float16,1,0.010266666611035665
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,32,2,64,0,1,float16,float16,8191,0.06790400048096974
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,32,4,64,0,1,float16,fp8,1,0.00797333319981893
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,32,2,64,128,1,float16,float16,16383,0.0346666673819224
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,32,2,64,0,1,float16,float16,16383,0.08891200025876363
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,32,4,64,0,1,float16,float16,1,0.010485333700974783
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,32,2,64,0,1,float16,fp8,16383,0.05753066639105479
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,32,4,64,128,1,float16,fp8,1,0.0058133335163195925
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,32,4,64,128,1,float16,float16,3,0.01020800011853377
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,32,4,64,0,1,float16,float16,3,0.010741333166758219
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,32,4,64,128,1,float16,fp8,3,0.008570666735370954
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,32,4,64,0,1,float16,fp8,3,0.006010666489601135
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,32,4,64,0,1,float16,float16,15,0.010869332899649939
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,32,4,64,128,1,float16,float16,7,0.010266666611035665
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,32,4,64,0,1,float16,float16,7,0.010410666465759277
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,32,4,64,128,1,float16,fp8,7,0.006058666855096817
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,32,4,64,0,1,float16,fp8,7,0.0057920000205437345
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,32,4,64,128,1,float16,float16,15,0.010319999729593595
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,32,4,64,128,1,float16,fp8,15,0.008400000010927519
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,32,4,64,0,1,float16,fp8,15,0.006069333602984746
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,32,4,64,128,1,float16,float16,31,0.01091733326514562
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,32,4,64,0,1,float16,float16,31,0.010224000240365664
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,32,4,64,128,1,float16,fp8,31,0.006069333602984746
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,32,4,64,0,1,float16,fp8,31,0.0058080001423756284
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,32,4,64,128,1,float16,float16,63,0.0102613332370917
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,32,4,64,0,1,float16,float16,63,0.01089599976936976
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,32,4,64,128,1,float16,fp8,127,0.006208000083764394
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,32,4,64,128,1,float16,fp8,63,0.006021333237489064
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,32,4,64,0,1,float16,fp8,63,0.0075093333919843035
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,32,4,64,128,1,float16,float16,127,0.010442666709423065
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,32,4,64,0,1,float16,float16,127,0.010389333590865135
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,32,4,64,0,1,float16,fp8,127,0.006448000048597653
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,32,4,64,128,1,float16,float16,255,0.018613333503405254
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,32,4,64,0,1,float16,float16,255,0.018101333330074947
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,32,4,64,128,1,float16,fp8,255,0.0074506668994824094
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,32,4,64,0,1,float16,fp8,255,0.008885333314538002
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,32,4,64,128,1,float16,float16,511,0.02276266614596049
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,32,4,64,0,1,float16,float16,511,0.025237334271272022
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,32,4,64,128,1,float16,fp8,511,0.008240000034372011
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,32,4,64,0,1,float16,fp8,511,0.010474666953086853
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,32,4,64,128,1,float16,float16,1023,0.032986665765444435
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,32,4,64,0,1,float16,float16,1023,0.039887999494870506
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,32,4,64,128,1,float16,fp8,1023,0.00938666673998038
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,32,4,64,0,1,float16,fp8,1023,0.016864000509182613
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,32,4,64,128,1,float16,float16,2047,0.03523733218510946
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,32,4,64,0,1,float16,float16,2047,0.051455999414126076
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,32,4,64,128,1,float16,fp8,4095,0.008992000172535578
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,32,4,64,128,1,float16,fp8,2047,0.00938666673998038
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,32,4,64,0,1,float16,fp8,2047,0.020362666497627895
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,32,4,64,128,1,float16,float16,8191,0.03558400024970373
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,32,4,64,128,1,float16,float16,4095,0.036277333895365395
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,32,4,64,0,1,float16,float16,4095,0.06659733255704244
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,32,4,64,0,1,float16,fp8,4095,0.028746667007605236
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,32,4,64,0,1,float16,float16,8191,0.08806932965914409
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,32,4,64,128,1,float16,fp8,8191,0.009509333098928133
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,32,4,64,0,1,float16,float16,16383,0.1317813297112783
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,32,4,64,0,1,float16,fp8,8191,0.04608533283074697
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,32,4,64,128,1,float16,float16,16383,0.03547733277082443
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,32,4,64,128,1,float16,fp8,16383,0.00891733355820179
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,32,4,64,0,1,float16,fp8,16383,0.07894933223724365
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,32,8,64,0,1,float16,fp8,1,0.0064266665528217954
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,32,8,64,128,1,float16,fp8,3,0.0064266665528217954
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,32,8,64,128,1,float16,fp8,1,0.008474666625261307
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,32,8,64,128,1,float16,fp8,7,0.008405333384871483
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,32,8,64,0,1,float16,fp8,7,0.008277333031098047
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,32,8,64,0,1,float16,fp8,3,0.007402666533986728
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,32,8,64,128,1,float16,float16,1,0.014581333845853806
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,32,8,64,0,1,float16,float16,1,0.014650666465361914
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,32,8,64,128,1,float16,float16,3,0.014549333602190018
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,32,8,64,128,1,float16,float16,31,0.014405333747466406
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,32,8,64,0,1,float16,float16,31,0.014469332993030548
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,32,8,64,0,1,float16,float16,3,0.014794666320085526
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,32,8,64,128,1,float16,float16,7,0.014554666976133982
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,32,8,64,128,1,float16,float16,63,0.014554666976133982
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,32,8,64,0,1,float16,float16,7,0.014426667243242264
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,32,8,64,0,1,float16,float16,63,0.01422400027513504
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,32,8,64,128,1,float16,fp8,63,0.006298666819930077
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,32,8,64,0,1,float16,fp8,63,0.006058666855096817
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,32,8,64,128,1,float16,float16,15,0.01532799998919169
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,32,8,64,0,1,float16,float16,15,0.01664000004529953
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,32,8,64,0,1,float16,float16,127,0.01421333352724711
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,32,8,64,128,1,float16,float16,127,0.014432000617186228
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,32,8,64,128,1,float16,fp8,127,0.008112000301480293
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,32,8,64,0,1,float16,fp8,127,0.0075040000180403394
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,32,8,64,128,1,float16,float16,255,0.026261332134405773
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,32,8,64,0,1,float16,float16,255,0.02622933437426885
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,32,8,64,128,1,float16,fp8,255,0.009610666582981745
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,32,8,64,128,1,float16,float16,511,0.03573866685231527
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,32,8,64,0,1,float16,fp8,255,0.00980266680320104
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,32,8,64,0,1,float16,float16,511,0.040287998815377556
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,32,8,64,128,1,float16,fp8,511,0.010506667196750641
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,32,8,64,0,1,float16,fp8,511,0.01525866612792015
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,32,8,64,0,1,float16,fp8,15,0.006325333068768184
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,32,8,64,128,1,float16,fp8,15,0.006335999816656113
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,32,8,64,128,1,float16,float16,1023,0.03632533301909765
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,32,8,64,0,1,float16,float16,1023,0.049226666490236916
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,32,8,64,128,1,float16,fp8,1023,0.010426666587591171
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,32,8,64,0,1,float16,fp8,1023,0.019365333020687103
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,32,8,64,128,1,float16,float16,2047,0.03828266759713491
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,32,8,64,0,1,float16,float16,2047,0.06675733129183452
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,32,8,64,128,1,float16,fp8,2047,0.010186666622757912
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,32,8,64,128,1,float16,fp8,31,0.006239999706546466
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,32,8,64,0,1,float16,fp8,2047,0.027530667682488758
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,32,8,64,0,1,float16,fp8,31,0.006319999694824219
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,32,8,64,128,1,float16,float16,4095,0.0390079990029335
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,32,8,64,0,1,float16,float16,4095,0.08820266524950664
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,32,8,64,128,1,float16,fp8,4095,0.009829333052039146
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,32,8,64,0,1,float16,fp8,4095,0.044922664761543274
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,32,8,64,128,1,float16,float16,8191,0.03851199895143509
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,32,8,64,0,1,float16,float16,8191,0.13237866759300232
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,32,8,64,128,1,float16,fp8,8191,0.009904000287254652
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,32,8,64,0,1,float16,fp8,8191,0.07739733159542084
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,32,8,64,128,1,float16,float16,16383,0.03910933434963226
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,32,8,64,0,1,float16,float16,16383,0.21902400255203247
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,32,8,64,128,1,float16,fp8,16383,0.010010666524370512
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,32,1,64,128,1,float16,float16,1,0.008143999924262365
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,32,1,64,0,1,float16,float16,3,0.007637333124876022
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,32,8,64,0,1,float16,fp8,16383,0.1423360009988149
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,32,1,64,0,1,float16,float16,1,0.007600000128149986
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,32,1,64,128,1,float16,fp8,1,0.007680000116427739
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,32,1,64,0,1,float16,fp8,1,0.00754666638871034
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,32,1,64,128,1,float16,float16,3,0.008618666479984919
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,32,1,64,128,1,float16,fp8,3,0.007647999872763951
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,32,1,64,0,1,float16,fp8,3,0.0075626665105422335
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,32,1,64,128,1,float16,float16,7,0.007637333124876022
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,32,1,64,0,1,float16,float16,7,0.00744000015159448
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,32,1,64,128,1,float16,fp8,7,0.007717333113153775
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,32,1,64,0,1,float16,fp8,7,0.0075573331365982694
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,32,1,64,128,1,float16,float16,15,0.008602666358153025
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,32,1,64,0,1,float16,float16,15,0.007760000104705493
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,32,1,64,128,1,float16,fp8,15,0.007594666754206021
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,32,1,64,0,1,float16,fp8,15,0.00766933336853981
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,32,1,64,128,1,float16,float16,31,0.007711999739209811
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,32,1,64,0,1,float16,float16,31,0.007941333577036858
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,32,1,64,128,1,float16,fp8,31,0.007589333380262057
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,32,1,64,0,1,float16,fp8,31,0.008725333337982496
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,32,1,64,128,1,float16,float16,63,0.008576000109314919
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,32,1,64,0,1,float16,float16,63,0.008496000121037165
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,32,1,64,0,1,float16,fp8,127,0.009008000294367472
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,32,1,64,128,1,float16,fp8,127,0.009285333255926767
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,32,1,64,128,1,float16,float16,255,0.008778666456540426
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,32,1,64,128,1,float16,fp8,63,0.007600000128149986
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,32,1,64,0,1,float16,fp8,63,0.007680000116427739
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,32,1,64,128,1,float16,float16,127,0.00871999996403853
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,32,1,64,0,1,float16,float16,127,0.008752000207702318
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,32,1,64,0,1,float16,float16,255,0.008767999708652496
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,32,1,64,128,1,float16,fp8,255,0.009530666594703993
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,32,1,64,0,1,float16,fp8,255,0.010026666646202406
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,32,1,64,128,1,float16,float16,511,0.008901333436369896
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,32,1,64,0,1,float16,float16,511,0.009829333052039146
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,32,1,64,128,1,float16,fp8,511,0.00892800030608972
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,32,1,64,0,1,float16,fp8,511,0.01080000028014183
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,32,1,64,128,1,float16,float16,2047,0.01303999995191892
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,32,1,64,128,1,float16,float16,1023,0.013744000345468521
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,32,1,64,0,1,float16,float16,1023,0.016399999459584553
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,32,1,64,128,1,float16,fp8,1023,0.008922666932145754
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,32,1,64,0,1,float16,fp8,1023,0.011221333096424738
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,32,1,64,0,1,float16,float16,2047,0.016586666305859882
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,32,1,64,128,1,float16,fp8,2047,0.009488000224033991
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,32,1,64,0,1,float16,fp8,2047,0.012357333054145178
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,32,1,64,128,1,float16,float16,4095,0.019744000087181728
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,32,1,64,0,1,float16,float16,4095,0.028362666567166645
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,32,1,64,128,1,float16,fp8,4095,0.008933333059151968
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,32,1,64,0,1,float16,fp8,4095,0.01444799949725469
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,32,1,64,128,1,float16,float16,8191,0.014181333283583323
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,32,1,64,128,1,float16,fp8,8191,0.00897066667675972
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,32,1,64,0,1,float16,float16,8191,0.01748266691962878
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,32,1,64,0,1,float16,fp8,8191,0.017077332983414333
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,32,1,64,128,1,float16,float16,16383,0.01522133375207583
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,32,1,64,0,1,float16,float16,16383,0.018794666975736618
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,32,1,64,128,1,float16,fp8,16383,0.00898133342464765
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,32,1,64,0,1,float16,fp8,16383,0.02088533341884613
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,32,1,64,128,1,float16,float16,32767,0.016970666746298473
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,32,1,64,0,1,float16,float16,32767,0.022128000855445862
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,32,1,64,128,1,float16,fp8,32767,0.009434666484594345
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,32,1,64,0,1,float16,fp8,32767,0.026208000878492992
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,32,1,64,128,1,float16,float16,65535,0.018853332847356796
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,32,2,64,128,1,float16,float16,1,0.007637333124876022
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,32,2,64,0,1,float16,float16,1,0.007605333502093951
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,32,1,64,128,1,float16,fp8,65535,0.009088000282645226
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,32,2,64,128,1,float16,fp8,1,0.007221333061655362
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,32,1,64,0,1,float16,float16,65535,0.026591998835404713
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,32,2,64,0,1,float16,fp8,1,0.006405333057045937
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,32,1,64,0,1,float16,fp8,65535,0.03324799984693527
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,32,2,64,128,1,float16,float16,3,0.007653333246707916
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,32,2,64,0,1,float16,float16,3,0.007743999982873599
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,32,2,64,128,1,float16,fp8,3,0.006511999915043513
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,32,2,64,0,1,float16,fp8,3,0.006528000036875407
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,32,2,64,128,1,float16,float16,7,0.008186666915814081
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,32,2,64,0,1,float16,float16,7,0.008469333251317343
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,32,2,64,128,1,float16,fp8,7,0.006288000072042148
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,32,2,64,0,1,float16,fp8,7,0.006506666541099548
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,32,2,64,128,1,float16,float16,15,0.007717333113153775
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,32,2,64,0,1,float16,float16,15,0.007770666852593422
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,32,2,64,128,1,float16,fp8,15,0.006506666541099548
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,32,2,64,0,1,float16,fp8,15,0.006325333068768184
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,32,2,64,128,1,float16,float16,31,0.008447999755541483
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,32,2,64,0,1,float16,float16,31,0.008789333204428354
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,32,2,64,128,1,float16,fp8,31,0.006351999938488007
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,32,2,64,0,1,float16,fp8,31,0.006522666662931442
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,32,2,64,128,1,float16,float16,63,0.008527999743819237
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,32,2,64,0,1,float16,float16,63,0.0086666668454806
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,32,2,64,128,1,float16,fp8,63,0.0064106664309899015
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,32,2,64,0,1,float16,fp8,63,0.006511999915043513
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,32,2,64,128,1,float16,float16,127,0.009312000125646591
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,32,2,64,0,1,float16,float16,127,0.009445333232482275
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,32,2,64,128,1,float16,fp8,127,0.006890666360656421
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,32,2,64,0,1,float16,fp8,127,0.007109333450595538
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,32,2,64,128,1,float16,float16,255,0.008821333448092142
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,32,2,64,128,1,float16,fp8,511,0.0075519997626543045
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,32,2,64,0,1,float16,float16,255,0.008858666444818178
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,32,2,64,128,1,float16,fp8,255,0.007930666829148928
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,32,2,64,0,1,float16,fp8,255,0.008282666405042013
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,32,2,64,128,1,float16,float16,511,0.009328000247478485
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,32,2,64,0,1,float16,float16,511,0.01055466632048289
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,32,2,64,0,1,float16,fp8,511,0.008799999952316284
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,32,2,64,128,1,float16,float16,1023,0.013797332843144735
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,32,2,64,0,1,float16,float16,1023,0.015626666446526844
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,32,2,64,128,1,float16,fp8,1023,0.007701333612203598
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,32,2,64,0,1,float16,fp8,1023,0.009786666681369146
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,32,2,64,128,1,float16,float16,2047,0.013034666577974955
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,32,2,64,0,1,float16,float16,2047,0.017407999684413273
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,32,2,64,128,1,float16,fp8,2047,0.008858666444818178
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,32,2,64,0,1,float16,fp8,2047,0.010703999549150467
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,32,2,64,128,1,float16,float16,4095,0.01250133290886879
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,32,2,64,0,1,float16,float16,4095,0.012821332861979803
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,32,2,64,128,1,float16,fp8,4095,0.007658666620651881
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,32,2,64,0,1,float16,fp8,8191,0.014511999984582266
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,32,2,64,0,1,float16,fp8,4095,0.012133333832025528
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,32,2,64,128,1,float16,float16,8191,0.013637332866589228
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,32,2,64,0,1,float16,float16,8191,0.015872000406185787
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,32,2,64,128,1,float16,fp8,8191,0.007802666475375493
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,32,2,64,128,1,float16,float16,16383,0.013925333817799887
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,32,2,64,0,1,float16,float16,16383,0.016117333124081295
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,32,2,64,128,1,float16,fp8,16383,0.007621333623925845
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,32,2,64,0,1,float16,fp8,16383,0.017504000415404636
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,32,2,64,128,1,float16,float16,32767,0.016506666938463848
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,32,2,64,0,1,float16,float16,32767,0.019925333559513092
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,32,2,64,128,1,float16,fp8,32767,0.007829333345095316
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,32,2,64,0,1,float16,fp8,32767,0.022272000710169475
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,32,2,64,128,1,float16,float16,65535,0.01703466723362605
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,32,4,64,128,1,float16,float16,1,0.00921066664159298
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,32,4,64,0,1,float16,float16,1,0.00785600021481514
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,32,2,64,0,1,float16,float16,65535,0.022495999932289124
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,32,2,64,128,1,float16,fp8,65535,0.007754666730761528
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,32,4,64,128,1,float16,fp8,1,0.005669333040714264
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,32,4,64,0,1,float16,fp8,1,0.005578666925430298
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,32,4,64,128,1,float16,float16,3,0.008527999743819237
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,32,2,64,0,1,float16,fp8,65535,0.029146666328112285
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,32,4,64,0,1,float16,float16,3,0.00784533346692721
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,32,4,64,128,1,float16,fp8,3,0.005445333197712898
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,32,4,64,0,1,float16,fp8,3,0.005503999690214793
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,32,4,64,128,1,float16,float16,7,0.007680000116427739
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,32,4,64,0,1,float16,float16,7,0.007610666876037915
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,32,4,64,128,1,float16,fp8,7,0.008298666526873907
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,32,4,64,0,1,float16,fp8,7,0.007397333160042763
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,32,4,64,128,1,float16,float16,15,0.007770666852593422
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,32,4,64,0,1,float16,float16,15,0.007466666400432587
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,32,4,64,128,1,float16,fp8,15,0.005578666925430298
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,32,4,64,0,1,float16,fp8,15,0.005759999776879947
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,32,4,64,128,1,float16,float16,31,0.007887999837597212
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,32,4,64,0,1,float16,float16,31,0.007930666829148928
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,32,4,64,128,1,float16,fp8,31,0.008400000010927519
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,32,4,64,0,1,float16,fp8,31,0.00720000018676122
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,32,4,64,128,1,float16,float16,63,0.008805333326260248
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,32,4,64,128,1,float16,fp8,127,0.008432000254591307
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,32,4,64,0,1,float16,fp8,127,0.00731733317176501
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,32,4,64,0,1,float16,float16,63,0.008463999877373377
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,32,4,64,0,1,float16,float16,255,0.009455999980370203
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,32,4,64,128,1,float16,fp8,63,0.005482666815320651
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,32,4,64,0,1,float16,fp8,63,0.005621333296100299
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,32,4,64,128,1,float16,float16,127,0.008725333337982496
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,32,4,64,0,1,float16,float16,127,0.008682666967312494
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,32,4,64,128,1,float16,float16,255,0.008943999807039896
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,32,4,64,128,1,float16,fp8,255,0.007663999994595845
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,32,4,64,0,1,float16,fp8,255,0.007290666922926903
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,32,4,64,128,1,float16,float16,511,0.009514666472872099
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,32,4,64,0,1,float16,float16,511,0.009685333197315535
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,32,4,64,0,1,float16,fp8,1023,0.007983999947706858
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,32,4,64,128,1,float16,fp8,511,0.007040000210205714
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,32,4,64,0,1,float16,fp8,511,0.007631999750932057
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,32,4,64,128,1,float16,float16,1023,0.013728000223636627
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,32,4,64,0,1,float16,float16,1023,0.015893333901961643
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,32,4,64,128,1,float16,fp8,1023,0.007877333089709282
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,32,4,64,128,1,float16,float16,2047,0.012602667013804117
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,32,4,64,0,1,float16,float16,2047,0.012426666915416718
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,32,4,64,0,1,float16,fp8,4095,0.010698666175206503
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,32,4,64,128,1,float16,fp8,2047,0.00721066693464915
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,32,4,64,0,1,float16,fp8,2047,0.009098666409651438
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,32,4,64,128,1,float16,float16,4095,0.012261333564917246
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,32,4,64,0,1,float16,float16,4095,0.013157332936922709
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,32,4,64,128,1,float16,fp8,4095,0.006911999856432279
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,32,4,64,128,1,float16,float16,8191,0.014074667046467463
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,32,4,64,0,1,float16,float16,8191,0.015461333096027374
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,32,4,64,128,1,float16,fp8,8191,0.007605333502093951
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,32,4,64,0,1,float16,fp8,8191,0.012522666404644648
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,32,4,64,128,1,float16,float16,16383,0.014064000298579534
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,32,4,64,0,1,float16,float16,16383,0.016682667036851246
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,32,4,64,128,1,float16,fp8,16383,0.007189333438873291
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,32,4,64,128,1,float16,fp8,32767,0.0075626665105422335
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,32,4,64,0,1,float16,fp8,16383,0.014511999984582266
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,32,4,64,0,1,float16,float16,32767,0.026346666117509205
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,32,4,64,128,1,float16,float16,32767,0.01756799966096878
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,32,4,64,0,1,float16,fp8,32767,0.01985599969824155
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,32,8,64,0,1,float16,float16,1,0.007749333356817563
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,32,8,64,128,1,float16,float16,1,0.00790933333337307
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,32,4,64,128,1,float16,float16,65535,0.019466667125622433
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,32,4,64,0,1,float16,float16,65535,0.03198933353026708
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,32,8,64,128,1,float16,fp8,1,0.008522666369875273
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,32,4,64,128,1,float16,fp8,65535,0.007216000308593114
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,32,8,64,0,1,float16,fp8,1,0.007135999699433644
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,32,8,64,128,1,float16,float16,7,0.008698666468262672
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,32,8,64,0,1,float16,float16,7,0.007663999994595845
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,32,8,64,128,1,float16,float16,3,0.007690666864315669
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,32,4,64,0,1,float16,fp8,65535,0.025205334027608235
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,32,8,64,0,1,float16,float16,3,0.007882666463653246
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,32,8,64,128,1,float16,fp8,3,0.0057920000205437345
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,32,8,64,0,1,float16,fp8,3,0.005679999788602193
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,32,8,64,128,1,float16,fp8,7,0.007040000210205714
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,32,8,64,0,1,float16,fp8,7,0.008581333483258883
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,32,8,64,128,1,float16,float16,15,0.007701333612203598
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,32,8,64,0,1,float16,float16,15,0.007887999837597212
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,32,8,64,128,1,float16,fp8,15,0.007194666812817256
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,32,8,64,0,1,float16,fp8,15,0.00573333352804184
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,32,8,64,128,1,float16,float16,31,0.007903999959429106
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,32,8,64,0,1,float16,float16,31,0.00814933329820633
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,32,8,64,128,1,float16,fp8,31,0.008298666526873907
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,32,8,64,0,1,float16,fp8,31,0.005984000240763028
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,32,8,64,128,1,float16,float16,63,0.00884799969693025
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,32,8,64,0,1,float16,float16,63,0.00903466654320558
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,32,8,64,0,1,float16,fp8,127,0.006037333359320958
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,32,8,64,128,1,float16,fp8,63,0.005637333417932193
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,32,8,64,0,1,float16,fp8,63,0.006282666698098183
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,32,8,64,128,1,float16,float16,127,0.00898133342464765
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,32,8,64,0,1,float16,float16,127,0.009583999713261923
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,32,8,64,128,1,float16,fp8,127,0.006154666965206464
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,32,8,64,0,1,float16,float16,255,0.00873066671192646
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,32,8,64,128,1,float16,float16,255,0.00921066664159298
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,32,8,64,128,1,float16,fp8,255,0.0069226666043202085
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,32,8,64,0,1,float16,fp8,255,0.00725333330531915
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,32,8,64,128,1,float16,float16,1023,0.011776000261306763
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,32,8,64,128,1,float16,float16,511,0.009253333633144697
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,32,8,64,0,1,float16,float16,511,0.010629333555698395
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,32,8,64,0,1,float16,fp8,1023,0.008469333251317343
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,32,8,64,128,1,float16,fp8,511,0.00679466687142849
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,32,8,64,0,1,float16,fp8,511,0.007370666911204656
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,32,8,64,0,1,float16,float16,1023,0.01166933278242747
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,32,8,64,128,1,float16,fp8,1023,0.006954666847983996
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,32,8,64,128,1,float16,float16,2047,0.012565333396196365
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,32,8,64,0,1,float16,float16,2047,0.013434667140245438
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,32,8,64,128,1,float16,fp8,2047,0.007525333513816197
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,32,8,64,0,1,float16,fp8,2047,0.00903466654320558
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,32,8,64,128,1,float16,float16,4095,0.012170666207869848
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,32,8,64,128,1,float16,fp8,8191,0.007631999750932057
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,32,8,64,0,1,float16,float16,4095,0.013173333058754602
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,32,8,64,128,1,float16,fp8,4095,0.00701333334048589
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,32,8,64,0,1,float16,fp8,4095,0.010512000570694605
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,32,8,64,0,1,float16,float16,8191,0.0199946661790212
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,32,8,64,128,1,float16,float16,8191,0.016255999604860943
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,32,8,64,0,1,float16,fp8,8191,0.013141332815090815
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,32,8,64,128,1,float16,float16,16383,0.01618133361140887
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,32,8,64,0,1,float16,float16,16383,0.024192000428835552
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,32,8,64,128,1,float16,fp8,16383,0.007029333462317784
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,32,8,64,0,1,float16,fp8,16383,0.01589866727590561
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,32,8,64,0,1,float16,float16,32767,0.03459733227888743
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,32,8,64,128,1,float16,float16,32767,0.020495999604463577
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,32,8,64,128,1,float16,fp8,32767,0.00877333308259646
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,32,1,64,128,1,float16,float16,1,0.00772266648709774
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,32,1,64,0,1,float16,float16,1,0.008053333188096682
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,32,8,64,0,1,float16,fp8,32767,0.022101332743962605
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,32,8,64,128,1,float16,float16,65535,0.023578666150569916
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,32,8,64,0,1,float16,fp8,65535,0.03265066693226496
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,32,1,64,128,1,float16,fp8,1,0.008736000085870424
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,32,8,64,128,1,float16,fp8,65535,0.007162666569153468
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,32,1,64,0,1,float16,fp8,1,0.007653333246707916
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,32,8,64,0,1,float16,float16,65535,0.0525439977645874
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,32,1,64,128,1,float16,float16,3,0.008058666562040647
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,32,1,64,0,1,float16,float16,3,0.007776000226537387
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,32,1,64,128,1,float16,fp8,3,0.0075573331365982694
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,32,1,64,0,1,float16,fp8,3,0.007781333600481351
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,32,1,64,128,1,float16,float16,7,0.008672000219424566
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,32,1,64,0,1,float16,float16,7,0.007802666475375493
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,32,1,64,128,1,float16,fp8,7,0.007861333588759104
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,32,1,64,128,1,float16,float16,31,0.00878399983048439
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,32,1,64,0,1,float16,fp8,7,0.008229333286484083
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,32,1,64,128,1,float16,float16,15,0.008058666562040647
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,32,1,64,0,1,float16,fp8,31,0.008821333448092142
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,32,1,64,0,1,float16,float16,15,0.007626666376988093
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,32,1,64,128,1,float16,fp8,15,0.0074879998962084455
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,32,1,64,0,1,float16,fp8,15,0.007840000092983246
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,32,1,64,0,1,float16,float16,31,0.008005333443482717
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,32,1,64,128,1,float16,fp8,31,0.007642666498819987
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,32,1,64,128,1,float16,float16,63,0.009242666885256767
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,32,1,64,0,1,float16,float16,63,0.00895999992887179
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,32,1,64,0,1,float16,fp8,127,0.0085333331177632
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,32,1,64,128,1,float16,fp8,63,0.009141333401203156
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,32,1,64,0,1,float16,fp8,63,0.008074666683872541
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,32,1,64,128,1,float16,float16,127,0.008698666468262672
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,32,1,64,0,1,float16,float16,127,0.008869333192706108
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,32,1,64,128,1,float16,fp8,127,0.009093333035707474
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,32,1,64,128,1,float16,float16,255,0.009370666618148485
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,32,1,64,0,1,float16,float16,255,0.009354666496316591
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,32,1,64,128,1,float16,fp8,255,0.009173333023985228
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,32,1,64,0,1,float16,fp8,255,0.011168000598748526
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,32,1,64,128,1,float16,float16,511,0.00874133345981439
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,32,1,64,0,1,float16,float16,511,0.009706666693091393
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,32,1,64,128,1,float16,fp8,511,0.009573333586255709
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,32,1,64,0,1,float16,fp8,511,0.010687999427318573
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,32,1,64,128,1,float16,float16,1023,0.014432000617186228
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,32,1,64,0,1,float16,float16,1023,0.016447999825080235
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,32,1,64,128,1,float16,fp8,1023,0.009045333291093508
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,32,1,64,0,1,float16,fp8,1023,0.012389333297808966
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,32,1,64,128,1,float16,float16,2047,0.02035733312368393
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,32,1,64,0,1,float16,float16,2047,0.027503999571005504
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,32,1,64,128,1,float16,fp8,2047,0.009136000027259191
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,32,1,64,0,1,float16,fp8,2047,0.012650666137536367
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,32,1,64,128,1,float16,float16,4095,0.015482666591803232
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,32,1,64,0,1,float16,float16,4095,0.015466666469971338
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,32,1,64,128,1,float16,fp8,4095,0.009082666908701261
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,32,1,64,0,1,float16,fp8,4095,0.01562133307258288
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,32,1,64,128,1,float16,float16,8191,0.015103999525308609
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,32,1,64,0,1,float16,float16,8191,0.01811733345190684
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,32,1,64,128,1,float16,fp8,8191,0.009162666896979014
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,32,1,64,0,1,float16,fp8,8191,0.01791999985774358
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,32,1,64,0,1,float16,fp8,16383,0.020608000457286835
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,32,1,64,128,1,float16,float16,16383,0.01657066618402799
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,32,1,64,0,1,float16,float16,16383,0.01923199991385142
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,32,1,64,128,1,float16,fp8,16383,0.009722666814923286
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,32,1,64,128,1,float16,float16,32767,0.017749333133300144
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,32,1,64,0,1,float16,float16,32767,0.022821334501107533
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,32,1,64,0,1,float16,fp8,32767,0.026394667724768322
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,32,1,64,128,1,float16,fp8,32767,0.009797333429257074
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,32,2,64,128,1,float16,float16,1,0.007850666840871176
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,32,1,64,0,1,float16,float16,65535,0.027210667729377747
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,32,2,64,0,1,float16,float16,1,0.007589333380262057
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,32,1,64,128,1,float16,float16,65535,0.019786667078733444
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,32,2,64,128,1,float16,fp8,1,0.007429333403706551
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,32,1,64,0,1,float16,fp8,65535,0.03331733246644338
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,32,2,64,0,1,float16,fp8,1,0.006506666541099548
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,32,1,64,128,1,float16,fp8,65535,0.009850666547815004
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,32,2,64,128,1,float16,float16,3,0.007770666852593422
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,32,2,64,0,1,float16,float16,3,0.007647999872763951
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,32,2,64,128,1,float16,fp8,3,0.007114666824539502
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,32,2,64,0,1,float16,fp8,3,0.0075040000180403394
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,32,2,64,128,1,float16,float16,15,0.007701333612203598
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,32,2,64,128,1,float16,float16,7,0.007685333490371704
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,32,2,64,0,1,float16,float16,7,0.007770666852593422
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,32,2,64,128,1,float16,fp8,7,0.008463999877373377
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,32,2,64,0,1,float16,fp8,7,0.0064319999267657595
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,32,2,64,0,1,float16,float16,15,0.007994666695594788
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,32,2,64,128,1,float16,fp8,15,0.007461333026488622
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,32,2,64,0,1,float16,fp8,15,0.0063733334342638654
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,32,2,64,128,1,float16,float16,31,0.007887999837597212
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,32,2,64,0,1,float16,float16,31,0.008047999814152718
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,32,2,64,128,1,float16,fp8,31,0.008405333384871483
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,32,2,64,0,1,float16,fp8,31,0.006581333155433337
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,32,2,64,128,1,float16,float16,63,0.009418666362762451
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,32,2,64,128,1,float16,fp8,63,0.006469333544373512
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,32,2,64,0,1,float16,float16,63,0.009093333035707474
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,32,2,64,128,1,float16,float16,255,0.009258666386206945
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,32,2,64,0,1,float16,fp8,63,0.006335999816656113
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,32,2,64,128,1,float16,float16,127,0.008816000074148178
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,32,2,64,0,1,float16,float16,127,0.008687999720374743
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,32,2,64,128,1,float16,fp8,127,0.008469333251317343
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,32,2,64,0,1,float16,fp8,127,0.007045333584149678
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,32,2,64,0,1,float16,float16,255,0.008858666444818178
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,32,2,64,128,1,float16,fp8,255,0.008389333263039589
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,32,2,64,0,1,float16,fp8,255,0.00901333304742972
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,32,2,64,128,1,float16,float16,511,0.008752000207702318
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,32,2,64,0,1,float16,float16,511,0.009813333551088968
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,32,2,64,128,1,float16,fp8,511,0.007823999971151352
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,32,2,64,0,1,float16,fp8,511,0.009743999689817429
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,32,2,64,128,1,float16,float16,1023,0.014032000054915747
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,32,2,64,0,1,float16,float16,1023,0.016058667252461117
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,32,2,64,128,1,float16,fp8,1023,0.008549333239595095
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,32,2,64,0,1,float16,fp8,1023,0.009786666681369146
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,32,2,64,128,1,float16,float16,2047,0.012597333639860153
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,32,2,64,0,1,float16,float16,2047,0.013232000172138214
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,32,2,64,128,1,float16,fp8,2047,0.007807999849319458
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,32,2,64,0,1,float16,fp8,2047,0.011621333658695221
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,32,2,64,0,1,float16,float16,8191,0.01618133361140887
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,32,2,64,128,1,float16,float16,4095,0.012906666845083237
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,32,2,64,0,1,float16,float16,4095,0.013807999591032663
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,32,2,64,128,1,float16,fp8,4095,0.008234666660428047
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,32,2,64,0,1,float16,fp8,4095,0.012437333663304647
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,32,2,64,128,1,float16,float16,8191,0.014170666535695394
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,32,2,64,128,1,float16,fp8,8191,0.007749333356817563
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,32,2,64,0,1,float16,fp8,8191,0.01532799998919169
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,32,2,64,128,1,float16,float16,16383,0.01463466634353002
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,32,2,64,0,1,float16,float16,16383,0.017184000462293625
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,32,2,64,128,1,float16,fp8,16383,0.00797333319981893
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,32,2,64,0,1,float16,fp8,16383,0.018101333330074947
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,32,2,64,128,1,float16,float16,32767,0.01803733284274737
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,32,2,64,0,1,float16,float16,32767,0.027024000883102417
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,32,2,64,128,1,float16,fp8,32767,0.008016000191370646
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,32,2,64,0,1,float16,fp8,32767,0.02470933397610982
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,32,4,64,128,1,float16,float16,1,0.007946666950980822
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,32,2,64,128,1,float16,float16,65535,0.02022933339079221
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,32,4,64,0,1,float16,float16,1,0.007791999727487564
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,32,2,64,0,1,float16,float16,65535,0.03253866732120514
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,32,4,64,128,1,float16,fp8,1,0.005685333162546158
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,32,4,64,0,1,float16,fp8,1,0.00843733362853527
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,32,2,64,128,1,float16,fp8,65535,0.008485333373149237
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,32,4,64,128,1,float16,float16,3,0.008117333054542542
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,32,2,64,0,1,float16,fp8,65535,0.03123733401298523
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,32,4,64,0,1,float16,float16,3,0.008245333408315977
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,32,4,64,128,1,float16,fp8,3,0.005690666536490123
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,32,4,64,0,1,float16,fp8,3,0.005658666913708051
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,32,4,64,128,1,float16,float16,7,0.007600000128149986
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,32,4,64,0,1,float16,float16,7,0.009002666920423508
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,32,4,64,128,1,float16,fp8,7,0.005765333150823911
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,32,4,64,0,1,float16,fp8,7,0.0069866664707660675
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,32,4,64,128,1,float16,float16,15,0.008122666428486506
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,32,4,64,0,1,float16,float16,15,0.008117333054542542
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,32,4,64,128,1,float16,fp8,15,0.005770666524767876
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,32,4,64,128,1,float16,float16,63,0.009018666421373686
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,32,4,64,0,1,float16,fp8,15,0.005664000287652016
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,32,4,64,128,1,float16,float16,31,0.00786666696270307
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,32,4,64,0,1,float16,float16,31,0.008896000062425932
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,32,4,64,128,1,float16,fp8,31,0.0058453331391016645
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,32,4,64,0,1,float16,fp8,31,0.006864000111818314
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,32,4,64,0,1,float16,float16,63,0.00897066667675972
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,32,4,64,128,1,float16,fp8,63,0.005925333127379417
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,32,4,64,0,1,float16,fp8,63,0.005589333052436511
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,32,4,64,128,1,float16,float16,127,0.008762666955590248
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,32,4,64,0,1,float16,float16,127,0.009568000212311745
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,32,4,64,128,1,float16,fp8,127,0.006362666686375936
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,32,4,64,0,1,float16,fp8,127,0.007258666679263115
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,32,4,64,128,1,float16,float16,255,0.009205333267649015
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,32,4,64,0,1,float16,float16,255,0.008858666444818178
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,32,4,64,128,1,float16,fp8,255,0.006949333474040031
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,32,4,64,0,1,float16,fp8,255,0.0074346667776505155
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,32,4,64,128,1,float16,float16,511,0.009637333452701569
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,32,4,64,0,1,float16,float16,511,0.009999999776482582
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,32,4,64,128,1,float16,fp8,511,0.0074879998962084455
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,32,4,64,0,1,float16,fp8,511,0.007967999825874964
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,32,4,64,128,1,float16,float16,1023,0.011829332758982977
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,32,4,64,0,1,float16,float16,1023,0.011978667229413986
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,32,4,64,128,1,float16,fp8,1023,0.007055999711155891
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,32,4,64,0,1,float16,fp8,1023,0.009162666896979014
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,32,4,64,128,1,float16,float16,2047,0.012944000462690989
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,32,4,64,0,1,float16,float16,2047,0.013493333011865616
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,32,4,64,128,1,float16,fp8,2047,0.007360000163316727
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,32,4,64,0,1,float16,fp8,2047,0.00915733352303505
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,32,4,64,128,1,float16,float16,4095,0.012975999464591345
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,32,4,64,0,1,float16,float16,4095,0.01358933374285698
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,32,4,64,128,1,float16,fp8,4095,0.009269333134094873
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,32,4,64,0,1,float16,fp8,4095,0.010512000570694605
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,32,4,64,128,1,float16,float16,8191,0.016528000434239704
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,32,4,64,0,1,float16,float16,8191,0.021066665649414062
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,32,4,64,128,1,float16,fp8,8191,0.007167999943097432
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,32,4,64,0,1,float16,fp8,8191,0.013568000247081121
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,32,4,64,128,1,float16,float16,16383,0.016986666868130367
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,32,4,64,128,1,float16,fp8,16383,0.0075573331365982694
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,32,4,64,0,1,float16,float16,16383,0.02492266645034154
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,32,4,64,0,1,float16,float16,32767,0.03523733218510946
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,32,4,64,0,1,float16,fp8,16383,0.01664000004529953
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,32,4,64,128,1,float16,float16,32767,0.020773333807786305
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,32,4,64,128,1,float16,fp8,32767,0.007311999797821045
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,32,4,64,0,1,float16,fp8,32767,0.02310933421055476
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,32,8,64,128,1,float16,float16,1,0.008122666428486506
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,32,8,64,128,1,float16,fp8,1,0.005872000008821487
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,32,8,64,0,1,float16,float16,1,0.008133333176374435
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,32,8,64,128,1,float16,float16,3,0.008154666672150293
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,32,4,64,128,1,float16,float16,65535,0.024608001112937927
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,32,8,64,0,1,float16,fp8,1,0.0058080001423756284
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,32,4,64,128,1,float16,fp8,65535,0.007386666412154834
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,32,8,64,0,1,float16,float16,3,0.008042666440208754
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,32,4,64,0,1,float16,float16,65535,0.05427733560403188
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,32,8,64,128,1,float16,fp8,3,0.007269333427151044
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,32,8,64,0,1,float16,fp8,3,0.007221333061655362
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,32,8,64,128,1,float16,float16,15,0.008736000085870424
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,32,4,64,0,1,float16,fp8,65535,0.03577066709597906
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,32,8,64,128,1,float16,float16,7,0.007893333211541176
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,32,8,64,0,1,float16,float16,7,0.007813333223263422
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,32,8,64,128,1,float16,fp8,7,0.005882666756709416
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,32,8,64,0,1,float16,fp8,7,0.0058613332609335584
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,32,8,64,0,1,float16,float16,15,0.008101333553592363
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,32,8,64,128,1,float16,fp8,15,0.007055999711155891
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,32,8,64,0,1,float16,fp8,15,0.012250666817029318
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,32,8,64,0,1,float16,float16,63,0.008992000172535578
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,32,8,64,128,1,float16,fp8,63,0.00701333334048589
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,32,8,64,128,1,float16,float16,31,0.008362666393319765
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,32,8,64,0,1,float16,float16,31,0.008080000057816505
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,32,8,64,0,1,float16,float16,127,0.009248000259200731
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,32,8,64,128,1,float16,fp8,31,0.00596266674498717
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,32,8,64,0,1,float16,fp8,31,0.005797333394487699
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,32,8,64,128,1,float16,float16,63,0.008901333436369896
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,32,8,64,0,1,float16,fp8,63,0.00843733362853527
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,32,8,64,128,1,float16,float16,127,0.009279999881982803
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,32,8,64,128,1,float16,fp8,127,0.007205333560705185
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,32,8,64,128,1,float16,float16,511,0.012319999436537424
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,32,8,64,0,1,float16,fp8,127,0.006037333359320958
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,32,8,64,128,1,float16,float16,255,0.009253333633144697
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,32,8,64,0,1,float16,float16,255,0.008943999807039896
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,32,8,64,128,1,float16,fp8,255,0.006805333619316419
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,32,8,64,0,1,float16,fp8,255,0.0074506668994824094
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,32,8,64,0,1,float16,float16,511,0.011861333002646765
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,32,8,64,128,1,float16,fp8,511,0.007407999907930692
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,32,8,64,0,1,float16,fp8,511,0.008597333605090777
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,32,8,64,128,1,float16,float16,1023,0.011322667201360067
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,32,8,64,0,1,float16,float16,1023,0.012346666306257248
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,32,8,64,128,1,float16,fp8,1023,0.006949333474040031
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,32,8,64,0,1,float16,fp8,1023,0.008031999692320824
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,32,8,64,128,1,float16,float16,2047,0.015040000279744467
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,32,8,64,0,1,float16,float16,2047,0.01793066660563151
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,32,8,64,128,1,float16,fp8,2047,0.007360000163316727
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,32,8,64,128,1,float16,float16,8191,0.018778666853904724
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,32,8,64,0,1,float16,fp8,2047,0.010213333492477735
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,32,8,64,128,1,float16,float16,4095,0.015552000453074774
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,32,8,64,0,1,float16,float16,4095,0.019632000476121902
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,32,8,64,128,1,float16,fp8,4095,0.00707733320693175
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,32,8,64,0,1,float16,fp8,4095,0.010928000013033548
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,32,8,64,0,1,float16,float16,8191,0.02794666588306427
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,32,8,64,0,1,float16,fp8,8191,0.014709333578745524
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,32,8,64,128,1,float16,fp8,8191,0.007471999774376552
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,32,8,64,128,1,float16,float16,16383,0.02258666604757309
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,32,8,64,0,1,float16,float16,16383,0.03692800054947535
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,32,8,64,128,1,float16,fp8,16383,0.007530666887760162
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,32,8,64,0,1,float16,fp8,16383,0.020207999895016353
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,32,8,64,128,1,float16,float16,32767,0.02717866748571396
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,32,8,64,0,1,float16,float16,32767,0.05969599882761637
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,32,8,64,128,1,float16,fp8,32767,0.0074453335255384445
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,32,8,64,0,1,float16,fp8,32767,0.03547733277082443
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,32,8,64,128,1,float16,float16,65535,0.03399466723203659
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,32,1,64,128,1,float16,float16,1,0.012655999511480331
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,32,1,64,0,1,float16,float16,1,0.012053333222866058
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,32,8,64,128,1,float16,fp8,65535,0.00761600024998188
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,32,1,64,128,1,float16,fp8,1,0.008767999708652496
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,32,1,64,0,1,float16,fp8,1,0.007994666695594788
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,32,1,64,128,1,float16,fp8,3,0.007893333211541176
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,32,8,64,0,1,float16,float16,65535,0.09012800455093384
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,32,8,64,0,1,float16,fp8,65535,0.05220800141493479
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,32,1,64,128,1,float16,float16,3,0.012453333785136541
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,32,1,64,0,1,float16,float16,3,0.012154666086037954
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,32,1,64,0,1,float16,fp8,3,0.009029333169261614
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,32,1,64,128,1,float16,float16,7,0.011578666667143503
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,32,1,64,0,1,float16,float16,7,0.01139733319481214
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,32,1,64,128,1,float16,fp8,7,0.008250666782259941
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,32,1,64,0,1,float16,fp8,7,0.007920000081261
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,32,1,64,128,1,float16,float16,15,0.012645332763592402
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,32,1,64,0,1,float16,float16,31,0.01163200040658315
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,32,1,64,0,1,float16,float16,15,0.011871999750534693
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,32,1,64,128,1,float16,fp8,15,0.009018666421373686
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,32,1,64,128,1,float16,float16,63,0.011370666325092316
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,32,1,64,0,1,float16,fp8,15,0.008005333443482717
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,32,1,64,128,1,float16,float16,31,0.011354666203260422
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,32,1,64,128,1,float16,fp8,31,0.007957333077987036
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,32,1,64,0,1,float16,fp8,31,0.008282666405042013
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,32,1,64,0,1,float16,float16,63,0.011567999919255575
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,32,1,64,128,1,float16,fp8,63,0.009002666920423508
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,32,1,64,0,1,float16,fp8,63,0.008218666538596153
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,32,1,64,128,1,float16,float16,127,0.01138666644692421
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,32,1,64,0,1,float16,float16,127,0.011183999478816986
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,32,1,64,128,1,float16,fp8,127,0.008869333192706108
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,32,1,64,0,1,float16,fp8,127,0.011781333635250727
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,32,1,64,128,1,float16,float16,255,0.014096000542243322
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,32,1,64,0,1,float16,float16,255,0.01444799949725469
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,32,1,64,128,1,float16,fp8,255,0.00972800018886725
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,32,1,64,0,1,float16,fp8,255,0.010298666854699453
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,32,1,64,128,1,float16,float16,511,0.019018666197856266
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,32,1,64,128,1,float16,fp8,511,0.0102613332370917
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,32,1,64,0,1,float16,float16,511,0.02046400060256322
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,32,1,64,0,1,float16,fp8,511,0.012965332716703415
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,32,1,64,128,1,float16,float16,1023,0.025392000873883564
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,32,1,64,0,1,float16,float16,1023,0.02974933385848999
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,32,1,64,128,1,float16,fp8,1023,0.010687999427318573
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,32,1,64,0,1,float16,fp8,1023,0.020501332978407543
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,32,1,64,128,1,float16,float16,2047,0.032101333141326904
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,32,1,64,128,1,float16,float16,4095,0.039813332259655
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,32,1,64,0,1,float16,float16,2047,0.04274133344491323
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,32,1,64,128,1,float16,fp8,2047,0.011221333096424738
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,32,1,64,0,1,float16,fp8,2047,0.025786665578683216
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,32,1,64,0,1,float16,float16,4095,0.06321600079536438
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,32,1,64,0,1,float16,float16,8191,0.08214400211970012
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,32,1,64,0,1,float16,fp8,4095,0.039477333426475525
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,32,1,64,128,1,float16,fp8,4095,0.011264000087976456
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,32,1,64,128,1,float16,float16,8191,0.039781334499518074
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,32,1,64,128,1,float16,fp8,8191,0.01109333336353302
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,32,2,64,128,1,float16,float16,1,0.011482667177915573
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,32,1,64,0,1,float16,fp8,8191,0.056048000852266945
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,32,2,64,0,1,float16,float16,1,0.011002667248249054
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,32,2,64,128,1,float16,fp8,1,0.0074506668994824094
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,32,2,64,0,1,float16,fp8,1,0.006789333497484525
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,32,2,64,128,1,float16,float16,3,0.010735999792814255
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,32,2,64,128,1,float16,fp8,7,0.007482666522264481
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,32,2,64,0,1,float16,float16,3,0.010480000327030817
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,32,2,64,128,1,float16,fp8,3,0.006768000001708667
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,32,2,64,0,1,float16,fp8,3,0.007301333049933116
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,32,2,64,128,1,float16,float16,7,0.011050666371981302
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,32,2,64,0,1,float16,float16,7,0.010960000256697336
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,32,2,64,128,1,float16,float16,31,0.011029332876205444
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,32,2,64,0,1,float16,fp8,7,0.006874666859706243
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,32,2,64,128,1,float16,float16,15,0.010960000256697336
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,32,2,64,0,1,float16,float16,15,0.010981333752473196
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,32,2,64,128,1,float16,fp8,15,0.007029333462317784
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,32,2,64,0,1,float16,float16,63,0.010901333143313726
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,32,2,64,0,1,float16,fp8,15,0.006709333509206772
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,32,2,64,0,1,float16,float16,31,0.011077333241701126
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,32,2,64,128,1,float16,float16,127,0.010837333897749582
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,32,2,64,128,1,float16,fp8,31,0.008463999877373377
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,32,2,64,0,1,float16,fp8,31,0.00744000015159448
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,32,2,64,128,1,float16,float16,63,0.010330666477481524
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,32,2,64,128,1,float16,fp8,63,0.007567999884486198
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,32,2,64,0,1,float16,fp8,63,0.006895999734600385
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,32,2,64,0,1,float16,float16,127,0.010341333225369453
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,32,2,64,0,1,float16,fp8,255,0.009813333551088968
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,32,2,64,128,1,float16,fp8,127,0.007978666573762894
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,32,2,64,0,1,float16,fp8,127,0.007701333612203598
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,32,2,64,128,1,float16,float16,255,0.020047999918460846
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,32,2,64,0,1,float16,float16,255,0.01889066646496455
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,32,2,64,128,1,float16,fp8,255,0.008421333506703377
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,32,2,64,128,1,float16,float16,511,0.02306666721900304
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,32,2,64,0,1,float16,float16,511,0.025216000775496166
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,32,2,64,0,1,float16,fp8,1023,0.020026666422684986
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,32,2,64,128,1,float16,fp8,511,0.009946666657924652
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,32,2,64,0,1,float16,float16,2047,0.051781331499417625
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,32,2,64,0,1,float16,fp8,511,0.01202133297920227
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,32,2,64,128,1,float16,float16,1023,0.03388266762097677
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,32,2,64,0,1,float16,float16,1023,0.04094400008519491
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,32,2,64,128,1,float16,fp8,1023,0.010346666599313417
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,32,2,64,128,1,float16,float16,4095,0.03621333340803782
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,32,2,64,128,1,float16,float16,2047,0.03575466573238373
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,32,2,64,128,1,float16,fp8,2047,0.009690666571259499
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,32,2,64,0,1,float16,fp8,2047,0.024351999163627625
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,32,2,64,0,1,float16,float16,4095,0.06785066425800323
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,32,2,64,128,1,float16,fp8,4095,0.010362666721145311
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,32,2,64,0,1,float16,fp8,4095,0.0354666660229365
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,32,2,64,128,1,float16,float16,8191,0.03637866675853729
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,32,2,64,0,1,float16,float16,8191,0.08850666880607605
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,32,2,64,128,1,float16,fp8,8191,0.009743999689817429
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,32,4,64,128,1,float16,float16,1,0.015184000134468079
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,32,2,64,0,1,float16,fp8,8191,0.055904000997543335
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,32,4,64,0,1,float16,float16,1,0.015546667079130808
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,32,4,64,128,1,float16,fp8,3,0.006858666737874349
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,32,4,64,128,1,float16,fp8,1,0.010656000425418219
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,32,4,64,0,1,float16,fp8,1,0.007360000163316727
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,32,4,64,128,1,float16,float16,3,0.014709333578745524
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,32,4,64,0,1,float16,float16,3,0.014576000471909841
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,32,4,64,0,1,float16,fp8,3,0.0063040001938740415
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,32,4,64,128,1,float16,float16,7,0.016229332735141117
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,32,4,64,0,1,float16,float16,7,0.015109332899252573
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,32,4,64,128,1,float16,fp8,15,0.006325333068768184
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,32,4,64,128,1,float16,fp8,7,0.01109333336353302
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,32,4,64,0,1,float16,fp8,7,0.00784533346692721
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,32,4,64,128,1,float16,float16,15,0.014618666221698126
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,32,4,64,0,1,float16,float16,15,0.01441066712141037
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,32,4,64,0,1,float16,fp8,15,0.00867733359336853
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,32,4,64,128,1,float16,float16,31,0.014943999548753103
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,32,4,64,0,1,float16,float16,31,0.01482133318980535
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,32,4,64,128,1,float16,fp8,31,0.01108266661564509
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,32,4,64,0,1,float16,fp8,31,0.007247999931375186
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,32,4,64,128,1,float16,float16,63,0.014469332993030548
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,32,4,64,0,1,float16,float16,63,0.014570667097965876
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,32,4,64,128,1,float16,fp8,63,0.008629333227872849
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,32,4,64,128,1,float16,float16,255,0.026602665583292644
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,32,4,64,0,1,float16,fp8,63,0.007007999966541926
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,32,4,64,128,1,float16,float16,127,0.014831999937693277
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,32,4,64,0,1,float16,float16,127,0.01571200042963028
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,32,4,64,128,1,float16,fp8,127,0.007770666852593422
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,32,4,64,0,1,float16,fp8,127,0.007626666376988093
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,32,4,64,0,1,float16,float16,255,0.026741333305835724
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,32,4,64,128,1,float16,fp8,255,0.009242666885256767
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,32,4,64,0,1,float16,fp8,255,0.009503999724984169
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,32,4,64,128,1,float16,float16,511,0.03637866675853729
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,32,4,64,0,1,float16,float16,511,0.040591999888420105
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,32,4,64,128,1,float16,fp8,511,0.010602666685978571
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,32,4,64,0,1,float16,fp8,511,0.016565332810084026
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,32,4,64,128,1,float16,float16,1023,0.0366239994764328
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,32,4,64,0,1,float16,float16,1023,0.04953599969546
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,32,4,64,128,1,float16,fp8,1023,0.010826667149861654
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,32,4,64,0,1,float16,fp8,1023,0.019946667055288952
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,32,4,64,0,1,float16,float16,2047,0.06762133538722992
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,32,4,64,128,1,float16,float16,2047,0.038880000511805214
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,32,4,64,128,1,float16,fp8,4095,0.010735999792814255
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,32,4,64,128,1,float16,fp8,2047,0.010672000547250112
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,32,4,64,0,1,float16,fp8,2047,0.028778667251269024
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,32,4,64,128,1,float16,float16,4095,0.039162665605545044
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,32,4,64,0,1,float16,float16,4095,0.0890773336092631
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,32,4,64,0,1,float16,fp8,4095,0.04587733248869578
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,32,4,64,128,1,float16,float16,8191,0.03903999924659729
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,32,4,64,0,1,float16,float16,8191,0.1329813301563263
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,32,8,64,128,1,float16,float16,1,0.023503998915354412
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,32,4,64,128,1,float16,fp8,8191,0.010741333166758219
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,32,8,64,0,1,float16,float16,1,0.023930666347344715
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,32,4,64,0,1,float16,fp8,8191,0.07938133180141449
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,32,8,64,128,1,float16,fp8,1,0.007194666812817256
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,32,8,64,0,1,float16,fp8,1,0.006949333474040031
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,32,8,64,128,1,float16,float16,3,0.02346133440732956
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,32,8,64,0,1,float16,float16,3,0.023418667415777843
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,32,8,64,128,1,float16,fp8,3,0.007653333246707916
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,32,8,64,0,1,float16,fp8,3,0.008298666526873907
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,32,8,64,128,1,float16,float16,7,0.024133334557215374
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,32,8,64,0,1,float16,float16,7,0.023386667172114056
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,32,8,64,128,1,float16,fp8,7,0.007135999699433644
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,32,8,64,0,1,float16,fp8,7,0.007018666714429855
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,32,8,64,128,1,float16,float16,15,0.023552000522613525
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,32,8,64,0,1,float16,float16,15,0.023365333676338196
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,32,8,64,128,1,float16,fp8,15,0.007578666632374127
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,32,8,64,0,1,float16,fp8,15,0.008757333581646284
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,32,8,64,0,1,float16,float16,63,0.023290666441122692
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,32,8,64,128,1,float16,float16,31,0.024101334313551586
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,32,8,64,0,1,float16,float16,31,0.023381332556406658
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,32,8,64,128,1,float16,fp8,31,0.007781333600481351
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,32,8,64,0,1,float16,fp8,31,0.007029333462317784
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,32,8,64,128,1,float16,float16,63,0.023232000569502514
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,32,8,64,128,1,float16,fp8,63,0.007685333490371704
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,32,8,64,0,1,float16,fp8,63,0.008693333094318708
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,32,8,64,128,1,float16,float16,127,0.02316266546646754
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,32,8,64,0,1,float16,float16,127,0.024154665569464367
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,32,8,64,128,1,float16,fp8,127,0.008522666369875273
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,32,8,64,0,1,float16,fp8,127,0.008176000167926153
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,32,8,64,128,1,float16,float16,255,0.04278933505217234
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,32,8,64,0,1,float16,float16,255,0.04320533573627472
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,32,8,64,128,1,float16,fp8,255,0.013893333574136099
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,32,8,64,0,1,float16,fp8,255,0.01581866666674614
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,32,8,64,128,1,float16,float16,511,0.043605332573254905
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,32,8,64,0,1,float16,float16,511,0.053039997816085815
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,32,8,64,128,1,float16,fp8,511,0.013898666948080063
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,32,8,64,0,1,float16,fp8,511,0.019600000232458115
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,32,8,64,0,1,float16,fp8,1023,0.02807466685771942
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,32,8,64,128,1,float16,float16,1023,0.04298666616280874
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,32,8,64,0,1,float16,float16,1023,0.06564799944559734
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,32,8,64,128,1,float16,fp8,1023,0.013872000078360239
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,32,8,64,128,1,float16,float16,2047,0.04693866769472758
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,32,8,64,128,1,float16,fp8,2047,0.013541333377361298
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,32,8,64,0,1,float16,float16,2047,0.09071466326713562
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,32,8,64,0,1,float16,fp8,2047,0.04587199787298838
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,32,8,64,128,1,float16,float16,4095,0.0455626646677653
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,32,8,64,0,1,float16,float16,4095,0.1338933308919271
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,32,8,64,128,1,float16,fp8,4095,0.01394133393963178
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,32,8,64,0,1,float16,fp8,4095,0.08009600142637889
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,32,8,64,128,1,float16,float16,8191,0.04571733375390371
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,32,1,64,0,1,float16,float16,1,0.008447999755541483
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,32,1,64,128,1,float16,float16,1,0.007887999837597212
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,32,8,64,128,1,float16,fp8,8191,0.013946666071812311
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,32,8,64,0,1,float16,float16,8191,0.22070932388305664
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,32,1,64,0,1,float16,float16,3,0.00789866658548514
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,32,8,64,0,1,float16,fp8,8191,0.14126933614412943
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,32,1,64,128,1,float16,fp8,1,0.008272000278035799
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,32,1,64,0,1,float16,fp8,1,0.00786666696270307
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,32,1,64,128,1,float16,float16,3,0.008389333263039589
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,32,1,64,128,1,float16,fp8,3,0.007663999994595845
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,32,1,64,128,1,float16,float16,7,0.00789866658548514
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,32,1,64,0,1,float16,fp8,3,0.008186666915814081
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,32,1,64,0,1,float16,float16,7,0.007887999837597212
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,32,1,64,128,1,float16,fp8,7,0.008266666904091835
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,32,1,64,0,1,float16,fp8,7,0.007754666730761528
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,32,1,64,128,1,float16,float16,15,0.008410666758815447
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,32,1,64,0,1,float16,float16,15,0.008047999814152718
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,32,1,64,128,1,float16,fp8,15,0.008127999802430471
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,32,1,64,0,1,float16,fp8,15,0.00814933329820633
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,32,1,64,128,1,float16,float16,31,0.008176000167926153
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,32,1,64,0,1,float16,float16,31,0.008698666468262672
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,32,1,64,128,1,float16,fp8,31,0.007696000238259633
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,32,1,64,0,1,float16,fp8,31,0.00877333308259646
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,32,1,64,128,1,float16,float16,63,0.00871999996403853
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,32,1,64,0,1,float16,float16,63,0.008778666456540426
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,32,1,64,128,1,float16,fp8,63,0.008143999924262365
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,32,1,64,0,1,float16,fp8,63,0.00786666696270307
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,32,1,64,128,1,float16,float16,127,0.00956266683836778
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,32,1,64,0,1,float16,float16,127,0.008954666554927826
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,32,1,64,128,1,float16,fp8,127,0.009455999980370203
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,32,1,64,0,1,float16,fp8,127,0.00895999992887179
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,32,1,64,128,1,float16,float16,255,0.009018666421373686
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,32,1,64,0,1,float16,float16,255,0.009509333098928133
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,32,1,64,128,1,float16,fp8,255,0.009375999992092451
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,32,1,64,0,1,float16,fp8,255,0.010570666442314783
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,32,1,64,128,1,float16,float16,1023,0.02231466770172119
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,32,1,64,128,1,float16,float16,511,0.008938666433095932
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,32,1,64,0,1,float16,float16,511,0.0106133334338665
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,32,1,64,128,1,float16,fp8,511,0.009008000294367472
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,32,1,64,0,1,float16,float16,2047,0.01545599972208341
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,32,1,64,0,1,float16,fp8,511,0.010309333602587381
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,32,1,64,0,1,float16,float16,1023,0.026789332429567974
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,32,1,64,128,1,float16,fp8,1023,0.009226666763424873
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,32,1,64,0,1,float16,fp8,1023,0.011706666400035223
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,32,1,64,128,1,float16,float16,2047,0.01524266724785169
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,32,1,64,128,1,float16,fp8,2047,0.009685333197315535
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,32,1,64,0,1,float16,fp8,2047,0.01250133290886879
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,32,1,64,0,1,float16,float16,8191,0.019871999820073444
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,32,1,64,128,1,float16,float16,4095,0.01584533353646596
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,32,1,64,0,1,float16,float16,4095,0.01634666696190834
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,32,1,64,128,1,float16,fp8,4095,0.009770666559537252
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,32,1,64,0,1,float16,fp8,4095,0.014522666732470194
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,32,1,64,128,1,float16,float16,8191,0.017445333302021027
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,32,1,64,128,1,float16,fp8,8191,0.009701333319147428
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,32,1,64,0,1,float16,fp8,8191,0.017279999951521557
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,32,1,64,128,1,float16,float16,16383,0.017397332936525345
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,32,1,64,0,1,float16,float16,16383,0.020773333807786305
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,32,1,64,128,1,float16,fp8,16383,0.009194666519761086
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,32,1,64,0,1,float16,fp8,16383,0.021429332594076794
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,32,1,64,128,1,float16,float16,32767,0.021551998953024547
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,32,1,64,0,1,float16,float16,32767,0.03369066615899404
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,32,1,64,128,1,float16,fp8,32767,0.009829333052039146
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,32,2,64,128,1,float16,float16,1,0.007983999947706858
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,32,1,64,128,1,float16,float16,65535,0.023872000475724537
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,32,1,64,0,1,float16,fp8,32767,0.030597334106763203
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,32,1,64,0,1,float16,float16,65535,0.04079466561476389
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,32,2,64,0,1,float16,float16,1,0.008517333616813024
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,32,2,64,128,1,float16,fp8,1,0.007541333635648091
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,32,2,64,0,1,float16,fp8,1,0.006549333532651265
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,32,2,64,128,1,float16,float16,3,0.007920000081261
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,32,1,64,128,1,float16,fp8,65535,0.009253333633144697
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,32,2,64,0,1,float16,float16,3,0.007685333490371704
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,32,1,64,0,1,float16,fp8,65535,0.04012266546487808
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,32,2,64,128,1,float16,fp8,3,0.006501333167155583
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,32,2,64,0,1,float16,fp8,3,0.007514666765928268
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,32,2,64,128,1,float16,float16,7,0.008378666515151659
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,32,2,64,0,1,float16,float16,7,0.008016000191370646
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,32,2,64,128,1,float16,fp8,7,0.0064213331788778305
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,32,2,64,0,1,float16,fp8,7,0.006480000292261441
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,32,2,64,128,1,float16,float16,15,0.007626666376988093
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,32,2,64,0,1,float16,float16,15,0.008992000172535578
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,32,2,64,128,1,float16,fp8,15,0.0064106664309899015
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,32,2,64,0,1,float16,fp8,15,0.0074879998962084455
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,32,2,64,128,1,float16,float16,31,0.008602666358153025
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,32,2,64,0,1,float16,float16,31,0.008245333408315977
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,32,2,64,128,1,float16,fp8,31,0.00744000015159448
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,32,2,64,0,1,float16,fp8,31,0.006437333300709724
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,32,2,64,128,1,float16,float16,63,0.008623999853928884
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,32,2,64,0,1,float16,float16,63,0.009610666582981745
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,32,2,64,128,1,float16,fp8,63,0.006442666674653689
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,32,2,64,0,1,float16,fp8,63,0.007610666876037915
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,32,2,64,128,1,float16,float16,127,0.008949333180983862
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,32,2,64,0,1,float16,float16,127,0.008976000050703684
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,32,2,64,128,1,float16,fp8,127,0.007173333317041397
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,32,2,64,0,1,float16,fp8,127,0.00726400005320708
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,32,2,64,128,1,float16,float16,255,0.009637333452701569
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,32,2,64,0,1,float16,float16,255,0.009162666896979014
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,32,2,64,128,1,float16,fp8,255,0.008549333239595095
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,32,2,64,0,1,float16,fp8,255,0.008506666868925095
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,32,2,64,128,1,float16,float16,511,0.009066666786869368
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,32,2,64,0,1,float16,float16,1023,0.012479999413092932
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,32,2,64,0,1,float16,float16,511,0.010090666512648264
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,32,2,64,128,1,float16,fp8,511,0.007781333600481351
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,32,2,64,0,1,float16,fp8,511,0.009839999799927076
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,32,2,64,128,1,float16,float16,1023,0.011829332758982977
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,32,2,64,128,1,float16,fp8,2047,0.007786666974425316
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,32,2,64,128,1,float16,fp8,1023,0.008080000057816505
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,32,2,64,0,1,float16,fp8,1023,0.009445333232482275
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,32,2,64,128,1,float16,float16,2047,0.014096000542243322
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,32,2,64,0,1,float16,float16,2047,0.014538666854302088
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,32,2,64,0,1,float16,fp8,2047,0.011322667201360067
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,32,2,64,128,1,float16,float16,4095,0.014165333161751429
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,32,2,64,0,1,float16,float16,4095,0.015040000279744467
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,32,2,64,128,1,float16,fp8,4095,0.007829333345095316
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,32,2,64,0,1,float16,fp8,4095,0.012304000556468964
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,32,2,64,128,1,float16,float16,8191,0.017818666994571686
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,32,2,64,0,1,float16,float16,8191,0.02186666677395503
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,32,2,64,128,1,float16,fp8,8191,0.008576000109314919
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,32,2,64,0,1,float16,fp8,8191,0.01658133293191592
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,32,2,64,128,1,float16,float16,16383,0.018325333793958027
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,32,2,64,0,1,float16,float16,16383,0.02510400116443634
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,32,2,64,128,1,float16,fp8,16383,0.008207999790708223
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,32,2,64,0,1,float16,fp8,16383,0.020586666961510975
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,32,2,64,128,1,float16,fp8,32767,0.008074666683872541
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,32,2,64,128,1,float16,float16,32767,0.02207999924818675
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,32,2,64,0,1,float16,float16,32767,0.037151999771595
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,32,2,64,0,1,float16,fp8,32767,0.028245332340399425
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,32,4,64,128,1,float16,float16,1,0.008416000132759413
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,32,4,64,0,1,float16,float16,1,0.008623999853928884
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,32,2,64,128,1,float16,float16,65535,0.025600001215934753
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,32,4,64,128,1,float16,fp8,1,0.007610666876037915
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,32,4,64,0,1,float16,fp8,1,0.005621333296100299
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,32,2,64,0,1,float16,float16,65535,0.05526933570702871
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,32,4,64,0,1,float16,fp8,3,0.007605333502093951
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,32,4,64,128,1,float16,float16,3,0.008383999889095625
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,32,4,64,0,1,float16,float16,3,0.00873066671192646
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,32,2,64,128,1,float16,fp8,65535,0.00867733359336853
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,32,2,64,0,1,float16,fp8,65535,0.04307200014591217
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,32,4,64,128,1,float16,fp8,3,0.0058453331391016645
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,32,4,64,128,1,float16,float16,7,0.007813333223263422
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,32,4,64,0,1,float16,float16,7,0.007893333211541176
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,32,4,64,128,1,float16,fp8,7,0.005850666513045629
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,32,4,64,0,1,float16,fp8,7,0.006837333242098491
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,32,4,64,128,1,float16,float16,15,0.008709333216150602
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,32,4,64,0,1,float16,float16,15,0.008485333373149237
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,32,4,64,128,1,float16,fp8,15,0.005775999898711841
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,32,4,64,0,1,float16,fp8,15,0.005941333249211311
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,32,4,64,128,1,float16,float16,31,0.008037333066264788
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,32,4,64,0,1,float16,float16,31,0.007983999947706858
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,32,4,64,128,1,float16,fp8,31,0.005797333394487699
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,32,4,64,0,1,float16,fp8,31,0.005749333028992017
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,32,4,64,128,1,float16,float16,63,0.009594666461149851
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,32,4,64,0,1,float16,float16,63,0.009301333377758661
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,32,4,64,128,1,float16,fp8,63,0.005722666780153911
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,32,4,64,0,1,float16,fp8,63,0.00589866687854131
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,32,4,64,128,1,float16,float16,127,0.009039999917149544
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,32,4,64,0,1,float16,float16,127,0.008901333436369896
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,32,4,64,128,1,float16,fp8,127,0.006298666819930077
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,32,4,64,0,1,float16,fp8,127,0.006128000095486641
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,32,4,64,128,1,float16,float16,255,0.010442666709423065
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,32,4,64,0,1,float16,float16,255,0.009648000200589498
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,32,4,64,128,1,float16,fp8,255,0.0069973332186539965
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,32,4,64,0,1,float16,fp8,255,0.007658666620651881
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,32,4,64,128,1,float16,float16,511,0.011973333855470022
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,32,4,64,0,1,float16,float16,511,0.011338666081428528
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,32,4,64,128,1,float16,fp8,511,0.007231999809543292
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,32,4,64,0,1,float16,fp8,511,0.007685333490371704
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,32,4,64,128,1,float16,float16,1023,0.01227733368674914
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,32,4,64,0,1,float16,float16,1023,0.012432000289360682
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,32,4,64,0,1,float16,fp8,2047,0.009770666559537252
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,32,4,64,128,1,float16,fp8,1023,0.007087999954819679
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,32,4,64,0,1,float16,fp8,1023,0.008298666526873907
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,32,4,64,128,1,float16,float16,2047,0.016154666741689045
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,32,4,64,0,1,float16,float16,2047,0.019050666441520054
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,32,4,64,128,1,float16,fp8,2047,0.007285333548982938
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,32,4,64,128,1,float16,float16,4095,0.017632000148296356
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,32,4,64,128,1,float16,fp8,4095,0.007184000064929326
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,32,4,64,0,1,float16,float16,4095,0.02126399924357732
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,32,4,64,0,1,float16,fp8,4095,0.011343999455372492
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,32,4,64,128,1,float16,float16,8191,0.01998399943113327
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,32,4,64,0,1,float16,float16,8191,0.02808533360560735
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,32,4,64,128,1,float16,fp8,8191,0.007573333258430163
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,32,4,64,0,1,float16,fp8,8191,0.015482666591803232
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,32,4,64,128,1,float16,float16,16383,0.023573334018389385
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,32,4,64,0,1,float16,float16,16383,0.03842666745185852
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,32,4,64,128,1,float16,fp8,16383,0.007962666451931
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,32,4,64,0,1,float16,fp8,16383,0.021477334201335907
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,32,4,64,128,1,float16,float16,32767,0.028549333413441975
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,32,4,64,0,1,float16,float16,32767,0.06101333101590475
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,32,4,64,128,1,float16,fp8,32767,0.00754666638871034
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,32,4,64,0,1,float16,fp8,32767,0.036159999668598175
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,32,8,64,128,1,float16,float16,1,0.007760000104705493
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,32,8,64,0,1,float16,float16,1,0.007807999849319458
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,32,4,64,128,1,float16,float16,65535,0.035386666655540466
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,32,8,64,0,1,float16,fp8,1,0.005562666803598404
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,32,8,64,128,1,float16,fp8,1,0.008373333141207695
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,32,8,64,128,1,float16,float16,3,0.007967999825874964
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,32,8,64,128,1,float16,fp8,3,0.005674666414658229
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,32,4,64,0,1,float16,float16,65535,0.09265599648157756
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,32,8,64,128,1,float16,float16,7,0.007925333455204964
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,32,4,64,128,1,float16,fp8,65535,0.007823999971151352
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,32,8,64,0,1,float16,float16,3,0.008143999924262365
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,32,4,64,0,1,float16,fp8,65535,0.053173333406448364
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,32,8,64,0,1,float16,fp8,3,0.008303999900817871
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,32,8,64,0,1,float16,float16,7,0.007786666974425316
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,32,8,64,0,1,float16,fp8,7,0.005775999898711841
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,32,8,64,128,1,float16,fp8,7,0.007322666545708974
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,32,8,64,128,1,float16,float16,15,0.007994666695594788
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,32,8,64,0,1,float16,float16,15,0.00808533343176047
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,32,8,64,128,1,float16,fp8,15,0.0058133335163195925
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,32,8,64,0,1,float16,fp8,15,0.008538666491707167
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,32,8,64,128,1,float16,float16,31,0.007978666573762894
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,32,8,64,0,1,float16,float16,31,0.008143999924262365
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,32,8,64,128,1,float16,fp8,31,0.007354666789372762
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,32,8,64,0,1,float16,fp8,31,0.005978666866819064
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,32,8,64,128,1,float16,float16,63,0.009183999771873156
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,32,8,64,0,1,float16,float16,63,0.00884799969693025
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,32,8,64,128,1,float16,fp8,63,0.008277333031098047
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,32,8,64,0,1,float16,fp8,63,0.005679999788602193
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,32,8,64,128,1,float16,float16,127,0.00898133342464765
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,32,8,64,128,1,float16,fp8,255,0.00843733362853527
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,32,8,64,128,1,float16,fp8,127,0.006079999729990959
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,32,8,64,0,1,float16,float16,127,0.009712000067035357
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,32,8,64,0,1,float16,fp8,127,0.006335999816656113
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,32,8,64,128,1,float16,float16,255,0.011727999895811081
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,32,8,64,0,1,float16,float16,255,0.011312000453472137
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,32,8,64,0,1,float16,fp8,255,0.007242666557431221
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,32,8,64,128,1,float16,float16,511,0.011493333925803503
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,32,8,64,0,1,float16,float16,511,0.012666666259368261
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,32,8,64,128,1,float16,fp8,511,0.007093333328763644
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,32,8,64,0,1,float16,fp8,511,0.008282666405042013
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,32,8,64,128,1,float16,float16,1023,0.016165333489576977
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,32,8,64,0,1,float16,float16,1023,0.017242666333913803
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,32,8,64,128,1,float16,fp8,1023,0.00702400008837382
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,32,8,64,0,1,float16,fp8,1023,0.008400000010927519
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,32,8,64,128,1,float16,float16,2047,0.018640000373125076
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,32,8,64,0,1,float16,float16,2047,0.024847999215126038
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,32,8,64,128,1,float16,fp8,2047,0.007413333281874657
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,32,8,64,0,1,float16,fp8,2047,0.01108266661564509
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,32,8,64,128,1,float16,float16,4095,0.023189333577950794
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,32,8,64,0,1,float16,float16,4095,0.032416000962257385
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,32,8,64,128,1,float16,fp8,4095,0.007071999832987785
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,32,8,64,0,1,float16,fp8,4095,0.013343999783198038
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,32,8,64,128,1,float16,float16,8191,0.0271519993742307
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,32,8,64,0,1,float16,float16,8191,0.04481600224971771
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,32,8,64,128,1,float16,fp8,8191,0.007429333403706551
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,32,8,64,0,1,float16,fp8,8191,0.0229120006163915
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,32,8,64,128,1,float16,float16,16383,0.03374933451414108
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,32,8,64,0,1,float16,float16,16383,0.06780266761779785
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,32,8,64,128,1,float16,fp8,16383,0.00784533346692721
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,32,8,64,0,1,float16,fp8,16383,0.03065066784620285
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,32,8,64,128,1,float16,float16,32767,0.034202667574087776
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,32,8,64,128,1,float16,fp8,32767,0.007717333113153775
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,32,8,64,0,1,float16,float16,32767,0.09186666210492452
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,32,8,64,0,1,float16,fp8,32767,0.05124799907207489
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,32,1,64,128,1,float16,float16,1,0.011871999750534693
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,32,1,64,0,1,float16,float16,1,0.01211200033624967
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,32,8,64,128,1,float16,float16,65535,0.03442666679620743
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,32,8,64,0,1,float16,float16,65535,0.13910399874051413
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,32,1,64,128,1,float16,fp8,1,0.008922666932145754
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,32,8,64,0,1,float16,fp8,65535,0.08458667000134786
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,32,8,64,128,1,float16,fp8,65535,0.007776000226537387
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,32,1,64,0,1,float16,fp8,1,0.008559999987483025
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,32,1,64,128,1,float16,float16,3,0.011744000017642975
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,32,1,64,0,1,float16,float16,3,0.012138667205969492
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,32,1,64,128,1,float16,fp8,3,0.0080960001796484
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,32,1,64,0,1,float16,fp8,3,0.0080960001796484
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,32,1,64,128,1,float16,float16,7,0.012778667112191519
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,32,1,64,0,1,float16,float16,7,0.011898666620254517
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,32,1,64,128,1,float16,fp8,7,0.008453333129485449
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,32,1,64,0,1,float16,fp8,7,0.008634666601816813
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,32,1,64,128,1,float16,float16,15,0.011829332758982977
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,32,1,64,0,1,float16,float16,15,0.012186666329701742
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,32,1,64,128,1,float16,fp8,15,0.00816000004609426
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,32,1,64,0,1,float16,fp8,15,0.008058666562040647
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,32,1,64,128,1,float16,float16,31,0.012784000486135483
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,32,1,64,0,1,float16,float16,31,0.012122667084137598
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,32,1,64,128,1,float16,fp8,31,0.00855466661353906
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,32,1,64,0,1,float16,fp8,31,0.008581333483258883
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,32,1,64,128,1,float16,float16,127,0.012639999389648438
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,32,1,64,128,1,float16,float16,63,0.01184533288081487
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,32,1,64,0,1,float16,float16,63,0.011936000237862269
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,32,1,64,128,1,float16,fp8,63,0.008650666723648706
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,32,1,64,0,1,float16,fp8,63,0.008010666817426682
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,32,1,64,0,1,float16,float16,127,0.011952000359694162
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,32,1,64,128,1,float16,fp8,127,0.008853333070874214
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,32,1,64,0,1,float16,fp8,127,0.009370666618148485
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,32,1,64,128,1,float16,float16,255,0.02250133454799652
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,32,1,64,0,1,float16,float16,255,0.021733333667119343
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,32,1,64,128,1,float16,fp8,255,0.010410666465759277
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,32,1,64,0,1,float16,fp8,255,0.010928000013033548
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,32,1,64,128,1,float16,float16,511,0.027295999228954315
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,32,1,64,0,1,float16,float16,511,0.029861333469549816
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,32,1,64,128,1,float16,fp8,511,0.011424000064531961
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,32,1,64,0,1,float16,fp8,511,0.019573333362738293
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,32,1,64,128,1,float16,float16,1023,0.039247999588648476
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,32,1,64,0,1,float16,float16,1023,0.047744000951449074
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,32,1,64,128,1,float16,fp8,1023,0.011578666667143503
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,32,1,64,0,1,float16,fp8,1023,0.028213332096735638
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,32,1,64,128,1,float16,float16,2047,0.04152533411979675
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,32,1,64,0,1,float16,float16,2047,0.06182933350404104
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,32,1,64,128,1,float16,fp8,2047,0.012138667205969492
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,32,1,64,0,1,float16,fp8,2047,0.0366239994764328
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,32,1,64,128,1,float16,float16,4095,0.04138133426507314
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,32,1,64,0,1,float16,float16,4095,0.08141333361466725
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,32,1,64,128,1,float16,fp8,4095,0.011744000017642975
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,32,1,64,0,1,float16,fp8,4095,0.05409066875775655
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,32,2,64,128,1,float16,float16,1,0.01533866673707962
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,32,2,64,0,1,float16,float16,1,0.015450666348139444
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,32,2,64,128,1,float16,fp8,1,0.0074506668994824094
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,32,2,64,0,1,float16,fp8,1,0.008954666554927826
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,32,2,64,128,1,float16,float16,3,0.015173333386580149
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,32,2,64,0,1,float16,float16,3,0.015002666662136713
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,32,2,64,128,1,float16,fp8,3,0.00725333330531915
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,32,2,64,0,1,float16,fp8,3,0.007653333246707916
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,32,2,64,128,1,float16,float16,7,0.01569066693385442
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,32,2,64,0,1,float16,float16,15,0.01515199989080429
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,32,2,64,0,1,float16,float16,7,0.015541333705186844
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,32,2,64,128,1,float16,fp8,7,0.00730666642387708
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,32,2,64,0,1,float16,fp8,7,0.007018666714429855
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,32,2,64,128,1,float16,float16,15,0.015466666469971338
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,32,2,64,128,1,float16,fp8,15,0.00766933336853981
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,32,2,64,0,1,float16,fp8,15,0.0086666668454806
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,32,2,64,128,1,float16,float16,31,0.015029333531856537
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,32,2,64,0,1,float16,float16,31,0.015418666104475657
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,32,2,64,128,1,float16,fp8,31,0.007301333049933116
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,32,2,64,0,1,float16,fp8,31,0.007247999931375186
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,32,2,64,128,1,float16,float16,63,0.015066667149464289
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,32,2,64,0,1,float16,float16,63,0.015290666371583939
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,32,2,64,128,1,float16,fp8,63,0.007786666974425316
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,32,2,64,0,1,float16,fp8,63,0.008383999889095625
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,32,2,64,128,1,float16,float16,127,0.014831999937693277
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,32,2,64,0,1,float16,float16,127,0.015413332730531693
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,32,2,64,128,1,float16,fp8,127,0.00797333319981893
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,32,2,64,0,1,float16,fp8,127,0.008047999814152718
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,32,2,64,128,1,float16,float16,255,0.027162666122118633
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,32,2,64,128,1,float16,fp8,511,0.01098666712641716
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,32,2,64,0,1,float16,float16,255,0.02743999908367793
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,32,2,64,128,1,float16,fp8,255,0.0100853331387043
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,32,2,64,0,1,float16,fp8,255,0.011658667276302973
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,32,2,64,128,1,float16,float16,511,0.03664000084002813
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,32,2,64,0,1,float16,float16,511,0.04163199911514918
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,32,2,64,0,1,float16,fp8,511,0.019061333189407986
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,32,2,64,128,1,float16,float16,1023,0.03681066632270813
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,32,2,64,0,1,float16,float16,1023,0.050010666251182556
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,32,2,64,128,1,float16,fp8,1023,0.011472000430027643
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,32,2,64,0,1,float16,fp8,1023,0.024336000283559162
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,32,2,64,128,1,float16,float16,2047,0.03956266740957896
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,32,2,64,0,1,float16,float16,2047,0.06817066669464111
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,32,2,64,128,1,float16,fp8,2047,0.011141333729028702
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,32,2,64,0,1,float16,fp8,2047,0.03398400048414866
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,32,2,64,128,1,float16,float16,4095,0.03913066784540812
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,32,2,64,0,1,float16,float16,4095,0.08939199646313985
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,32,4,64,128,1,float16,float16,1,0.024133334557215374
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,32,2,64,128,1,float16,fp8,4095,0.011535999675591787
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,32,4,64,0,1,float16,float16,1,0.023498666783173878
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,32,2,64,0,1,float16,fp8,4095,0.05611733098824819
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,32,4,64,128,1,float16,fp8,1,0.00696000022192796
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,32,4,64,0,1,float16,fp8,1,0.0069919998447100324
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,32,4,64,128,1,float16,float16,3,0.023914667467276256
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,32,4,64,0,1,float16,float16,3,0.023445333043734234
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,32,4,64,128,1,float16,fp8,3,0.008293333152929941
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,32,4,64,128,1,float16,float16,7,0.02333866556485494
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,32,4,64,0,1,float16,fp8,3,0.0075040000180403394
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,32,4,64,128,1,float16,fp8,7,0.0069226666043202085
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,32,4,64,0,1,float16,float16,7,0.024005333582560223
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,32,4,64,0,1,float16,fp8,7,0.007061333085099856
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,32,4,64,128,1,float16,float16,15,0.023904000719388325
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,32,4,64,0,1,float16,float16,15,0.023578666150569916
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,32,4,64,128,1,float16,fp8,15,0.008261333530147871
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,32,4,64,0,1,float16,fp8,15,0.006954666847983996
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,32,4,64,128,1,float16,float16,31,0.023685333629449207
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,32,4,64,0,1,float16,float16,31,0.023258666197458904
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,32,4,64,128,1,float16,fp8,31,0.007647999872763951
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,32,4,64,0,1,float16,fp8,31,0.0069919998447100324
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,32,4,64,128,1,float16,float16,63,0.023749334116776783
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,32,4,64,0,1,float16,float16,63,0.0234400009115537
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,32,4,64,0,1,float16,fp8,127,0.0080960001796484
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,32,4,64,128,1,float16,fp8,63,0.008623999853928884
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,32,4,64,0,1,float16,fp8,63,0.006954666847983996
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,32,4,64,128,1,float16,fp8,255,0.013744000345468521
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,32,4,64,128,1,float16,float16,127,0.0235359991590182
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,32,4,64,0,1,float16,float16,127,0.02314666658639908
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,32,4,64,128,1,float16,fp8,127,0.008805333326260248
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,32,4,64,128,1,float16,float16,255,0.0439573327700297
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,32,4,64,0,1,float16,float16,255,0.04340266684691111
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,32,4,64,0,1,float16,fp8,255,0.016437333077192307
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,32,4,64,128,1,float16,float16,511,0.043477331598599754
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,32,4,64,0,1,float16,float16,511,0.05247466762860616
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,32,4,64,128,1,float16,fp8,511,0.014080000420411428
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,32,4,64,0,1,float16,fp8,511,0.020629333953062694
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,32,4,64,128,1,float16,float16,1023,0.04409599800904592
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,32,4,64,0,1,float16,float16,1023,0.06638399759928386
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,32,4,64,128,1,float16,fp8,1023,0.014298666268587112
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,32,4,64,0,1,float16,fp8,1023,0.028389332195123036
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,32,4,64,128,1,float16,float16,2047,0.046021332343419395
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,32,4,64,0,1,float16,float16,2047,0.09175466497739156
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,32,4,64,128,1,float16,fp8,2047,0.014335999886194864
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,32,4,64,0,1,float16,fp8,2047,0.04643733302752177
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,32,4,64,128,1,float16,float16,4095,0.04619733492533366
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,32,4,64,128,1,float16,fp8,4095,0.013658666362365087
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,32,4,64,0,1,float16,float16,4095,0.13498666882514954
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,32,8,64,128,1,float16,float16,1,0.04008000095685323
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,32,4,64,0,1,float16,fp8,4095,0.07854933540026347
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,32,8,64,128,1,float16,fp8,1,0.010954666882753372
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,32,8,64,0,1,float16,float16,1,0.04055999964475632
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,32,8,64,0,1,float16,fp8,1,0.011055999745925268
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,32,8,64,128,1,float16,float16,3,0.03968533376852671
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,32,8,64,0,1,float16,float16,3,0.0408746674656868
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,32,8,64,128,1,float16,fp8,7,0.011312000453472137
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,32,8,64,128,1,float16,fp8,3,0.011584000041087469
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,32,8,64,0,1,float16,fp8,3,0.010965333630641302
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,32,8,64,128,1,float16,float16,7,0.040821333726247154
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,32,8,64,0,1,float16,float16,7,0.04045866678158442
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,32,8,64,0,1,float16,fp8,15,0.010981333752473196
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,32,8,64,0,1,float16,fp8,7,0.010922666639089584
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,32,8,64,128,1,float16,float16,15,0.0408693328499794
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,32,8,64,128,1,float16,fp8,31,0.011007999380429586
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,32,8,64,0,1,float16,float16,15,0.04021333406368891
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,32,8,64,128,1,float16,fp8,15,0.01090666651725769
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,32,8,64,128,1,float16,float16,31,0.04049599915742874
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,32,8,64,0,1,float16,float16,31,0.040394666294256844
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,32,8,64,0,1,float16,fp8,63,0.010911999891201654
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,32,8,64,0,1,float16,fp8,31,0.01097600037852923
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,32,8,64,128,1,float16,float16,63,0.0405173326532046
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,32,8,64,0,1,float16,float16,63,0.03956799954175949
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,32,8,64,128,1,float16,fp8,63,0.011242666592200598
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,32,8,64,128,1,float16,float16,127,0.039994666973749794
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,32,8,64,0,1,float16,float16,127,0.04018666595220566
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,32,8,64,0,1,float16,fp8,255,0.016778666526079178
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,32,8,64,128,1,float16,fp8,127,0.012847999731699625
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,32,8,64,0,1,float16,fp8,127,0.012714666624863943
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,32,8,64,128,1,float16,float16,255,0.04062400013208389
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,32,8,64,128,1,float16,fp8,255,0.012794667234023413
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,32,8,64,0,1,float16,float16,255,0.04178666571776072
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,32,8,64,128,1,float16,float16,511,0.04161600023508072
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,32,8,64,128,1,float16,fp8,511,0.013359999905029932
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,32,8,64,0,1,float16,float16,511,0.05366933345794678
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,32,8,64,0,1,float16,fp8,511,0.02508266766866048
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,32,8,64,128,1,float16,float16,1023,0.04146133363246918
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,32,8,64,0,1,float16,float16,2047,0.12176533540089925
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,32,8,64,0,1,float16,float16,1023,0.07540266712506612
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,32,8,64,128,1,float16,fp8,1023,0.012970666090647379
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,32,8,64,0,1,float16,fp8,1023,0.04321600000063578
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,32,8,64,0,1,float16,float16,4095,0.2093706727027893
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,32,8,64,128,1,float16,float16,2047,0.04398400088151296
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,32,8,64,128,1,float16,fp8,2047,0.013253333667914072
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,32,8,64,0,1,float16,fp8,2047,0.07492266595363617
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,32,8,64,128,1,float16,float16,4095,0.043509334325790405
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,32,8,64,128,1,float16,fp8,4095,0.012853333105643591
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,32,8,64,0,1,float16,fp8,4095,0.13959466417630514
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,32,1,64,128,1,float16,float16,1,0.01850133389234543
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,32,1,64,0,1,float16,float16,1,0.017701332767804463
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,32,1,64,128,1,float16,fp8,1,0.008586666857202848
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,32,1,64,128,1,float16,float16,3,0.017370666066805523
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,32,1,64,0,1,float16,fp8,1,0.00922133338948091
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,32,1,64,0,1,float16,float16,3,0.01756799966096878
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,32,1,64,128,1,float16,fp8,3,0.008703999842206636
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,32,1,64,0,1,float16,fp8,3,0.008746666833758354
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,32,1,64,128,1,float16,float16,7,0.018309333672126133
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,32,1,64,128,1,float16,float16,15,0.01828266680240631
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,32,1,64,0,1,float16,float16,7,0.017653333644072216
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,32,1,64,128,1,float16,fp8,7,0.008687999720374743
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,32,1,64,0,1,float16,fp8,7,0.008447999755541483
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,32,1,64,0,1,float16,float16,15,0.017429333180189133
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,32,1,64,128,1,float16,fp8,15,0.008767999708652496
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,32,1,64,0,1,float16,fp8,15,0.008709333216150602
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,32,1,64,128,1,float16,float16,31,0.01848000039656957
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,32,1,64,0,1,float16,float16,31,0.018239999810854595
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,32,1,64,128,1,float16,fp8,31,0.008832000195980072
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,32,1,64,0,1,float16,fp8,31,0.008757333581646284
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,32,1,64,128,1,float16,float16,63,0.017290666699409485
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,32,1,64,0,1,float16,float16,63,0.017221332838137943
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,32,1,64,128,1,float16,fp8,63,0.00874133345981439
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,32,1,64,0,1,float16,fp8,63,0.008672000219424566
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,32,1,64,128,1,float16,float16,127,0.018138666947682697
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,32,1,64,0,1,float16,float16,127,0.01791999985774358
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,32,1,64,128,1,float16,fp8,127,0.009589333087205887
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,32,1,64,0,1,float16,fp8,127,0.009589333087205887
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,32,1,64,128,1,float16,float16,255,0.03256533294916153
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,32,1,64,0,1,float16,float16,255,0.03236266722281774
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,32,1,64,128,1,float16,fp8,255,0.016832000265518825
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,32,1,64,0,1,float16,fp8,255,0.019205333044131596
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,32,1,64,128,1,float16,float16,511,0.04423999786376953
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,32,1,64,0,1,float16,float16,511,0.04945066571235657
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,32,1,64,128,1,float16,fp8,511,0.017231999586025875
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,32,1,64,0,1,float16,fp8,511,0.028304000695546467
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,32,1,64,128,1,float16,float16,1023,0.04371733466784159
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,32,1,64,0,1,float16,float16,1023,0.06080000102519989
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,32,1,64,128,1,float16,fp8,1023,0.01725333308180173
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,32,1,64,0,1,float16,fp8,1023,0.036229332288106285
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,32,2,64,128,1,float16,float16,1,0.024512000381946564
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,32,2,64,0,1,float16,float16,1,0.024160000185171764
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,32,2,64,128,1,float16,fp8,1,0.007930666829148928
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,32,2,64,0,1,float16,fp8,1,0.007941333577036858
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,32,2,64,128,1,float16,float16,3,0.024031999210516613
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,32,2,64,0,1,float16,float16,3,0.02404266595840454
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,32,2,64,128,1,float16,fp8,3,0.00877333308259646
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,32,2,64,0,1,float16,fp8,7,0.007920000081261
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,32,2,64,128,1,float16,float16,7,0.024325333535671234
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,32,2,64,0,1,float16,fp8,3,0.008378666515151659
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,32,2,64,0,1,float16,float16,7,0.024357333779335022
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,32,2,64,128,1,float16,fp8,7,0.007946666950980822
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,32,2,64,128,1,float16,float16,15,0.024010665714740753
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,32,2,64,0,1,float16,float16,15,0.02473066747188568
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,32,2,64,128,1,float16,fp8,15,0.007893333211541176
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,32,2,64,0,1,float16,fp8,15,0.008063999935984612
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,32,2,64,128,1,float16,float16,31,0.024495999018351238
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,32,2,64,0,1,float16,float16,31,0.02436800052722295
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,32,2,64,128,1,float16,fp8,31,0.00786666696270307
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,32,2,64,0,1,float16,fp8,31,0.007983999947706858
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,32,2,64,128,1,float16,float16,63,0.024490666886170704
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,32,2,64,0,1,float16,float16,63,0.023573334018389385
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,32,2,64,128,1,float16,fp8,63,0.007914666707317034
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,32,2,64,0,1,float16,fp8,63,0.008383999889095625
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,32,2,64,128,1,float16,float16,127,0.023760000864664715
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,32,2,64,0,1,float16,float16,127,0.023760000864664715
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,32,2,64,128,1,float16,fp8,127,0.009381333366036415
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,32,2,64,0,1,float16,fp8,127,0.009984000275532404
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,32,2,64,128,1,float16,float16,255,0.04427733520666758
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,32,2,64,0,1,float16,float16,255,0.04418666660785675
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,32,2,64,128,1,float16,fp8,255,0.015696000307798386
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,32,2,64,0,1,float16,fp8,255,0.018917333334684372
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,32,2,64,128,1,float16,float16,511,0.0444106658299764
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,32,2,64,0,1,float16,float16,511,0.05338133374849955
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,32,2,64,128,1,float16,fp8,511,0.01573333392540614
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,32,2,64,0,1,float16,fp8,511,0.024277334411938984
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,32,2,64,0,1,float16,fp8,1023,0.034517332911491394
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,32,2,64,128,1,float16,float16,1023,0.044250667095184326
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,32,2,64,0,1,float16,float16,1023,0.0672106643517812
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,32,2,64,128,1,float16,fp8,1023,0.015722667177518208
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,32,4,64,128,1,float16,float16,1,0.040922666589419045
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,32,4,64,0,1,float16,float16,1,0.04066666712363561
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,32,4,64,128,1,float16,fp8,1,0.011183999478816986
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,32,4,64,0,1,float16,fp8,1,0.01179733375708262
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,32,4,64,128,1,float16,float16,3,0.041034666200478874
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,32,4,64,0,1,float16,float16,3,0.04062400013208389
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,32,4,64,128,1,float16,fp8,3,0.011109333485364914
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,32,4,64,0,1,float16,fp8,3,0.011338666081428528
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,32,4,64,128,1,float16,float16,7,0.04091733445723852
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,32,4,64,0,1,float16,float16,7,0.040522667268911995
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,32,4,64,128,1,float16,fp8,7,0.011168000598748526
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,32,4,64,0,1,float16,fp8,7,0.011765333513418833
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,32,4,64,128,1,float16,float16,15,0.04101866732041041
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,32,4,64,0,1,float16,float16,15,0.04067199925581614
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,32,4,64,128,1,float16,fp8,15,0.011077333241701126
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,32,4,64,0,1,float16,fp8,15,0.011231999844312668
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,32,4,64,128,1,float16,float16,31,0.04101333270470301
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,32,4,64,0,1,float16,float16,31,0.04053866614898046
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,32,4,64,128,1,float16,fp8,31,0.011237333218256632
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,32,4,64,0,1,float16,fp8,31,0.011727999895811081
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,32,4,64,128,1,float16,float16,63,0.04051200052102407
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,32,4,64,0,1,float16,float16,63,0.04008000095685323
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,32,4,64,128,1,float16,fp8,63,0.011029332876205444
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,32,4,64,0,1,float16,fp8,63,0.01116266722480456
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,32,4,64,128,1,float16,float16,127,0.04054400076468786
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,32,4,64,0,1,float16,float16,127,0.04005866746107737
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,32,4,64,128,1,float16,fp8,127,0.013386666774749756
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,32,4,64,0,1,float16,fp8,127,0.013823999712864557
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,32,4,64,128,1,float16,float16,255,0.04230933388074239
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,32,4,64,0,1,float16,float16,255,0.04130133241415024
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,32,4,64,128,1,float16,fp8,255,0.01310933381319046
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,32,4,64,0,1,float16,fp8,255,0.01735466718673706
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,32,4,64,128,1,float16,float16,511,0.04206933577855428
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,32,4,64,0,1,float16,float16,511,0.05463466544946035
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,32,4,64,128,1,float16,fp8,511,0.013077333569526672
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,32,4,64,0,1,float16,fp8,511,0.02641066660483678
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,32,4,64,0,1,float16,float16,1023,0.07689066727956136
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,32,4,64,128,1,float16,float16,1023,0.04347200194994608
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,32,4,64,128,1,float16,fp8,1023,0.013151999562978745
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,32,4,64,0,1,float16,fp8,1023,0.043882668018341064
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,32,8,64,128,1,float16,float16,1,0.07309333483378093
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,32,8,64,0,1,float16,float16,1,0.07261333366235097
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,32,8,64,128,1,float16,fp8,1,0.018543999642133713
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,32,8,64,128,1,float16,float16,3,0.07286933561166127
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,32,8,64,0,1,float16,fp8,1,0.01929066702723503
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,32,8,64,0,1,float16,float16,3,0.07323200007279713
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,32,8,64,128,1,float16,fp8,3,0.018581333259741466
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,32,8,64,0,1,float16,fp8,3,0.018602666755517323
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,32,8,64,0,1,float16,float16,7,0.07252799967924754
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,32,8,64,128,1,float16,float16,7,0.0737120012442271
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,32,8,64,128,1,float16,fp8,7,0.018506667266289394
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,32,8,64,0,1,float16,fp8,7,0.018570666511853535
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,32,8,64,128,1,float16,float16,15,0.07333333293596904
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,32,8,64,128,1,float16,fp8,15,0.018559999763965607
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,32,8,64,0,1,float16,float16,15,0.07300266623497009
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,32,8,64,0,1,float16,fp8,15,0.018559999763965607
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,32,8,64,128,1,float16,float16,31,0.07275733351707458
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,32,8,64,0,1,float16,float16,31,0.0738506664832433
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,32,8,64,0,1,float16,float16,63,0.07212799787521362
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,32,8,64,128,1,float16,fp8,31,0.018565333137909572
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,32,8,64,128,1,float16,float16,63,0.07175999879837036
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,32,8,64,0,1,float16,fp8,31,0.01923199991385142
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,32,8,64,128,1,float16,fp8,63,0.018543999642133713
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,32,8,64,0,1,float16,fp8,63,0.018464000274737675
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,32,8,64,128,1,float16,float16,127,0.07310933371384938
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,32,8,64,0,1,float16,float16,127,0.0722453345855077
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,32,8,64,128,1,float16,fp8,127,0.02178666740655899
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,32,8,64,0,1,float16,fp8,127,0.02178666740655899
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,32,8,64,128,1,float16,float16,255,0.07717866698900859
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,32,8,64,0,1,float16,float16,255,0.0761599987745285
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,32,8,64,128,1,float16,fp8,255,0.021770666042963665
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,32,8,64,0,1,float16,fp8,255,0.03051200012365977
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,32,8,64,128,1,float16,float16,511,0.07714133461316426
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,32,8,64,0,1,float16,float16,511,0.09597333272298177
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,32,8,64,128,1,float16,fp8,511,0.02165333429972331
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,32,8,64,0,1,float16,fp8,511,0.04775466521581014
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,32,8,64,128,1,float16,float16,1023,0.07653866708278656
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,32,8,64,0,1,float16,float16,1023,0.14139733711878458
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,32,8,64,128,1,float16,fp8,1023,0.022416000564893086
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,32,8,64,0,1,float16,fp8,1023,0.07886933286984761
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,32,1,64,0,1,float16,fp8,1,0.014159999787807465
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,32,1,64,128,1,float16,float16,1,0.029477333029111225
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,32,1,64,0,1,float16,float16,1,0.029232000311215717
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,32,1,64,128,1,float16,fp8,1,0.014661333213249842
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,32,1,64,128,1,float16,float16,7,0.0295413335164388
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,32,1,64,128,1,float16,float16,3,0.029279999434947968
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,32,1,64,128,1,float16,fp8,3,0.014090667168299357
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,32,1,64,0,1,float16,float16,3,0.030602666238943737
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,32,1,64,0,1,float16,fp8,3,0.014655999839305878
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,32,1,64,0,1,float16,float16,7,0.02934933453798294
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,32,1,64,128,1,float16,fp8,7,0.01402666668097178
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,32,1,64,128,1,float16,float16,15,0.02943466603755951
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,32,1,64,0,1,float16,fp8,7,0.014666666587193808
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,32,1,64,128,1,float16,float16,31,0.029946667452653248
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,32,1,64,128,1,float16,fp8,31,0.01402666668097178
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,32,1,64,0,1,float16,float16,15,0.02939733366171519
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,32,1,64,128,1,float16,fp8,15,0.014218666901191076
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,32,1,64,0,1,float16,fp8,15,0.01471466695268949
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,32,1,64,0,1,float16,float16,31,0.029482667644818623
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,32,1,64,0,1,float16,fp8,31,0.014069333672523499
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,32,1,64,128,1,float16,float16,127,0.02943466603755951
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,32,1,64,128,1,float16,fp8,63,0.014058666924635569
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,32,1,64,0,1,float16,float16,63,0.029701332251230877
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,32,1,64,128,1,float16,float16,63,0.02978666623433431
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,32,1,64,0,1,float16,fp8,63,0.014122666170199713
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,32,1,64,128,1,float16,fp8,127,0.016021333634853363
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,32,1,64,0,1,float16,float16,127,0.030426666140556335
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,32,1,64,0,1,float16,fp8,127,0.01609066625436147
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,32,1,64,128,1,float16,float16,255,0.05494933327039083
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,32,1,64,0,1,float16,float16,255,0.05409066875775655
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,32,1,64,128,1,float16,fp8,255,0.024746666351954143
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,32,1,64,0,1,float16,fp8,255,0.028490667541821797
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,32,1,64,128,1,float16,float16,511,0.05494399865468343
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,32,1,64,0,1,float16,float16,511,0.06533333162466685
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,32,2,64,128,1,float16,float16,1,0.04195199906826019
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,32,1,64,0,1,float16,fp8,511,0.035402665535608925
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,32,1,64,128,1,float16,fp8,511,0.02480533222357432
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,32,2,64,0,1,float16,float16,1,0.04163199911514918
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,32,2,64,128,1,float16,fp8,1,0.012997332960367203
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,32,2,64,128,1,float16,float16,3,0.04195199906826019
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,32,2,64,0,1,float16,fp8,1,0.013280000537633896
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,32,2,64,0,1,float16,float16,3,0.041834667325019836
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,32,2,64,128,1,float16,fp8,3,0.012709333250919977
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,32,2,64,0,1,float16,fp8,3,0.01267733300725619
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,32,2,64,0,1,float16,float16,7,0.04159999887148539
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,32,2,64,128,1,float16,float16,7,0.04275199770927429
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,32,2,64,128,1,float16,fp8,15,0.012624000509579977
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,32,2,64,0,1,float16,fp8,7,0.012554666648308435
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,32,2,64,128,1,float16,fp8,7,0.013130666067202887
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,32,2,64,0,1,float16,float16,15,0.04177066683769226
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,32,2,64,128,1,float16,float16,15,0.04298666616280874
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,32,2,64,0,1,float16,fp8,15,0.012703999876976013
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,32,2,64,128,1,float16,float16,31,0.041840001940727234
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,32,2,64,0,1,float16,float16,63,0.04114133367935816
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,32,2,64,128,1,float16,fp8,31,0.012597333639860153
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,32,2,64,0,1,float16,float16,31,0.04268266757329305
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,32,2,64,0,1,float16,fp8,31,0.012602667013804117
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,32,2,64,128,1,float16,float16,63,0.04170133173465729
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,32,2,64,0,1,float16,fp8,63,0.012682666381200155
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,32,2,64,128,1,float16,fp8,63,0.01333333303531011
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,32,2,64,128,1,float16,float16,127,0.041296000281969704
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,32,2,64,0,1,float16,float16,127,0.041162667175134025
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,32,2,64,128,1,float16,fp8,127,0.015317333241303762
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,32,2,64,128,1,float16,float16,255,0.04363200068473816
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,32,2,64,0,1,float16,fp8,127,0.015728000551462173
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,32,2,64,128,1,float16,fp8,255,0.015317333241303762
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,32,2,64,0,1,float16,float16,255,0.04311466713746389
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,32,2,64,0,1,float16,fp8,255,0.021157334248224895
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,32,2,64,0,1,float16,fp8,511,0.030933332939942677
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,32,2,64,128,1,float16,float16,511,0.04372799893220266
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,32,2,64,0,1,float16,float16,511,0.0562666654586792
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,32,2,64,128,1,float16,fp8,511,0.015317333241303762
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,32,4,64,128,1,float16,fp8,1,0.019434666881958645
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,32,4,64,0,1,float16,fp8,1,0.019509332875410717
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,32,4,64,128,1,float16,float16,1,0.07468800246715546
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,32,4,64,0,1,float16,float16,1,0.07378133138020833
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,32,4,64,128,1,float16,float16,3,0.07414400080839793
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,32,4,64,0,1,float16,float16,3,0.07403733332951863
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,32,4,64,0,1,float16,fp8,3,0.01953599974513054
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,32,4,64,128,1,float16,fp8,3,0.019002666076024372
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,32,4,64,0,1,float16,fp8,7,0.01894933357834816
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,32,4,64,0,1,float16,float16,7,0.07472000022729237
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,32,4,64,128,1,float16,float16,7,0.07452266911665599
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,32,4,64,128,1,float16,fp8,7,0.019050666441520054
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,32,4,64,128,1,float16,float16,15,0.07390933235486348
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,32,4,64,128,1,float16,fp8,15,0.019082666685183842
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,32,4,64,0,1,float16,float16,15,0.07428266604741414
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,32,4,64,0,1,float16,fp8,15,0.019013332823912304
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,32,4,64,128,1,float16,float16,31,0.07469333211580913
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,32,4,64,128,1,float16,fp8,31,0.01905599981546402
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,32,4,64,0,1,float16,float16,31,0.07478933533032735
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,32,4,64,0,1,float16,fp8,31,0.01883200059334437
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,32,4,64,128,1,float16,float16,63,0.07291733225186665
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,32,4,64,128,1,float16,fp8,63,0.018901333212852478
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,32,4,64,0,1,float16,float16,63,0.07310933371384938
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,32,4,64,128,1,float16,float16,127,0.07341333230336507
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,32,4,64,0,1,float16,fp8,63,0.01931200052301089
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,32,4,64,128,1,float16,fp8,127,0.022533332308133442
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,32,4,64,0,1,float16,float16,127,0.07399466633796692
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,32,4,64,0,1,float16,fp8,127,0.022624000906944275
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,32,4,64,128,1,float16,float16,255,0.07924800117810567
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,32,4,64,128,1,float16,fp8,255,0.022634667654832203
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,32,4,64,0,1,float16,float16,255,0.07850666840871175
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,32,4,64,128,1,float16,float16,511,0.07926400005817413
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,32,4,64,128,1,float16,fp8,511,0.02271999915440877
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,32,4,64,0,1,float16,fp8,255,0.03154666721820831
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,32,4,64,0,1,float16,float16,511,0.10041067004203796
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,32,4,64,0,1,float16,fp8,511,0.04857600231965383
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,32,8,64,128,1,float16,float16,1,0.13595199584960938
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,32,8,64,128,1,float16,fp8,1,0.03277866790692011
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,32,8,64,0,1,float16,fp8,1,0.0327360009153684
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,32,8,64,0,1,float16,float16,1,0.13621333241462708
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,32,8,64,128,1,float16,float16,3,0.13552000125249228
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,32,8,64,0,1,float16,float16,3,0.13762666781743368
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,32,8,64,0,1,float16,fp8,3,0.032698666055997215
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,32,8,64,128,1,float16,fp8,3,0.03359466542800268
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,32,8,64,128,1,float16,float16,7,0.13580800096193948
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,32,8,64,0,1,float16,float16,7,0.13616533080736795
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,32,8,64,128,1,float16,fp8,7,0.03274133304754893
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,32,8,64,0,1,float16,fp8,7,0.033045334120591484
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,32,8,64,128,1,float16,float16,15,0.13552000125249228
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,32,8,64,128,1,float16,fp8,15,0.03270933280388514
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,32,8,64,0,1,float16,float16,15,0.13756799697875977
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,32,8,64,0,1,float16,fp8,15,0.033600000043710075
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,32,8,64,128,1,float16,float16,31,0.13565333684285483
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,32,8,64,0,1,float16,float16,31,0.1362399955590566
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,32,8,64,128,1,float16,fp8,31,0.032618666688601174
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,32,8,64,0,1,float16,fp8,31,0.03333866596221924
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,32,8,64,128,1,float16,float16,63,0.1349440018335978
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,32,8,64,0,1,float16,float16,63,0.13512532909711203
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,32,8,64,128,1,float16,fp8,63,0.03315199911594391
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,32,8,64,0,1,float16,fp8,63,0.03323733309904734
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,32,8,64,128,1,float16,float16,127,0.13615999619166055
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,32,8,64,0,1,float16,float16,127,0.13730133573214212
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,32,8,64,128,1,float16,fp8,127,0.0397119993964831
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,32,8,64,0,1,float16,fp8,127,0.04021333406368891
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,32,8,64,128,1,float16,float16,255,0.14307199915250143
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,32,8,64,0,1,float16,float16,255,0.14309333761533102
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,32,8,64,128,1,float16,fp8,255,0.04064533362785975
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,32,8,64,128,1,float16,float16,511,0.14357866843541464
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,32,8,64,0,1,float16,fp8,255,0.05815466741720835
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,32,8,64,0,1,float16,float16,511,0.18251200517018637
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,32,1,64,128,1,float16,float16,1,0.008522666369875273
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,32,8,64,128,1,float16,fp8,511,0.04081599911053976
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,32,1,64,0,1,float16,float16,1,0.008517333616813024
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,32,1,64,128,1,float16,fp8,1,0.008218666538596153
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,32,8,64,0,1,float16,fp8,511,0.08755200107892354
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,32,1,64,0,1,float16,fp8,1,0.007791999727487564
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,32,1,64,128,1,float16,float16,3,0.009178666397929192
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,32,1,64,0,1,float16,float16,3,0.008703999842206636
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,32,1,64,128,1,float16,fp8,3,0.007834666719039282
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,32,1,64,0,1,float16,fp8,3,0.007680000116427739
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,32,1,64,128,1,float16,float16,7,0.008629333227872849
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,32,1,64,0,1,float16,float16,7,0.00915733352303505
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,32,1,64,128,1,float16,fp8,7,0.008240000034372011
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,32,1,64,0,1,float16,fp8,7,0.007770666852593422
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,32,1,64,128,1,float16,float16,15,0.009381333366036415
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,32,1,64,0,1,float16,float16,15,0.008821333448092142
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,32,1,64,128,1,float16,fp8,15,0.007813333223263422
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,32,1,64,128,1,float16,float16,63,0.011882666498422623
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,32,1,64,0,1,float16,fp8,15,0.007706666365265846
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,32,1,64,128,1,float16,float16,31,0.009402666861812273
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,32,1,64,0,1,float16,float16,31,0.009786666681369146
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,32,1,64,128,1,float16,fp8,31,0.008330666770537695
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,32,1,64,0,1,float16,fp8,31,0.007834666719039282
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,32,1,64,0,1,float16,float16,63,0.011141333729028702
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,32,1,64,128,1,float16,fp8,63,0.007776000226537387
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,32,1,64,0,1,float16,fp8,63,0.00873066671192646
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,32,1,64,128,1,float16,float16,127,0.011301333705584208
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,32,1,64,0,1,float16,float16,127,0.011231999844312668
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,32,1,64,128,1,float16,fp8,127,0.009226666763424873
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,32,1,64,128,1,float16,float16,511,0.011247999966144562
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,32,1,64,0,1,float16,fp8,127,0.009290666629870733
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,32,1,64,128,1,float16,float16,255,0.011237333218256632
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,32,1,64,0,1,float16,float16,255,0.011514666179815928
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,32,1,64,128,1,float16,fp8,255,0.009712000067035357
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,32,1,64,0,1,float16,fp8,255,0.011045332998037338
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,32,1,64,0,1,float16,float16,511,0.013002666334311167
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,32,1,64,128,1,float16,fp8,511,0.009717333440979322
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,32,1,64,0,1,float16,fp8,511,0.011178666104873022
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,32,1,64,128,1,float16,float16,1023,0.01328533391157786
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,32,1,64,0,1,float16,float16,1023,0.013525333255529404
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,32,1,64,128,1,float16,fp8,1023,0.00955200009047985
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,32,1,64,0,1,float16,fp8,1023,0.012453333785136541
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,32,1,64,128,1,float16,float16,2047,0.014981333166360855
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,32,1,64,0,1,float16,float16,2047,0.016421332955360413
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,32,1,64,128,1,float16,fp8,2047,0.00984533317387104
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,32,1,64,0,1,float16,fp8,2047,0.012709333250919977
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,32,1,64,128,1,float16,float16,4095,0.015696000307798386
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,32,1,64,0,1,float16,float16,4095,0.01646399994691213
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,32,1,64,128,1,float16,fp8,4095,0.009930666536092758
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,32,1,64,0,1,float16,fp8,4095,0.01443733274936676
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,32,1,64,128,1,float16,float16,8191,0.02022933339079221
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,32,1,64,128,1,float16,float16,16383,0.02075200031201045
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,32,1,64,0,1,float16,float16,8191,0.02537599951028824
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,32,1,64,128,1,float16,fp8,8191,0.009338666374484697
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,32,1,64,0,1,float16,fp8,8191,0.021920000513394673
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,32,1,64,0,1,float16,float16,16383,0.02991466720898946
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,32,1,64,128,1,float16,fp8,16383,0.009861333295702934
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,32,1,64,0,1,float16,fp8,16383,0.02738133321205775
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,32,1,64,128,1,float16,float16,32767,0.02476266771554947
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,32,1,64,0,1,float16,fp8,32767,0.04048533240954081
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,32,1,64,128,1,float16,fp8,32767,0.009423999736706415
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,32,1,64,0,1,float16,float16,32767,0.044250667095184326
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,32,2,64,128,1,float16,float16,1,0.008709333216150602
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,32,2,64,0,1,float16,float16,1,0.008693333094318708
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,32,2,64,128,1,float16,fp8,1,0.007247999931375186
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,32,2,64,0,1,float16,fp8,1,0.006613333399097125
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,32,1,64,0,1,float16,float16,65535,0.06835199892520905
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,32,1,64,128,1,float16,float16,65535,0.03031466652949651
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,32,2,64,128,1,float16,float16,3,0.008687999720374743
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,32,2,64,0,1,float16,float16,3,0.009269333134094873
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,32,1,64,0,1,float16,fp8,65535,0.07586133480072021
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,32,2,64,128,1,float16,fp8,3,0.006506666541099548
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,32,1,64,128,1,float16,fp8,65535,0.0103946669648091
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,32,2,64,0,1,float16,fp8,3,0.006549333532651265
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,32,2,64,128,1,float16,float16,7,0.009535999968647957
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,32,2,64,0,1,float16,float16,7,0.008896000062425932
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,32,2,64,128,1,float16,fp8,7,0.007525333513816197
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,32,2,64,0,1,float16,fp8,7,0.006688000013430913
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,32,2,64,128,1,float16,float16,15,0.00898133342464765
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,32,2,64,0,1,float16,float16,15,0.008832000195980072
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,32,2,64,128,1,float16,fp8,15,0.007231999809543292
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,32,2,64,0,1,float16,fp8,15,0.006730666384100914
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,32,2,64,128,1,float16,float16,31,0.010138666878143946
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,32,2,64,0,1,float16,float16,31,0.009599999835093817
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,32,2,64,128,1,float16,fp8,31,0.007482666522264481
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,32,2,64,128,1,float16,float16,127,0.012096000214417776
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,32,2,64,0,1,float16,fp8,31,0.006730666384100914
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,32,2,64,128,1,float16,float16,63,0.011178666104873022
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,32,2,64,0,1,float16,float16,63,0.011066666493813196
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,32,2,64,128,1,float16,fp8,63,0.007189333438873291
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,32,2,64,0,1,float16,fp8,63,0.006591999903321266
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,32,2,64,0,1,float16,float16,127,0.011359999577204386
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,32,2,64,0,1,float16,fp8,255,0.00890666681031386
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,32,2,64,0,1,float16,fp8,127,0.008672000219424566
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,32,2,64,128,1,float16,fp8,127,0.008623999853928884
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,32,2,64,128,1,float16,float16,255,0.011381333072980246
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,32,2,64,0,1,float16,float16,255,0.011557333171367645
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,32,2,64,128,1,float16,float16,1023,0.0120319997270902
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,32,2,64,128,1,float16,fp8,255,0.00808533343176047
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,32,2,64,128,1,float16,fp8,1023,0.007994666695594788
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,32,2,64,128,1,float16,float16,511,0.011557333171367645
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,32,2,64,0,1,float16,float16,511,0.011749333391586939
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,32,2,64,128,1,float16,fp8,511,0.008176000167926153
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,32,2,64,0,1,float16,fp8,511,0.009824000298976898
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,32,2,64,0,1,float16,float16,1023,0.011525332927703857
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,32,2,64,0,1,float16,fp8,1023,0.0100853331387043
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,32,2,64,128,1,float16,float16,2047,0.01628799984852473
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,32,2,64,0,1,float16,float16,2047,0.018874666343132656
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,32,2,64,128,1,float16,fp8,2047,0.008127999802430471
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,32,2,64,128,1,float16,float16,8191,0.0201706662774086
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,32,2,64,0,1,float16,fp8,2047,0.01173866664369901
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,32,2,64,128,1,float16,float16,4095,0.01740266631046931
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,32,2,64,0,1,float16,float16,4095,0.021525333325068157
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,32,2,64,128,1,float16,fp8,4095,0.008165333420038223
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,32,2,64,0,1,float16,fp8,4095,0.014117332796255747
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,32,2,64,0,1,float16,float16,8191,0.0284853329261144
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,32,2,64,128,1,float16,fp8,8191,0.008336000144481659
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,32,2,64,0,1,float16,fp8,8191,0.01884799947341283
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,32,2,64,128,1,float16,float16,16383,0.023472001155217487
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,32,2,64,0,1,float16,float16,16383,0.039461334546407066
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,32,2,64,128,1,float16,fp8,16383,0.00915733352303505
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,32,2,64,0,1,float16,fp8,16383,0.0262719988822937
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,32,2,64,128,1,float16,float16,32767,0.029493334392706554
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,32,2,64,0,1,float16,float16,32767,0.06131199995676676
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,32,2,64,128,1,float16,fp8,32767,0.008234666660428047
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,32,2,64,0,1,float16,fp8,32767,0.0443146675825119
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,32,4,64,128,1,float16,float16,1,0.009045333291093508
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,32,4,64,0,1,float16,float16,1,0.008650666723648706
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,32,2,64,128,1,float16,float16,65535,0.03558400024970373
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,32,4,64,128,1,float16,fp8,1,0.005578666925430298
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,32,4,64,0,1,float16,fp8,1,0.005728000154097875
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,32,4,64,128,1,float16,fp8,3,0.005999999741713206
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,32,4,64,128,1,float16,float16,3,0.009242666885256767
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,32,2,64,128,1,float16,fp8,65535,0.009530666594703993
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,32,2,64,0,1,float16,float16,65535,0.09339200456937154
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,32,4,64,0,1,float16,float16,7,0.00926399976015091
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,32,2,64,0,1,float16,fp8,65535,0.0687360018491745
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,32,4,64,0,1,float16,float16,3,0.008618666479984919
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,32,4,64,0,1,float16,fp8,3,0.005578666925430298
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,32,4,64,128,1,float16,float16,7,0.008949333180983862
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,32,4,64,128,1,float16,fp8,7,0.008192000289758047
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,32,4,64,128,1,float16,float16,15,0.009178666397929192
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,32,4,64,0,1,float16,fp8,7,0.0075040000180403394
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,32,4,64,0,1,float16,float16,15,0.00874133345981439
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,32,4,64,128,1,float16,fp8,15,0.005775999898711841
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,32,4,64,0,1,float16,fp8,15,0.0060159998635451
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,32,4,64,128,1,float16,float16,31,0.009626666704813639
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,32,4,64,0,1,float16,float16,31,0.010117333382368088
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,32,4,64,128,1,float16,fp8,31,0.00684799998998642
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,32,4,64,0,1,float16,fp8,31,0.007242666557431221
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,32,4,64,128,1,float16,float16,127,0.011893333246310553
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,32,4,64,128,1,float16,float16,63,0.011029332876205444
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,32,4,64,0,1,float16,float16,63,0.011130666981140772
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,32,4,64,128,1,float16,fp8,63,0.005679999788602193
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,32,4,64,128,1,float16,float16,255,0.011600000162919363
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,32,4,64,0,1,float16,fp8,63,0.006101333225766818
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,32,4,64,0,1,float16,float16,127,0.011546666423479715
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,32,4,64,128,1,float16,fp8,127,0.007711999739209811
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,32,4,64,0,1,float16,fp8,127,0.008421333506703377
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,32,4,64,0,1,float16,float16,255,0.011365332951148352
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,32,4,64,128,1,float16,fp8,255,0.007098666702707608
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,32,4,64,0,1,float16,fp8,255,0.007653333246707916
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,32,4,64,128,1,float16,float16,511,0.011760000139474869
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,32,4,64,0,1,float16,float16,511,0.012186666329701742
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,32,4,64,128,1,float16,fp8,511,0.007743999982873599
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,32,4,64,128,1,float16,float16,1023,0.01597333326935768
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,32,4,64,0,1,float16,fp8,511,0.008693333094318708
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,32,4,64,0,1,float16,float16,1023,0.0169813334941864
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,32,4,64,128,1,float16,fp8,1023,0.00726400005320708
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,32,4,64,0,1,float16,fp8,1023,0.009072000160813332
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,32,4,64,128,1,float16,float16,2047,0.019472000499566395
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,32,4,64,0,1,float16,float16,2047,0.024490666886170704
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,32,4,64,128,1,float16,fp8,2047,0.007733333234985669
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,32,4,64,0,1,float16,fp8,2047,0.011722666521867117
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,32,4,64,128,1,float16,float16,4095,0.023226665953795116
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,32,4,64,128,1,float16,fp8,8191,0.007567999884486198
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,32,4,64,0,1,float16,float16,4095,0.03275199979543686
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,32,4,64,128,1,float16,fp8,4095,0.007311999797821045
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,32,4,64,0,1,float16,fp8,4095,0.014709333578745524
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,32,4,64,0,1,float16,float16,8191,0.043791999419530235
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,32,4,64,128,1,float16,float16,8191,0.027866666515668232
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,32,4,64,0,1,float16,fp8,8191,0.023823998868465424
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,32,4,64,128,1,float16,float16,16383,0.033189333975315094
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,32,4,64,0,1,float16,float16,16383,0.06765333314736684
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,32,4,64,128,1,float16,fp8,16383,0.008016000191370646
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,32,4,64,0,1,float16,fp8,16383,0.031136001149813335
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,32,4,64,128,1,float16,float16,32767,0.034927998979886375
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,32,4,64,0,1,float16,float16,32767,0.09176533420880635
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,32,4,64,128,1,float16,fp8,32767,0.008682666967312494
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,32,4,64,0,1,float16,fp8,32767,0.05367999772230784
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,32,4,64,128,1,float16,float16,65535,0.03419733295838038
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,32,8,64,128,1,float16,float16,1,0.010629333555698395
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,32,8,64,0,1,float16,float16,1,0.010175999874869982
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,32,8,64,128,1,float16,fp8,1,0.007466666400432587
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,32,8,64,0,1,float16,fp8,1,0.008997333546479544
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,32,4,64,0,1,float16,fp8,65535,0.0860586663087209
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,32,8,64,128,1,float16,float16,3,0.009818666925032934
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,32,4,64,0,1,float16,float16,65535,0.13917866349220276
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,32,4,64,128,1,float16,fp8,65535,0.007850666840871176
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,32,8,64,0,1,float16,float16,3,0.010832000523805618
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,32,8,64,128,1,float16,fp8,3,0.005775999898711841
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,32,8,64,0,1,float16,fp8,3,0.007157333195209503
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,32,8,64,128,1,float16,float16,7,0.010874666273593903
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,32,8,64,0,1,float16,float16,7,0.0100426667680343
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,32,8,64,128,1,float16,fp8,7,0.0058133335163195925
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,32,8,64,0,1,float16,fp8,7,0.005941333249211311
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,32,8,64,128,1,float16,float16,15,0.010213333492477735
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,32,8,64,0,1,float16,float16,15,0.010640000303586325
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,32,8,64,128,1,float16,fp8,15,0.005754666402935982
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,32,8,64,0,1,float16,fp8,15,0.007397333160042763
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,32,8,64,0,1,float16,float16,63,0.010629333555698395
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,32,8,64,128,1,float16,float16,31,0.010805333654085795
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,32,8,64,0,1,float16,float16,31,0.010064000263810158
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,32,8,64,128,1,float16,fp8,31,0.005872000008821487
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,32,8,64,0,1,float16,fp8,31,0.0058453331391016645
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,32,8,64,128,1,float16,float16,63,0.010159999753038088
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,32,8,64,128,1,float16,fp8,63,0.005759999776879947
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,32,8,64,0,1,float16,fp8,63,0.007258666679263115
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,32,8,64,128,1,float16,float16,127,0.010634666929642359
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,32,8,64,0,1,float16,float16,127,0.010709332923094431
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,32,8,64,128,1,float16,fp8,127,0.006288000072042148
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,32,8,64,0,1,float16,fp8,127,0.006208000083764394
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,32,8,64,128,1,float16,float16,255,0.01221866657336553
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,32,8,64,0,1,float16,float16,255,0.012213333199421564
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,32,8,64,128,1,float16,fp8,255,0.0069973332186539965
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,32,8,64,0,1,float16,fp8,255,0.008042666440208754
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,32,8,64,128,1,float16,float16,511,0.017114666601022083
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,32,8,64,0,1,float16,float16,511,0.017829333742459614
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,32,8,64,128,1,float16,fp8,511,0.007285333548982938
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,32,8,64,0,1,float16,fp8,511,0.008005333443482717
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,32,8,64,128,1,float16,float16,1023,0.021669333179791767
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,32,8,64,0,1,float16,float16,1023,0.024538666009902954
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,32,8,64,128,1,float16,fp8,1023,0.0075519997626543045
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,32,8,64,0,1,float16,fp8,1023,0.010768000036478043
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,32,8,64,128,1,float16,float16,2047,0.02756800005833308
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,32,8,64,0,1,float16,float16,2047,0.03624533365170161
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,32,8,64,128,1,float16,fp8,2047,0.008000000069538752
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,32,8,64,0,1,float16,fp8,2047,0.016352000335852306
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,32,8,64,128,1,float16,float16,4095,0.03349333256483078
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,32,8,64,128,1,float16,float16,8191,0.03421866645415624
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,32,8,64,0,1,float16,float16,4095,0.05236800014972687
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,32,8,64,128,1,float16,fp8,4095,0.008687999720374743
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,32,8,64,0,1,float16,fp8,4095,0.021407999098300934
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,32,8,64,0,1,float16,float16,8191,0.06643199920654297
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,32,8,64,128,1,float16,fp8,8191,0.008357333640257517
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,32,8,64,0,1,float16,fp8,8191,0.028736000259717304
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,32,8,64,128,1,float16,float16,16383,0.03453866640726725
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,32,8,64,0,1,float16,float16,16383,0.08876267075538635
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,32,8,64,128,1,float16,fp8,16383,0.008506666868925095
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,32,8,64,0,1,float16,fp8,16383,0.046709333856900535
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,32,8,64,128,1,float16,float16,32767,0.03346666693687439
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,32,8,64,0,1,float16,float16,32767,0.13200533390045166
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,32,8,64,128,1,float16,fp8,32767,0.0081386665503184
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,32,8,64,0,1,float16,fp8,32767,0.07986133297284444
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,32,8,64,128,1,float16,float16,65535,0.03375466664632162
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,32,1,64,128,1,float16,float16,1,0.0525546669960022
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,32,1,64,0,1,float16,float16,1,0.05227200190226237
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,32,8,64,0,1,float16,fp8,65535,0.14305599530537924
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,32,8,64,0,1,float16,float16,65535,0.21892799933751425
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,32,8,64,128,1,float16,fp8,65535,0.008527999743819237
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,32,1,64,0,1,float16,fp8,1,0.019952000429232914
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,32,1,64,128,1,float16,fp8,1,0.020608000457286835
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,32,1,64,128,1,float16,float16,3,0.05266133447488149
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,32,1,64,0,1,float16,float16,3,0.05226666728655497
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,32,1,64,128,1,float16,fp8,3,0.019920000185569126
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,32,1,64,0,1,float16,float16,7,0.052442664901415505
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,32,1,64,0,1,float16,fp8,3,0.020448000480731327
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,32,1,64,128,1,float16,float16,7,0.0537066658337911
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,32,1,64,0,1,float16,fp8,7,0.019834666202465694
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,32,1,64,128,1,float16,fp8,7,0.020479999482631683
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,32,1,64,128,1,float16,float16,15,0.05229333539803823
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,32,1,64,0,1,float16,float16,15,0.0525439977645874
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,32,1,64,128,1,float16,fp8,15,0.02000533292690913
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,32,1,64,0,1,float16,fp8,15,0.019861333072185516
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,32,1,64,128,1,float16,fp8,31,0.019925333559513092
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,32,1,64,128,1,float16,float16,31,0.053029333551724754
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,32,1,64,0,1,float16,float16,31,0.05373333394527435
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,32,1,64,128,1,float16,float16,63,0.05160533388455709
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,32,1,64,0,1,float16,float16,63,0.051914667089780174
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,32,1,64,0,1,float16,fp8,31,0.02045866722861926
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,32,1,64,128,1,float16,fp8,63,0.01993600030740102
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,32,1,64,0,1,float16,fp8,63,0.01972266659140587
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,32,1,64,128,1,float16,float16,127,0.05183466772238413
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,32,1,64,0,1,float16,fp8,127,0.023157333334287006
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,32,1,64,0,1,float16,float16,127,0.05219733218352
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,32,1,64,128,1,float16,fp8,127,0.023930666347344715
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,32,1,64,128,1,float16,float16,255,0.05630399783452352
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,32,1,64,128,1,float16,fp8,255,0.023226665953795116
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,32,1,64,0,1,float16,fp8,255,0.030261332790056866
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,32,2,64,128,1,float16,float16,1,0.0763679991165797
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,32,1,64,0,1,float16,float16,255,0.05492799977461497
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,32,2,64,0,1,float16,float16,1,0.07621333499749501
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,32,2,64,128,1,float16,fp8,1,0.022266666094462078
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,32,2,64,0,1,float16,float16,3,0.07631466786066692
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,32,2,64,128,1,float16,fp8,3,0.02180800090233485
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,32,2,64,0,1,float16,fp8,1,0.022389332453409832
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,32,2,64,0,1,float16,fp8,3,0.021749332547187805
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,32,2,64,128,1,float16,float16,3,0.07749333480993907
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,32,2,64,0,1,float16,float16,7,0.07586666444937389
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,32,2,64,0,1,float16,fp8,7,0.021914665897687275
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,32,2,64,128,1,float16,float16,7,0.07690666615962982
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,32,2,64,128,1,float16,float16,15,0.07640533149242401
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,32,2,64,128,1,float16,fp8,7,0.02231466770172119
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,32,2,64,128,1,float16,fp8,15,0.021754667162895203
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,32,2,64,0,1,float16,float16,15,0.07750933369000752
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,32,2,64,128,1,float16,float16,31,0.0763679991165797
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,32,2,64,0,1,float16,float16,31,0.07628266513347626
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,32,2,64,0,1,float16,fp8,15,0.022298666338125866
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,32,2,64,0,1,float16,fp8,31,0.021695998807748158
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,32,2,64,128,1,float16,fp8,31,0.022405333817005157
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,32,2,64,128,1,float16,float16,63,0.0751039981842041
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,32,2,64,0,1,float16,float16,63,0.07498133182525635
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,32,2,64,0,1,float16,fp8,63,0.021770666042963665
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,32,2,64,128,1,float16,float16,127,0.07598933577537537
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,32,2,64,128,1,float16,fp8,63,0.022106667359670002
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,32,2,64,128,1,float16,fp8,127,0.026730666557947796
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,32,2,64,0,1,float16,float16,127,0.07714666426181793
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,32,2,64,0,1,float16,float16,255,0.0809333324432373
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,32,2,64,128,1,float16,fp8,255,0.02682666728893916
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,32,2,64,0,1,float16,fp8,127,0.027280000348885853
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,32,2,64,128,1,float16,float16,255,0.08147199948628743
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,32,2,64,0,1,float16,fp8,255,0.03812800099452337
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,32,4,64,0,1,float16,float16,1,0.1376426617304484
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,32,4,64,0,1,float16,fp8,1,0.03387200087308884
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,32,4,64,128,1,float16,float16,1,0.1393226683139801
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,32,4,64,128,1,float16,float16,3,0.13728533188501993
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,32,4,64,128,1,float16,fp8,1,0.03411199897527695
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,32,4,64,0,1,float16,fp8,3,0.03320533285538355
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,32,4,64,0,1,float16,float16,7,0.13795199990272522
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,32,4,64,128,1,float16,float16,7,0.13766400019327799
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,32,4,64,0,1,float16,float16,3,0.13890666762987772
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,32,4,64,128,1,float16,fp8,3,0.03387733300526937
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,32,4,64,0,1,float16,fp8,7,0.033402666449546814
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,32,4,64,128,1,float16,fp8,7,0.03408000121514002
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,32,4,64,0,1,float16,float16,15,0.13729066650072733
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,32,4,64,128,1,float16,fp8,15,0.03379199902216593
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,32,4,64,128,1,float16,float16,15,0.13860266407330832
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,32,4,64,0,1,float16,float16,31,0.13763200243314108
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,32,4,64,0,1,float16,fp8,15,0.03412266572316488
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,32,4,64,128,1,float16,float16,31,0.13826666275660196
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,32,4,64,0,1,float16,fp8,31,0.03332266708215078
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,32,4,64,128,1,float16,float16,63,0.13805866241455078
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,32,4,64,128,1,float16,fp8,31,0.034143999218940735
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,32,4,64,128,1,float16,fp8,63,0.03322133421897888
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,32,4,64,0,1,float16,float16,63,0.13853866855303446
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,32,4,64,0,1,float16,float16,127,0.14034133156140646
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,32,4,64,0,1,float16,fp8,63,0.03395200024048487
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,32,4,64,128,1,float16,fp8,127,0.042778665820757546
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,32,4,64,128,1,float16,float16,127,0.14083199699719748
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,32,4,64,128,1,float16,float16,255,0.1479466656843821
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,32,4,64,0,1,float16,fp8,127,0.043562665581703186
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,32,4,64,128,1,float16,fp8,255,0.04294399917125702
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,32,4,64,0,1,float16,float16,255,0.14800533652305603
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,32,8,64,128,1,float16,float16,1,0.26451200246810913
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,32,8,64,128,1,float16,fp8,1,0.06532266736030579
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,32,4,64,0,1,float16,fp8,255,0.06035199761390686
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,32,8,64,0,1,float16,float16,1,0.26411734024683636
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,32,8,64,128,1,float16,float16,3,0.2638559937477112
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,32,8,64,0,1,float16,fp8,1,0.06680533289909363
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,32,8,64,128,1,float16,fp8,3,0.06540266672770183
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,32,8,64,0,1,float16,fp8,3,0.06543466448783875
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,32,8,64,0,1,float16,float16,3,0.26470933357874554
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,32,8,64,128,1,float16,fp8,7,0.06551466882228851
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,32,8,64,128,1,float16,float16,7,0.26577067375183105
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,32,8,64,0,1,float16,float16,7,0.26401599248250324
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,32,8,64,0,1,float16,fp8,7,0.06681600213050842
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,32,8,64,0,1,float16,float16,15,0.2632053295771281
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,32,8,64,0,1,float16,fp8,15,0.06566933294137318
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,32,8,64,128,1,float16,float16,15,0.2654186685880025
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,32,8,64,128,1,float16,fp8,15,0.06603733201821645
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,32,8,64,128,1,float16,float16,31,0.2679786682128906
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,32,8,64,0,1,float16,fp8,31,0.06518933176994324
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,32,8,64,0,1,float16,float16,31,0.2659306724866231
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,32,8,64,128,1,float16,float16,63,0.267301340897878
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,32,8,64,128,1,float16,fp8,31,0.06670400003592174
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,32,8,64,128,1,float16,fp8,63,0.06553066770235698
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,32,8,64,0,1,float16,fp8,63,0.06613866488138835
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,32,8,64,0,1,float16,float16,127,0.26436267296473187
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,32,8,64,0,1,float16,float16,63,0.26917866865793866
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,32,8,64,128,1,float16,float16,127,0.26703999439875287
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,32,8,64,0,1,float16,fp8,127,0.07880533238252004
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,32,8,64,128,1,float16,fp8,127,0.07922666768232982
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,32,8,64,0,1,float16,float16,255,0.27677865823109943
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,32,8,64,128,1,float16,float16,255,0.2783199946085612
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,32,8,64,0,1,float16,fp8,255,0.10705066720644633
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,32,8,64,128,1,float16,fp8,255,0.07999999821186066
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,32,1,64,128,1,float16,fp8,1,0.035973332822322845
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,32,1,64,128,1,float16,float16,3,0.09693333506584167
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,32,1,64,128,1,float16,float16,1,0.09726400176684062
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,32,1,64,128,1,float16,fp8,3,0.03573866685231527
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,32,1,64,0,1,float16,float16,1,0.09742400050163269
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,32,1,64,0,1,float16,fp8,1,0.03664000084002813
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,32,1,64,0,1,float16,float16,3,0.09790399670600891
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,32,1,64,128,1,float16,float16,7,0.0965280036131541
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,32,1,64,0,1,float16,fp8,3,0.036720000207424164
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,32,1,64,0,1,float16,float16,7,0.09691199660301208
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,32,1,64,0,1,float16,fp8,7,0.035936000446478523
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,32,1,64,128,1,float16,fp8,7,0.03646933287382126
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,32,1,64,0,1,float16,fp8,15,0.03583999971548716
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,32,1,64,128,1,float16,float16,15,0.09749866525332133
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,32,1,64,0,1,float16,float16,15,0.09662933150927226
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,32,1,64,0,1,float16,float16,31,0.0972053309281667
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,32,1,64,128,1,float16,fp8,31,0.03576533248027166
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,32,1,64,128,1,float16,fp8,15,0.03651199986537298
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,32,1,64,128,1,float16,float16,31,0.09788800279299419
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,32,1,64,128,1,float16,float16,63,0.0960586667060852
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,32,1,64,0,1,float16,fp8,63,0.03586666782697042
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,32,1,64,0,1,float16,fp8,31,0.036474667489528656
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,32,1,64,128,1,float16,float16,127,0.09753599762916565
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,32,1,64,0,1,float16,float16,63,0.09685333569844563
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,32,1,64,128,1,float16,fp8,63,0.03651199986537298
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,32,1,64,0,1,float16,fp8,127,0.04480533301830292
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,32,1,64,128,1,float16,fp8,127,0.04488533238569895
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,32,2,64,128,1,float16,float16,1,0.14330666263898215
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,32,2,64,128,1,float16,fp8,1,0.04048533240954081
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,32,2,64,0,1,float16,float16,3,0.14251733819643655
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,32,2,64,0,1,float16,float16,1,0.1434346636136373
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,32,2,64,0,1,float16,fp8,1,0.04138133426507314
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,32,2,64,128,1,float16,float16,3,0.14285332957903543
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,32,2,64,0,1,float16,fp8,3,0.04088533421357473
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,32,2,64,128,1,float16,float16,7,0.14298133055369058
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,32,2,64,128,1,float16,fp8,3,0.04127466678619385
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,32,2,64,0,1,float16,float16,7,0.14270933469136557
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,32,2,64,0,1,float16,float16,15,0.14385599891344705
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,32,2,64,128,1,float16,fp8,15,0.040618665516376495
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,32,2,64,128,1,float16,fp8,7,0.04211199780305227
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,32,2,64,0,1,float16,fp8,7,0.04138133426507314
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,32,2,64,128,1,float16,float16,15,0.14346667130788168
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,32,2,64,128,1,float16,float16,31,0.14351466298103333
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,32,2,64,0,1,float16,fp8,15,0.0412266676624616
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,32,2,64,0,1,float16,float16,31,0.1444480021794637
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,32,2,64,128,1,float16,fp8,31,0.04064533362785975
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,32,2,64,128,1,float16,fp8,63,0.04043200115362803
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,32,2,64,0,1,float16,fp8,31,0.04200000067551931
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,32,2,64,0,1,float16,fp8,63,0.04074133435885111
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,32,2,64,128,1,float16,float16,63,0.143994669119517
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,32,2,64,0,1,float16,float16,63,0.1437333325544993
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,32,2,64,128,1,float16,float16,127,0.1458133359750112
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,32,2,64,0,1,float16,float16,127,0.14513066411018372
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,32,2,64,0,1,float16,fp8,127,0.05665066838264465
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,32,2,64,128,1,float16,fp8,127,0.05725333094596863
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,32,4,64,128,1,float16,float16,1,0.2690826654434204
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,32,4,64,0,1,float16,float16,1,0.2672373255093892
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,32,4,64,128,1,float16,float16,3,0.26785600185394287
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,32,1,64,0,1,float16,float16,127,0.09802132844924927
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,32,4,64,128,1,float16,float16,7,0.26790932814280194
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,32,4,64,0,1,float16,float16,3,0.26867733399073285
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,32,4,64,128,1,float16,fp8,3,0.07151466608047485
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,32,4,64,0,1,float16,fp8,3,0.07043200234572093
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,32,4,64,128,1,float16,fp8,7,0.07041599849859874
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,32,4,64,0,1,float16,float16,7,0.2702186703681946
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,32,4,64,0,1,float16,fp8,7,0.07057066758473714
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,32,4,64,128,1,float16,float16,15,0.2704586585362752
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,32,4,64,0,1,float16,fp8,15,0.07051733136177063
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,32,4,64,0,1,float16,float16,15,0.2690560022989909
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,32,4,64,128,1,float16,fp8,15,0.07127999762694041
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,32,4,64,128,1,float16,float16,31,0.2746559977531433
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,32,4,64,0,1,float16,fp8,31,0.07042666773001353
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,32,4,64,0,1,float16,fp8,1,0.07021866738796234
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,32,4,64,0,1,float16,float16,31,0.273034671942393
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,32,4,64,128,1,float16,fp8,31,0.07115733126799266
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,32,4,64,128,1,float16,fp8,1,0.0702453354994456
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,32,4,64,128,1,float16,float16,63,0.27196266253789264
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,32,4,64,0,1,float16,fp8,63,0.0702400008837382
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,32,4,64,0,1,float16,float16,63,0.2731893261273702
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,32,4,64,128,1,float16,float16,127,0.27296533187230426
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,32,4,64,128,1,float16,fp8,63,0.07082133491834004
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,32,4,64,0,1,float16,fp8,127,0.08308266599973042
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,32,8,64,0,1,float16,float16,1,0.5202293395996094
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,32,4,64,0,1,float16,float16,127,0.27530133724212646
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,32,4,64,128,1,float16,fp8,127,0.08422399560610454
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,32,8,64,0,1,float16,fp8,1,0.13160000244776407
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,32,8,64,128,1,float16,float16,1,0.51747198899587
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,32,8,64,128,1,float16,fp8,1,0.13216533263524374
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,32,8,64,0,1,float16,float16,3,0.5169599850972494
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,32,8,64,128,1,float16,fp8,3,0.13205333550771078
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,32,8,64,128,1,float16,float16,3,0.5242506663004557
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,32,8,64,0,1,float16,float16,7,0.5192906856536865
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,32,8,64,0,1,float16,fp8,3,0.13244799772898355
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,32,8,64,128,1,float16,float16,7,0.5236479838689169
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,32,8,64,128,1,float16,fp8,7,0.13165866335233053
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,32,8,64,128,1,float16,float16,15,0.5232640107472738
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,32,8,64,0,1,float16,fp8,7,0.1322933336098989
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,32,8,64,0,1,float16,float16,15,0.526688019434611
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,32,8,64,0,1,float16,fp8,15,0.13130133350690207
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,32,8,64,128,1,float16,fp8,15,0.1323466698328654
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,32,8,64,0,1,float16,fp8,31,0.13109333316485086
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,32,8,64,128,1,float16,float16,31,0.532480001449585
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,32,8,64,128,1,float16,fp8,63,0.1309866706530253
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,32,8,64,0,1,float16,float16,63,0.5297866662343343
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,32,8,64,0,1,float16,float16,31,0.5334453185399374
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,32,8,64,128,1,float16,float16,63,0.5304746627807617
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,32,1,64,128,1,float16,float16,1,0.0144213338692983
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,32,8,64,128,1,float16,float16,127,0.5196160078048706
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,32,1,64,0,1,float16,float16,1,0.014245333770910898
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,32,1,64,128,1,float16,fp8,1,0.008586666857202848
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,32,1,64,0,1,float16,fp8,1,0.007760000104705493
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,32,8,64,0,1,float16,fp8,63,0.1323199967543284
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,32,8,64,128,1,float16,fp8,127,0.15128533045450845
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,32,1,64,128,1,float16,float16,3,0.014245333770910898
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,32,1,64,0,1,float16,float16,3,0.014432000617186228
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,32,1,64,128,1,float16,fp8,3,0.007754666730761528
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,32,1,64,0,1,float16,fp8,3,0.008218666538596153
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,32,1,64,128,1,float16,float16,7,0.014261333892742792
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,32,1,64,0,1,float16,float16,7,0.014463999619086584
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,32,1,64,128,1,float16,fp8,7,0.008559999987483025
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,32,1,64,0,1,float16,fp8,7,0.007797333101431529
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,32,1,64,128,1,float16,float16,15,0.014565333724021912
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,32,1,64,0,1,float16,float16,15,0.01423466702302297
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,32,1,64,128,1,float16,fp8,15,0.008549333239595095
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,32,1,64,0,1,float16,fp8,15,0.008586666857202848
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,32,1,64,128,1,float16,float16,31,0.01525866612792015
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,32,1,64,0,1,float16,float16,31,0.015717333803574245
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,32,1,64,128,1,float16,fp8,31,0.008277333031098047
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,32,1,64,0,1,float16,fp8,31,0.008517333616813024
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,32,1,64,128,1,float16,fp8,63,0.008165333420038223
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,32,1,64,128,1,float16,float16,63,0.019082666685183842
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,32,1,64,0,1,float16,float16,63,0.018464000274737675
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,32,1,64,0,1,float16,fp8,63,0.008240000034372011
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,32,1,64,128,1,float16,float16,127,0.01924266666173935
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,32,1,64,0,1,float16,float16,127,0.019285333653291065
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,32,1,64,128,1,float16,fp8,127,0.009599999835093817
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,32,1,64,0,1,float16,fp8,127,0.009183999771873156
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,32,1,64,128,1,float16,float16,255,0.01893866683046023
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,32,1,64,0,1,float16,float16,255,0.01947733387351036
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,32,1,64,128,1,float16,fp8,255,0.00984533317387104
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,32,1,64,0,1,float16,fp8,255,0.010656000425418219
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,32,1,64,128,1,float16,float16,511,0.012906666845083237
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,32,1,64,0,1,float16,float16,511,0.013376000026861826
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,32,1,64,128,1,float16,fp8,511,0.010154666379094124
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,32,1,64,0,1,float16,fp8,511,0.011002667248249054
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,32,1,64,128,1,float16,float16,1023,0.013327999661366144
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,32,1,64,0,1,float16,float16,1023,0.013450667262077332
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,32,8,64,128,1,float16,fp8,31,0.13165332873662314
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,32,1,64,128,1,float16,fp8,1023,0.009375999992092451
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,32,1,64,0,1,float16,fp8,1023,0.011733333269755045
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,32,1,64,128,1,float16,float16,2047,0.01884799947341283
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,32,1,64,0,1,float16,float16,2047,0.021295999487241108
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,32,1,64,128,1,float16,fp8,2047,0.010191999996701876
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,32,1,64,0,1,float16,fp8,2047,0.014997333288192749
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,32,1,64,128,1,float16,float16,4095,0.019952000429232914
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,32,1,64,0,1,float16,float16,4095,0.023941333095232647
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,32,8,64,0,1,float16,fp8,127,0.15237866838773093
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,32,1,64,128,1,float16,float16,8191,0.02294933299223582
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,32,1,64,128,1,float16,fp8,4095,0.0100426667680343
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,32,1,64,128,1,float16,fp8,8191,0.009818666925032934
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,32,1,64,0,1,float16,fp8,4095,0.018325333793958027
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,32,1,64,0,1,float16,float16,8191,0.03446933378775915
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,32,8,64,0,1,float16,float16,127,0.5222506523132324
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,32,1,64,0,1,float16,fp8,8191,0.026975999275843304
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,32,1,64,128,1,float16,float16,16383,0.026885333160559338
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,32,1,64,0,1,float16,float16,16383,0.047055999437967934
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,32,1,64,128,1,float16,fp8,16383,0.010368000095089277
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,32,1,64,0,1,float16,fp8,16383,0.0432533323764801
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,32,1,64,128,1,float16,float16,32767,0.03385599950949351
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,32,2,64,128,1,float16,float16,1,0.013904000322024027
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,32,1,64,128,1,float16,fp8,32767,0.010026666646202406
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,32,2,64,0,1,float16,float16,1,0.014335999886194864
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,32,2,64,128,1,float16,fp8,1,0.007274666801095009
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,32,1,64,0,1,float16,fp8,32767,0.06163200239340464
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,32,1,64,0,1,float16,float16,32767,0.0758240024248759
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,32,2,64,0,1,float16,fp8,1,0.007093333328763644
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,32,2,64,128,1,float16,float16,3,0.013888000200192133
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,32,2,64,0,1,float16,float16,3,0.013829333086808523
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,32,2,64,128,1,float16,fp8,3,0.008517333616813024
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,32,2,64,0,1,float16,fp8,3,0.007530666887760162
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,32,2,64,128,1,float16,float16,7,0.014090667168299357
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,32,2,64,0,1,float16,float16,7,0.014725333700577417
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,32,2,64,128,1,float16,fp8,7,0.007194666812817256
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,32,2,64,0,1,float16,fp8,7,0.006720000257094701
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,32,2,64,128,1,float16,float16,15,0.014064000298579534
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,32,2,64,0,1,float16,float16,15,0.014720000326633453
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,32,2,64,128,1,float16,fp8,15,0.007407999907930692
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,32,2,64,0,1,float16,fp8,15,0.006757333253820737
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,32,2,64,128,1,float16,float16,31,0.015829333414634068
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,32,2,64,0,1,float16,float16,31,0.015850666910409927
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,32,2,64,128,1,float16,fp8,31,0.007344000041484833
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,32,2,64,0,1,float16,fp8,31,0.006655999769767125
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,32,2,64,128,1,float16,float16,63,0.018506667266289394
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,32,2,64,0,1,float16,float16,63,0.01905599981546402
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,32,2,64,128,1,float16,fp8,63,0.00754666638871034
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,32,2,64,0,1,float16,fp8,63,0.006618666773041089
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,32,2,64,128,1,float16,float16,127,0.019146667172511418
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,32,2,64,0,1,float16,float16,127,0.019487999379634857
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,32,2,64,128,1,float16,fp8,127,0.00749333327015241
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,32,2,64,0,1,float16,fp8,127,0.007381333038210869
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,32,2,64,128,1,float16,float16,255,0.011514666179815928
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,32,2,64,0,1,float16,float16,255,0.011920000116030375
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,32,2,64,128,1,float16,fp8,255,0.008858666444818178
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,32,2,64,0,1,float16,fp8,255,0.008602666358153025
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,32,2,64,128,1,float16,float16,511,0.012223999947309494
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,32,2,64,0,1,float16,float16,511,0.012458667159080505
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,32,2,64,128,1,float16,fp8,511,0.008373333141207695
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,32,2,64,0,1,float16,fp8,511,0.009045333291093508
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,32,2,64,128,1,float16,float16,1023,0.01634666696190834
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,32,2,64,0,1,float16,float16,1023,0.017685333887736004
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,32,2,64,128,1,float16,fp8,1023,0.00873066671192646
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,32,2,64,128,1,float16,float16,4095,0.023610666394233704
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,32,2,64,0,1,float16,fp8,1023,0.01051733394463857
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,32,2,64,128,1,float16,float16,2047,0.01978133370478948
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,32,2,64,0,1,float16,fp8,4095,0.017157333592573803
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,32,2,64,0,1,float16,float16,2047,0.025072000920772552
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,32,2,64,128,1,float16,fp8,2047,0.00915733352303505
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,32,2,64,0,1,float16,float16,8191,0.04580266773700714
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,32,2,64,0,1,float16,fp8,2047,0.01313599944114685
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,32,2,64,0,1,float16,float16,4095,0.033376000821590424
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,32,2,64,128,1,float16,fp8,4095,0.008650666723648706
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,32,2,64,128,1,float16,float16,8191,0.02757333219051361
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,32,2,64,128,1,float16,fp8,8191,0.008341333518425623
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,32,2,64,0,1,float16,fp8,8191,0.028586665789286297
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,32,2,64,128,1,float16,float16,16383,0.03457066665093104
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,32,2,64,128,1,float16,fp8,16383,0.008933333059151968
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,32,2,64,0,1,float16,float16,16383,0.06910400092601776
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,32,2,64,0,1,float16,fp8,16383,0.03965333352486292
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,32,4,64,128,1,float16,float16,1,0.010549332946538925
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,32,2,64,0,1,float16,float16,32767,0.09227200349171956
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,32,2,64,128,1,float16,float16,32767,0.03565866748491923
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,32,2,64,128,1,float16,fp8,32767,0.00874133345981439
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,32,4,64,0,1,float16,float16,1,0.00997866690158844
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,32,4,64,128,1,float16,fp8,1,0.007184000064929326
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,32,4,64,0,1,float16,fp8,3,0.0057386669019858045
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,32,4,64,0,1,float16,fp8,1,0.008367999767263731
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,32,2,64,0,1,float16,fp8,32767,0.06584533552328746
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,32,4,64,128,1,float16,float16,3,0.010442666709423065
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,32,4,64,0,1,float16,float16,3,0.010277333358923594
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,32,4,64,128,1,float16,fp8,3,0.007216000308593114
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,32,4,64,128,1,float16,float16,7,0.010048000141978264
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,32,4,64,0,1,float16,float16,7,0.010591999938090643
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,32,4,64,128,1,float16,fp8,7,0.0058133335163195925
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,32,4,64,0,1,float16,fp8,7,0.008527999743819237
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,32,4,64,128,1,float16,float16,15,0.010458666831254959
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,32,4,64,0,1,float16,float16,15,0.010469333579142889
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,32,4,64,128,1,float16,fp8,15,0.006042666733264923
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,32,4,64,0,1,float16,fp8,15,0.007104000076651573
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,32,4,64,128,1,float16,float16,63,0.010586666564146677
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,32,4,64,128,1,float16,float16,31,0.009925333162148794
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,32,4,64,0,1,float16,float16,31,0.010496000448862711
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,32,4,64,128,1,float16,fp8,31,0.0058613332609335584
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,32,4,64,0,1,float16,fp8,31,0.007247999931375186
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,32,4,64,0,1,float16,float16,127,0.010506667196750641
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,32,4,64,128,1,float16,fp8,127,0.006293333445986112
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,32,4,64,0,1,float16,float16,63,0.010181333248813948
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,32,4,64,128,1,float16,fp8,63,0.007296000296870868
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,32,4,64,0,1,float16,fp8,63,0.007098666702707608
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,32,4,64,128,1,float16,float16,127,0.009935999910036722
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,32,4,64,0,1,float16,fp8,127,0.007344000041484833
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,32,4,64,128,1,float16,float16,255,0.012330666184425354
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,32,4,64,0,1,float16,float16,255,0.012170666207869848
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,32,4,64,128,1,float16,fp8,255,0.007466666400432587
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,32,4,64,0,1,float16,fp8,255,0.008165333420038223
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,32,4,64,128,1,float16,float16,1023,0.021562665700912476
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,32,4,64,128,1,float16,float16,511,0.01709866647919019
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,32,4,64,0,1,float16,float16,511,0.0169813334941864
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,32,4,64,128,1,float16,fp8,511,0.007994666695594788
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,32,4,64,0,1,float16,fp8,511,0.009493333597977957
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,32,4,64,0,1,float16,float16,1023,0.02454400062561035
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,32,4,64,128,1,float16,fp8,1023,0.008080000057816505
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,32,4,64,0,1,float16,fp8,1023,0.010687999427318573
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,32,4,64,128,1,float16,float16,2047,0.028079998989899952
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,32,4,64,0,1,float16,float16,2047,0.035605333745479584
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,32,4,64,128,1,float16,fp8,4095,0.008266666904091835
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,32,4,64,128,1,float16,fp8,2047,0.008682666967312494
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,32,4,64,0,1,float16,fp8,2047,0.01728533332546552
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,32,4,64,128,1,float16,float16,4095,0.03374933451414108
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,32,4,64,0,1,float16,float16,4095,0.05287999908129374
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,32,4,64,0,1,float16,fp8,4095,0.02216533323129018
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,32,4,64,128,1,float16,float16,8191,0.03375466664632162
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,32,4,64,0,1,float16,float16,8191,0.06665066878000896
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,32,4,64,128,1,float16,fp8,8191,0.00898133342464765
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,32,4,64,0,1,float16,fp8,8191,0.030026666820049286
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,32,4,64,0,1,float16,fp8,16383,0.04708800216515859
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,32,4,64,128,1,float16,float16,16383,0.03391999999682108
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,32,4,64,0,1,float16,float16,16383,0.08846933643023173
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,32,4,64,128,1,float16,fp8,16383,0.008890666688481966
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,32,4,64,128,1,float16,float16,32767,0.033930666744709015
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,32,8,64,0,1,float16,float16,1,0.010746666540702185
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,32,8,64,128,1,float16,float16,1,0.01632000009218852
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,32,4,64,0,1,float16,fp8,32767,0.07999466856320699
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,32,8,64,128,1,float16,fp8,1,0.0069759997228781385
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,32,4,64,0,1,float16,float16,32767,0.13266133268674216
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,32,4,64,128,1,float16,fp8,32767,0.008832000195980072
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,32,8,64,0,1,float16,fp8,1,0.005978666866819064
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,32,8,64,128,1,float16,float16,3,0.010863999525705973
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,32,8,64,0,1,float16,float16,3,0.010319999729593595
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,32,8,64,128,1,float16,fp8,3,0.00600533311565717
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,32,8,64,128,1,float16,float16,15,0.01089599976936976
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,32,8,64,0,1,float16,fp8,3,0.008234666660428047
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,32,8,64,128,1,float16,float16,7,0.010890666395425797
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,32,8,64,0,1,float16,float16,7,0.011002667248249054
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,32,8,64,128,1,float16,fp8,7,0.007130666946371396
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,32,8,64,0,1,float16,fp8,7,0.00595199999709924
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,32,8,64,0,1,float16,float16,15,0.010191999996701876
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,32,8,64,128,1,float16,fp8,15,0.006101333225766818
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,32,8,64,0,1,float16,fp8,15,0.007034666836261749
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,32,8,64,128,1,float16,float16,31,0.011055999745925268
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,32,8,64,0,1,float16,float16,31,0.010911999891201654
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,32,8,64,128,1,float16,fp8,31,0.007082666580875714
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,32,8,64,0,1,float16,fp8,31,0.007071999832987785
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,32,8,64,128,1,float16,float16,63,0.010405333091815313
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,32,8,64,0,1,float16,float16,63,0.010480000327030817
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,32,8,64,128,1,float16,fp8,63,0.005984000240763028
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,32,8,64,0,1,float16,fp8,63,0.007407999907930692
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,32,8,64,128,1,float16,float16,127,0.011002667248249054
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,32,8,64,0,1,float16,float16,127,0.010842667271693548
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,32,8,64,128,1,float16,fp8,127,0.007386666412154834
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,32,8,64,0,1,float16,fp8,127,0.007269333427151044
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,32,8,64,128,1,float16,float16,255,0.01782400036851565
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,32,8,64,0,1,float16,float16,255,0.017903999735911686
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,32,8,64,128,1,float16,fp8,255,0.007365333537260692
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,32,8,64,0,1,float16,fp8,255,0.008362666393319765
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,32,8,64,128,1,float16,float16,511,0.023418667415777843
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,32,8,64,0,1,float16,float16,511,0.025429333249727886
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,32,8,64,128,1,float16,fp8,511,0.008746666833758354
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,32,8,64,128,1,float16,float16,2047,0.035562666753927864
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,32,8,64,0,1,float16,fp8,511,0.009759999811649323
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,32,8,64,128,1,float16,fp8,2047,0.009183999771873156
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,32,8,64,128,1,float16,float16,1023,0.03271999955177307
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,32,8,64,0,1,float16,float16,1023,0.03955733279387156
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,32,8,64,128,1,float16,fp8,1023,0.00855466661353906
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,32,8,64,0,1,float16,fp8,1023,0.01655999943614006
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,32,8,64,0,1,float16,float16,2047,0.051829333106676735
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,32,8,64,0,1,float16,fp8,2047,0.020389333367347717
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,32,8,64,128,1,float16,float16,8191,0.034789333740870156
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,32,8,64,128,1,float16,float16,4095,0.03510933369398117
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,32,8,64,128,1,float16,fp8,8191,0.00892800030608972
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,32,8,64,0,1,float16,float16,4095,0.0665226678053538
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,32,8,64,128,1,float16,fp8,4095,0.008634666601816813
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,32,8,64,0,1,float16,fp8,4095,0.0278613343834877
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,32,8,64,0,1,float16,float16,8191,0.08804800113042195
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,32,8,64,0,1,float16,fp8,8191,0.046154667933781944
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,32,8,64,128,1,float16,float16,16383,0.03474666674931844
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,32,8,64,0,1,float16,fp8,16383,0.07852266728878021
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,32,8,64,0,1,float16,float16,16383,0.13193066914876303
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,32,8,64,128,1,float16,fp8,16383,0.008816000074148178
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,32,8,64,128,1,float16,float16,32767,0.03442666679620743
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,32,1,128,0,1,float16,float16,1,0.014197333405415217
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,32,8,64,0,1,float16,float16,32767,0.2174933354059855
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,32,1,128,0,1,float16,fp8,1,0.009648000200589498
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,32,8,64,128,1,float16,fp8,32767,0.009072000160813332
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,32,1,128,0,1,float16,float16,3,0.014303999642531076
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,32,8,64,0,1,float16,fp8,32767,0.14166399836540222
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,32,1,128,0,1,float16,fp8,3,0.009130666653315226
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,32,1,128,0,1,float16,float16,7,0.01504533365368843
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,32,1,128,0,1,float16,fp8,7,0.009392000113924345
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,32,1,128,0,1,float16,float16,15,0.018229333062966663
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,32,1,128,0,1,float16,fp8,15,0.009658666948477427
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,32,1,128,0,1,float16,float16,31,0.018357332795858383
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,32,1,128,0,1,float16,fp8,31,0.009690666571259499
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,32,1,128,0,1,float16,float16,63,0.017903999735911686
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,32,1,128,0,1,float16,fp8,63,0.009695999945203463
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,32,1,128,0,1,float16,float16,127,0.021557333568731945
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,32,1,128,0,1,float16,fp8,127,0.010874666273593903
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,32,1,128,0,1,float16,float16,255,0.015498666713635126
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,32,1,128,0,1,float16,fp8,255,0.012304000556468964
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,32,1,128,0,1,float16,float16,511,0.015360000232855478
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,32,1,128,0,1,float16,fp8,511,0.01258133351802826
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,32,1,128,0,1,float16,float16,1023,0.024106666445732117
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,32,1,128,0,1,float16,fp8,1023,0.01575999955336253
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,32,1,128,0,1,float16,float16,2047,0.03541333228349686
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,32,1,128,0,1,float16,float16,8191,0.07456533114115398
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,32,1,128,0,1,float16,fp8,2047,0.0271573339899381
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,32,1,128,0,1,float16,float16,4095,0.0518453319867452
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,32,1,128,0,1,float16,fp8,4095,0.03149333347876867
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,32,1,128,0,1,float16,fp8,8191,0.05717866619427999
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,32,2,128,0,1,float16,float16,1,0.01033599985142549
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,32,1,128,0,1,float16,float16,16383,0.11025066177050273
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,32,2,128,0,1,float16,fp8,1,0.007589333380262057
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,32,1,128,0,1,float16,fp8,16383,0.08603733777999878
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,32,2,128,0,1,float16,float16,3,0.011258666714032492
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,32,2,128,0,1,float16,fp8,3,0.00706666645904382
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,32,2,128,0,1,float16,float16,7,0.010960000256697336
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,32,2,128,0,1,float16,fp8,7,0.007034666836261749
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,32,2,128,0,1,float16,float16,15,0.010805333654085795
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,32,2,128,0,1,float16,fp8,15,0.007631999750932057
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,32,2,128,0,1,float16,float16,31,0.011242666592200598
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,32,2,128,0,1,float16,float16,255,0.013669333110253016
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,32,2,128,0,1,float16,float16,63,0.011125333607196808
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,32,2,128,0,1,float16,fp8,31,0.0084906667470932
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,32,2,128,0,1,float16,fp8,63,0.007135999699433644
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,32,2,128,0,1,float16,float16,127,0.011050666371981302
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,32,2,128,0,1,float16,fp8,1023,0.020031999796628952
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,32,2,128,0,1,float16,fp8,127,0.007717333113153775
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,32,2,128,0,1,float16,fp8,255,0.010170666500926018
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,32,2,128,0,1,float16,float16,511,0.020362666497627895
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,32,2,128,0,1,float16,fp8,511,0.01121066634853681
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,32,2,128,0,1,float16,float16,1023,0.029391999046007793
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,32,2,128,0,1,float16,float16,2047,0.043605332573254905
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,32,2,128,0,1,float16,fp8,2047,0.02334933231274287
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,32,2,128,0,1,float16,float16,4095,0.06771199901898702
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,32,2,128,0,1,float16,fp8,4095,0.03629866739114126
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,32,2,128,0,1,float16,float16,8191,0.08708266417185466
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,32,2,128,0,1,float16,fp8,8191,0.05154666801293691
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,32,2,128,0,1,float16,float16,16383,0.12179733316103618
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,32,4,128,0,1,float16,float16,1,0.011034666250149408
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,32,4,128,0,1,float16,fp8,1,0.007301333049933116
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,32,2,128,0,1,float16,fp8,16383,0.07403733332951863
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,32,4,128,0,1,float16,float16,3,0.011328000575304031
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,32,4,128,0,1,float16,float16,31,0.011333333949247995
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,32,4,128,0,1,float16,fp8,3,0.006271999950210254
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,32,4,128,0,1,float16,float16,7,0.011519999553759893
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,32,4,128,0,1,float16,fp8,7,0.006954666847983996
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,32,4,128,0,1,float16,float16,15,0.010842667271693548
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,32,4,128,0,1,float16,fp8,15,0.006335999816656113
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,32,4,128,0,1,float16,fp8,31,0.007050666958093643
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,32,4,128,0,1,float16,float16,63,0.011274666835864386
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,32,4,128,0,1,float16,fp8,63,0.00707733320693175
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,32,4,128,0,1,float16,float16,127,0.010954666882753372
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,32,4,128,0,1,float16,fp8,127,0.007631999750932057
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,32,4,128,0,1,float16,float16,255,0.0216799999276797
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,32,4,128,0,1,float16,fp8,255,0.00914666677514712
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,32,4,128,0,1,float16,float16,511,0.028384000062942505
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,32,4,128,0,1,float16,fp8,511,0.015589332828919092
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,32,4,128,0,1,float16,float16,1023,0.047082667549451195
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,32,4,128,0,1,float16,fp8,1023,0.022250667214393616
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,32,4,128,0,1,float16,float16,2047,0.06520000100135803
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,32,4,128,0,1,float16,fp8,2047,0.028442665934562683
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,32,4,128,0,1,float16,fp8,4095,0.04058666775623957
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,32,4,128,0,1,float16,float16,4095,0.08405333757400513
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,32,4,128,0,1,float16,float16,8191,0.12065066893895467
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,32,4,128,0,1,float16,fp8,8191,0.059952000776926674
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,32,4,128,0,1,float16,float16,16383,0.1921493411064148
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,32,4,128,0,1,float16,fp8,16383,0.09947733084360759
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,32,8,128,0,1,float16,fp8,1,0.0074986666440963745
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,32,8,128,0,1,float16,fp8,3,0.00702400008837382
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,32,8,128,0,1,float16,fp8,15,0.007424000029762586
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,32,8,128,0,1,float16,fp8,7,0.007391999786098798
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,32,8,128,0,1,float16,fp8,31,0.00707733320693175
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,32,8,128,0,1,float16,fp8,63,0.007114666824539502
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,32,8,128,0,1,float16,fp8,127,0.007631999750932057
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,32,8,128,0,1,float16,float16,1,0.015562667200962702
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,32,8,128,0,1,float16,float16,7,0.015471999843915304
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,32,8,128,0,1,float16,float16,3,0.015333333363135656
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,32,8,128,0,1,float16,float16,511,0.0462666650613149
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,32,8,128,0,1,float16,fp8,255,0.013487999637921652
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,32,8,128,0,1,float16,float16,15,0.016197333733240765
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,32,8,128,0,1,float16,float16,1023,0.0614879975716273
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,32,8,128,0,1,float16,float16,31,0.016122666498025257
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,32,8,128,0,1,float16,fp8,1023,0.02526933451493581
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,32,8,128,0,1,float16,float16,2047,0.08473066488901775
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,32,8,128,0,1,float16,float16,63,0.015770666301250458
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,32,8,128,0,1,float16,fp8,511,0.01966933285196622
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,32,8,128,0,1,float16,float16,127,0.015967999895413715
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,32,8,128,0,1,float16,float16,4095,0.11896000305811565
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,32,8,128,0,1,float16,fp8,4095,0.05624533196290334
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,32,8,128,0,1,float16,float16,255,0.03107200066248576
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,32,8,128,0,1,float16,float16,8191,0.19104532400767008
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,32,8,128,0,1,float16,fp8,8191,0.09737066427866618
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,32,1,128,0,1,float16,float16,1,0.00927466650803884
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,32,1,128,0,1,float16,fp8,1,0.008853333070874214
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,32,8,128,0,1,float16,float16,16383,0.3317813277244568
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,32,1,128,0,1,float16,float16,3,0.008949333180983862
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,32,8,128,0,1,float16,fp8,16383,0.1777120033899943
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,32,1,128,0,1,float16,fp8,15,0.008912000184257826
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,32,1,128,0,1,float16,fp8,3,0.009530666594703993
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,32,1,128,0,1,float16,float16,7,0.00921066664159298
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,32,1,128,0,1,float16,fp8,7,0.009866666669646898
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,32,1,128,0,1,float16,float16,15,0.00892800030608972
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,32,1,128,0,1,float16,float16,31,0.009786666681369146
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,32,1,128,0,1,float16,fp8,31,0.00903466654320558
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,32,1,128,0,1,float16,float16,63,0.011519999553759893
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,32,1,128,0,1,float16,fp8,63,0.00960533320903778
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,32,1,128,0,1,float16,float16,127,0.01173866664369901
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,32,1,128,0,1,float16,fp8,127,0.010399999717871347
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,32,1,128,0,1,float16,float16,255,0.011359999577204386
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,32,1,128,0,1,float16,fp8,255,0.01228800043463707
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,32,1,128,0,1,float16,float16,511,0.015541333705186844
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,32,1,128,0,1,float16,fp8,511,0.013280000537633896
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,32,1,128,0,1,float16,float16,4095,0.037690666814645134
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,32,1,128,0,1,float16,float16,1023,0.017258666455745697
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,32,1,128,0,1,float16,fp8,1023,0.014010666559139887
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,32,1,128,0,1,float16,float16,2047,0.02788266787926356
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,32,1,128,0,1,float16,fp8,2047,0.015840000162522
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,32,1,128,0,1,float16,fp8,4095,0.01783466711640358
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,32,1,128,0,1,float16,float16,8191,0.02256533255179723
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,32,1,128,0,1,float16,fp8,8191,0.021914665897687275
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,32,1,128,0,1,float16,float16,16383,0.024608001112937927
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,32,1,128,0,1,float16,fp8,16383,0.026799999177455902
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,32,1,128,0,1,float16,fp8,32767,0.03310933212439219
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,32,1,128,0,1,float16,float16,32767,0.02992533395687739
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,32,2,128,0,1,float16,float16,1,0.008997333546479544
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,32,2,128,0,1,float16,fp8,1,0.00877333308259646
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,32,8,128,0,1,float16,fp8,2047,0.0372533326347669
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,32,2,128,0,1,float16,float16,3,0.009248000259200731
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,32,1,128,0,1,float16,float16,65535,0.03626666714747747
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,32,2,128,0,1,float16,fp8,3,0.006821333120266597
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,32,2,128,0,1,float16,float16,7,0.009226666763424873
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,32,2,128,0,1,float16,fp8,7,0.007402666533986728
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,32,1,128,0,1,float16,fp8,65535,0.042490666111310325
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,32,2,128,0,1,float16,float16,15,0.010037333394090334
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,32,2,128,0,1,float16,fp8,15,0.006655999769767125
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,32,2,128,0,1,float16,fp8,127,0.00808533343176047
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,32,2,128,0,1,float16,float16,31,0.010106666634480158
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,32,2,128,0,1,float16,fp8,31,0.00697066696981589
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,32,2,128,0,1,float16,float16,63,0.011130666981140772
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,32,2,128,0,1,float16,fp8,63,0.007333333293596904
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,32,2,128,0,1,float16,float16,127,0.011605333536863327
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,32,2,128,0,1,float16,float16,255,0.011749333391586939
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,32,2,128,0,1,float16,fp8,255,0.008933333059151968
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,32,2,128,0,1,float16,float16,511,0.015850666910409927
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,32,2,128,0,1,float16,fp8,511,0.010128000130256018
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,32,2,128,0,1,float16,float16,1023,0.016704000532627106
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,32,2,128,0,1,float16,fp8,1023,0.010661333799362183
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,32,2,128,0,1,float16,float16,2047,0.02807466685771942
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,32,2,128,0,1,float16,fp8,2047,0.012282667060693106
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,32,2,128,0,1,float16,float16,4095,0.015893333901961643
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,32,2,128,0,1,float16,fp8,4095,0.013189333180586496
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,32,2,128,0,1,float16,float16,8191,0.019317333896954853
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,32,2,128,0,1,float16,fp8,8191,0.016362667083740234
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,32,2,128,0,1,float16,float16,16383,0.02027733375628789
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,32,2,128,0,1,float16,fp8,16383,0.01871466636657715
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,32,2,128,0,1,float16,float16,32767,0.02477866659561793
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,32,2,128,0,1,float16,fp8,32767,0.02455466737349828
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,32,4,128,0,1,float16,float16,1,0.009061333412925402
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,32,4,128,0,1,float16,float16,7,0.009103999783595404
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,32,4,128,0,1,float16,fp8,1,0.008767999708652496
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,32,4,128,0,1,float16,float16,15,0.009189333145817121
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,32,4,128,0,1,float16,float16,3,0.00874133345981439
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,32,4,128,0,1,float16,fp8,3,0.006117333347598712
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,32,2,128,0,1,float16,fp8,65535,0.030394665896892548
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,32,2,128,0,1,float16,float16,65535,0.030933332939942677
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,32,4,128,0,1,float16,fp8,7,0.007029333462317784
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,32,4,128,0,1,float16,fp8,15,0.007344000041484833
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,32,4,128,0,1,float16,float16,31,0.010037333394090334
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,32,4,128,0,1,float16,fp8,31,0.007098666702707608
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,32,4,128,0,1,float16,float16,63,0.011168000598748526
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,32,4,128,0,1,float16,fp8,63,0.00625599982837836
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,32,4,128,0,1,float16,float16,127,0.011066666493813196
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,32,4,128,0,1,float16,fp8,127,0.00843733362853527
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,32,4,128,0,1,float16,float16,255,0.01163200040658315
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,32,4,128,0,1,float16,fp8,255,0.008016000191370646
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,32,4,128,0,1,float16,float16,511,0.016197333733240765
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,32,4,128,0,1,float16,fp8,511,0.008517333616813024
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,32,4,128,0,1,float16,float16,1023,0.01672533278663953
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,32,4,128,0,1,float16,fp8,1023,0.008469333251317343
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,32,4,128,0,1,float16,float16,2047,0.01492799942692121
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,32,4,128,0,1,float16,fp8,2047,0.010005333150426546
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,32,4,128,0,1,float16,float16,4095,0.0161920003592968
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,32,4,128,0,1,float16,fp8,4095,0.011301333705584208
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,32,4,128,0,1,float16,float16,8191,0.0194560003777345
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,32,4,128,0,1,float16,fp8,8191,0.01292266696691513
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,32,4,128,0,1,float16,float16,16383,0.02060266708334287
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,32,4,128,0,1,float16,fp8,16383,0.01526933287580808
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,32,4,128,0,1,float16,float16,32767,0.03558400024970373
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,32,8,128,0,1,float16,float16,1,0.008725333337982496
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,32,8,128,0,1,float16,float16,3,0.009093333035707474
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,32,4,128,0,1,float16,fp8,32767,0.021802666286627453
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,32,8,128,0,1,float16,fp8,1,0.005557333429654439
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,32,8,128,0,1,float16,float16,7,0.00860799973209699
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,32,4,128,0,1,float16,float16,65535,0.046682665745417275
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,32,4,128,0,1,float16,fp8,65535,0.0269813338915507
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,32,8,128,0,1,float16,float16,31,0.010112000008424124
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,32,8,128,0,1,float16,fp8,7,0.005578666925430298
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,32,8,128,0,1,float16,float16,15,0.009242666885256767
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,32,8,128,0,1,float16,fp8,15,0.005610666548212369
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,32,8,128,0,1,float16,fp8,31,0.007087999954819679
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,32,8,128,0,1,float16,float16,63,0.011440000186363855
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,32,8,128,0,1,float16,fp8,63,0.0057386669019858045
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,32,8,128,0,1,float16,fp8,255,0.007813333223263422
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,32,8,128,0,1,float16,float16,127,0.01116266722480456
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,32,8,128,0,1,float16,fp8,127,0.0069919998447100324
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,32,8,128,0,1,float16,float16,1023,0.013717333475748697
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,32,8,128,0,1,float16,float16,255,0.011338666081428528
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,32,8,128,0,1,float16,float16,511,0.015824000040690105
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,32,8,128,0,1,float16,fp8,2047,0.009930666536092758
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,32,8,128,0,1,float16,float16,4095,0.015765332927306492
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,32,8,128,0,1,float16,fp8,511,0.007663999994595845
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,32,8,128,0,1,float16,fp8,1023,0.008186666915814081
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,32,8,128,0,1,float16,float16,2047,0.015130666395028433
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,32,8,128,0,1,float16,fp8,4095,0.010938666760921478
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,32,8,128,0,1,float16,float16,8191,0.025498665869235992
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,32,8,128,0,1,float16,fp8,8191,0.0141546664138635
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,32,8,128,0,1,float16,float16,16383,0.02977066735426585
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,32,8,128,0,1,float16,fp8,16383,0.016864000509182613
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,32,8,128,0,1,float16,float16,32767,0.048250665267308555
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,32,8,128,0,1,float16,fp8,32767,0.02499733368555705
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,32,1,128,0,1,float16,fp8,1,0.008879999940594038
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,32,1,128,0,1,float16,float16,1,0.008853333070874214
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,32,1,128,0,1,float16,float16,3,0.009141333401203156
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,32,1,128,0,1,float16,fp8,3,0.009269333134094873
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,32,1,128,0,1,float16,fp8,7,0.008938666433095932
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,32,1,128,0,1,float16,fp8,15,0.009808000177145004
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,32,1,128,0,1,float16,float16,7,0.009098666409651438
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,32,1,128,0,1,float16,fp8,31,0.009050666665037474
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,32,8,128,0,1,float16,float16,65535,0.07209066549936931
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,32,1,128,0,1,float16,float16,15,0.008992000172535578
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,32,8,128,0,1,float16,fp8,65535,0.04674666623274485
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,32,1,128,0,1,float16,float16,31,0.009808000177145004
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,32,1,128,0,1,float16,float16,63,0.012351999680201212
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,32,1,128,0,1,float16,fp8,63,0.009450666606426239
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,32,1,128,0,1,float16,float16,127,0.011221333096424738
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,32,1,128,0,1,float16,fp8,127,0.010879999647537867
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,32,1,128,0,1,float16,float16,255,0.011776000261306763
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,32,1,128,0,1,float16,fp8,255,0.011829332758982977
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,32,1,128,0,1,float16,float16,2047,0.035599999129772186
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,32,1,128,0,1,float16,float16,511,0.016314666718244553
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,32,1,128,0,1,float16,fp8,511,0.013477332890033722
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,32,1,128,0,1,float16,float16,1023,0.027994667490323383
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,32,1,128,0,1,float16,fp8,1023,0.014218666901191076
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,32,1,128,0,1,float16,float16,8191,0.023205332458019257
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,32,1,128,0,1,float16,fp8,2047,0.01570133368174235
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,32,1,128,0,1,float16,float16,4095,0.018640000373125076
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,32,1,128,0,1,float16,fp8,4095,0.01850133389234543
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,32,1,128,0,1,float16,fp8,8191,0.0220266655087471
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,32,1,128,0,1,float16,fp8,16383,0.026015999416510265
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,32,1,128,0,1,float16,float16,16383,0.02534399926662445
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,32,1,128,0,1,float16,float16,32767,0.03102933367093404
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,32,1,128,0,1,float16,fp8,32767,0.033402666449546814
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,32,2,128,0,1,float16,float16,3,0.009365333244204521
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,32,2,128,0,1,float16,float16,1,0.009088000282645226
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,32,2,128,0,1,float16,fp8,1,0.006826666494210561
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,32,1,128,0,1,float16,float16,65535,0.037920000652472176
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,32,2,128,0,1,float16,fp8,3,0.007162666569153468
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,32,2,128,0,1,float16,float16,7,0.008890666688481966
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,32,2,128,0,1,float16,float16,31,0.009685333197315535
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,32,2,128,0,1,float16,fp8,7,0.007584000006318092
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,32,1,128,0,1,float16,fp8,65535,0.043290664752324425
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,32,2,128,0,1,float16,float16,15,0.00921066664159298
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,32,2,128,0,1,float16,fp8,15,0.00679466687142849
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,32,2,128,0,1,float16,fp8,31,0.007589333380262057
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,32,2,128,0,1,float16,float16,63,0.01118933285276095
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,32,2,128,0,1,float16,fp8,63,0.007157333195209503
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,32,2,128,0,1,float16,float16,511,0.01621333385507266
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,32,2,128,0,1,float16,fp8,511,0.009962666779756546
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,32,2,128,0,1,float16,float16,127,0.01179733375708262
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,32,2,128,0,1,float16,fp8,1023,0.01110400011142095
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,32,8,128,0,1,float16,fp8,3,0.008367999767263731
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,32,2,128,0,1,float16,fp8,2047,0.012154666086037954
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,32,2,128,0,1,float16,fp8,127,0.008303999900817871
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,32,2,128,0,1,float16,float16,4095,0.016650666793187458
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,32,2,128,0,1,float16,float16,255,0.01166933278242747
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,32,2,128,0,1,float16,fp8,255,0.009354666496316591
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,32,2,128,0,1,float16,fp8,8191,0.016597333053747814
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,32,2,128,0,1,float16,float16,1023,0.027621333797772724
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,32,2,128,0,1,float16,float16,2047,0.01578666642308235
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,32,2,128,0,1,float16,fp8,4095,0.013712000101804733
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,32,2,128,0,1,float16,float16,8191,0.020106667031844456
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,32,2,128,0,1,float16,float16,32767,0.036015999813874565
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,32,2,128,0,1,float16,fp8,32767,0.02664000044266383
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,32,2,128,0,1,float16,float16,16383,0.02216000109910965
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,32,2,128,0,1,float16,fp8,16383,0.019007999449968338
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,32,4,128,0,1,float16,float16,1,0.009253333633144697
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,32,4,128,0,1,float16,fp8,1,0.007386666412154834
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,32,4,128,0,1,float16,float16,3,0.008997333546479544
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,32,2,128,0,1,float16,float16,65535,0.04747733473777771
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,32,4,128,0,1,float16,fp8,3,0.0069333333522081375
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,32,4,128,0,1,float16,float16,7,0.008693333094318708
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,32,4,128,0,1,float16,float16,31,0.009642666826645533
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,32,2,128,0,1,float16,fp8,65535,0.035504000882307686
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,32,4,128,0,1,float16,fp8,7,0.007237333183487256
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,32,4,128,0,1,float16,float16,15,0.00955200009047985
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,32,4,128,0,1,float16,fp8,15,0.006250666454434395
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,32,4,128,0,1,float16,fp8,31,0.007882666463653246
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,32,4,128,0,1,float16,float16,63,0.01110400011142095
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,32,4,128,0,1,float16,fp8,63,0.006224000205596288
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,32,4,128,0,1,float16,float16,127,0.011855999628702799
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,32,4,128,0,1,float16,fp8,127,0.007461333026488622
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,32,4,128,0,1,float16,float16,255,0.011909333368142446
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,32,4,128,0,1,float16,fp8,255,0.008938666433095932
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,32,4,128,0,1,float16,fp8,511,0.008053333188096682
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,32,4,128,0,1,float16,float16,511,0.016389333953460056
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,32,4,128,0,1,float16,float16,1023,0.013760000467300415
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,32,4,128,0,1,float16,fp8,1023,0.009269333134094873
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,32,4,128,0,1,float16,float16,2047,0.01594666639963786
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,32,4,128,0,1,float16,fp8,2047,0.010090666512648264
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,32,4,128,0,1,float16,float16,4095,0.01659199967980385
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,32,4,128,0,1,float16,float16,16383,0.03166399896144867
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,32,4,128,0,1,float16,fp8,4095,0.010922666639089584
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,32,4,128,0,1,float16,float16,8191,0.026191999514897663
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,32,4,128,0,1,float16,fp8,8191,0.014837333311637243
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,32,4,128,0,1,float16,float16,32767,0.049973333875338234
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,32,4,128,0,1,float16,fp8,16383,0.018464000274737675
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,32,4,128,0,1,float16,fp8,32767,0.02537599951028824
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,32,4,128,0,1,float16,float16,65535,0.07396799822648366
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,32,8,128,0,1,float16,float16,1,0.008672000219424566
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,32,8,128,0,1,float16,float16,7,0.008837333569924036
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,32,8,128,0,1,float16,fp8,1,0.005648000165820122
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,32,4,128,0,1,float16,fp8,65535,0.04810666541258494
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,32,8,128,0,1,float16,float16,3,0.009296000003814697
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,32,8,128,0,1,float16,fp8,3,0.007530666887760162
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,32,8,128,0,1,float16,fp8,7,0.008496000121037165
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,32,8,128,0,1,float16,float16,63,0.011663999408483505
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,32,8,128,0,1,float16,float16,15,0.008832000195980072
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,32,8,128,0,1,float16,fp8,15,0.00589866687854131
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,32,8,128,0,1,float16,float16,31,0.009519999846816063
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,32,8,128,0,1,float16,fp8,31,0.005717333406209946
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,32,8,128,0,1,float16,fp8,511,0.007850666840871176
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,32,8,128,0,1,float16,float16,511,0.012768000364303589
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,32,8,128,0,1,float16,fp8,63,0.007189333438873291
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,32,8,128,0,1,float16,float16,127,0.01181866725285848
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,32,8,128,0,1,float16,fp8,127,0.007418666655818622
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,32,8,128,0,1,float16,float16,255,0.011685332904259363
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,32,8,128,0,1,float16,fp8,255,0.007610666876037915
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,32,8,128,0,1,float16,float16,1023,0.013237333546082178
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,32,8,128,0,1,float16,fp8,1023,0.008410666758815447
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,32,8,128,0,1,float16,float16,2047,0.02022933339079221
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,32,8,128,0,1,float16,fp8,2047,0.010746666540702185
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,32,8,128,0,1,float16,float16,4095,0.02418133368094762
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,32,8,128,0,1,float16,fp8,4095,0.012383999923865
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,32,8,128,0,1,float16,float16,8191,0.03502399971087774
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,32,8,128,0,1,float16,fp8,8191,0.01640533283352852
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,32,8,128,0,1,float16,float16,16383,0.0499946673711141
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,32,8,128,0,1,float16,fp8,32767,0.03956266740957896
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,32,8,128,0,1,float16,fp8,16383,0.02740799884001414
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,32,8,128,0,1,float16,float16,32767,0.07815466821193695
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,32,1,128,0,1,float16,fp8,3,0.009770666559537252
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,32,1,128,0,1,float16,float16,7,0.012469333906968435
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,32,8,128,0,1,float16,fp8,65535,0.06562666594982147
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,32,1,128,0,1,float16,float16,1,0.01312000056107839
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,32,1,128,0,1,float16,fp8,1,0.009296000003814697
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,32,1,128,0,1,float16,float16,3,0.012330666184425354
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,32,8,128,0,1,float16,float16,65535,0.12366933623949687
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,32,1,128,0,1,float16,fp8,7,0.009343999748428663
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,32,1,128,0,1,float16,float16,15,0.012981332838535309
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,32,1,128,0,1,float16,fp8,15,0.009232000137368837
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,32,1,128,0,1,float16,float16,31,0.012906666845083237
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,32,1,128,0,1,float16,fp8,127,0.010101333260536194
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,32,1,128,0,1,float16,fp8,255,0.01209066684047381
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,32,1,128,0,1,float16,fp8,31,0.010181333248813948
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,32,1,128,0,1,float16,float16,63,0.01231466606259346
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,32,1,128,0,1,float16,float16,1023,0.03436266630887985
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,32,1,128,0,1,float16,fp8,63,0.009242666885256767
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,32,1,128,0,1,float16,float16,127,0.012938667088747025
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,32,1,128,0,1,float16,float16,255,0.016656000167131424
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,32,1,128,0,1,float16,float16,511,0.02425066630045573
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,32,1,128,0,1,float16,fp8,511,0.01431999976436297
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,32,1,128,0,1,float16,fp8,1023,0.02677333354949951
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,32,1,128,0,1,float16,float16,2047,0.05385066568851471
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,32,1,128,0,1,float16,fp8,2047,0.03666666646798452
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,32,1,128,0,1,float16,float16,4095,0.08558400472005208
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,32,1,128,0,1,float16,fp8,4095,0.06258666515350342
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,32,2,128,0,1,float16,float16,1,0.011242666592200598
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,32,1,128,0,1,float16,float16,8191,0.10892800490061443
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,32,2,128,0,1,float16,fp8,1,0.007658666620651881
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,32,2,128,0,1,float16,float16,3,0.010949333508809408
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,32,1,128,0,1,float16,fp8,8191,0.0832426647345225
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,32,2,128,0,1,float16,fp8,3,0.007296000296870868
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,32,2,128,0,1,float16,float16,7,0.011221333096424738
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,32,2,128,0,1,float16,fp8,7,0.007386666412154834
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,32,2,128,0,1,float16,float16,15,0.011690666278203329
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,32,2,128,0,1,float16,fp8,15,0.007626666376988093
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,32,2,128,0,1,float16,float16,31,0.011493333925803503
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,32,2,128,0,1,float16,fp8,31,0.007903999959429106
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,32,2,128,0,1,float16,float16,63,0.01101333275437355
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,32,2,128,0,1,float16,fp8,255,0.011029332876205444
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,32,2,128,0,1,float16,fp8,63,0.00725333330531915
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,32,2,128,0,1,float16,float16,127,0.01101333275437355
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,32,2,128,0,1,float16,fp8,127,0.008527999743819237
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,32,2,128,0,1,float16,float16,255,0.02269333352645238
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,32,2,128,0,1,float16,float16,511,0.02975466599067052
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,32,2,128,0,1,float16,fp8,511,0.018565333137909572
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,32,2,128,0,1,float16,float16,1023,0.04765866696834564
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,32,2,128,0,1,float16,fp8,1023,0.027082666754722595
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,32,2,128,0,1,float16,float16,2047,0.06589333216349284
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,32,2,128,0,1,float16,fp8,2047,0.03595733394225439
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,32,2,128,0,1,float16,float16,4095,0.0865066647529602
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,32,2,128,0,1,float16,fp8,4095,0.0513919989267985
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,32,2,128,0,1,float16,float16,8191,0.1206826666990916
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,32,4,128,0,1,float16,float16,1,0.01594666639963786
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,32,4,128,0,1,float16,fp8,1,0.006762666627764702
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,32,4,128,0,1,float16,fp8,7,0.00702400008837382
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,32,2,128,0,1,float16,fp8,8191,0.07531733314196269
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,32,4,128,0,1,float16,float16,3,0.016469333320856094
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,32,4,128,0,1,float16,fp8,3,0.007397333160042763
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,32,4,128,0,1,float16,float16,7,0.01629866659641266
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,32,4,128,0,1,float16,float16,15,0.015829333414634068
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,32,4,128,0,1,float16,fp8,15,0.006874666859706243
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,32,4,128,0,1,float16,float16,31,0.01597333326935768
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,32,4,128,0,1,float16,fp8,31,0.0074346667776505155
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,32,4,128,0,1,float16,float16,255,0.0310506671667099
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,32,4,128,0,1,float16,float16,63,0.01643199970324834
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,32,4,128,0,1,float16,fp8,63,0.007296000296870868
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,32,4,128,0,1,float16,float16,127,0.01659199967980385
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,32,4,128,0,1,float16,fp8,127,0.007930666829148928
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,32,4,128,0,1,float16,fp8,255,0.015322666615247726
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,32,4,128,0,1,float16,float16,511,0.04762133459250132
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,32,4,128,0,1,float16,fp8,511,0.02123733361562093
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,32,4,128,0,1,float16,float16,1023,0.062447999914487205
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,32,4,128,0,1,float16,fp8,1023,0.027456000447273254
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,32,4,128,0,1,float16,float16,2047,0.08456533153851827
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,32,4,128,0,1,float16,fp8,2047,0.040789333482583366
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,32,4,128,0,1,float16,fp8,4095,0.05840533475081126
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,32,4,128,0,1,float16,float16,4095,0.11982400218645732
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,32,8,128,0,1,float16,float16,1,0.025744001070658367
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,32,4,128,0,1,float16,float16,8191,0.19348265727361044
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,32,4,128,0,1,float16,fp8,8191,0.09827199578285217
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,32,8,128,0,1,float16,fp8,1,0.010277333358923594
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,32,8,128,0,1,float16,float16,3,0.02587199956178665
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,32,8,128,0,1,float16,fp8,3,0.010288000106811523
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,32,8,128,0,1,float16,float16,7,0.025402667621771496
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,32,8,128,0,1,float16,fp8,7,0.009813333551088968
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,32,8,128,0,1,float16,fp8,15,0.010069333637754122
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,32,8,128,0,1,float16,float16,15,0.025701334079106648
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,32,8,128,0,1,float16,float16,31,0.025045332809289295
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,32,8,128,0,1,float16,fp8,31,0.01020800011853377
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,32,8,128,0,1,float16,fp8,63,0.01020800011853377
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,32,8,128,0,1,float16,float16,511,0.06434133152167003
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,32,8,128,0,1,float16,float16,63,0.025834667185942333
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,32,8,128,0,1,float16,float16,127,0.02548266698916753
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,32,8,128,0,1,float16,float16,1023,0.08155733346939087
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,32,8,128,0,1,float16,fp8,127,0.011557333171367645
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,32,8,128,0,1,float16,float16,2047,0.12140267093976338
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,32,8,128,0,1,float16,float16,255,0.05012799799442291
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,32,8,128,0,1,float16,fp8,255,0.020074666788180668
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,32,8,128,0,1,float16,float16,4095,0.19158933560053507
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,32,8,128,0,1,float16,fp8,511,0.026234666506449383
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,32,8,128,0,1,float16,fp8,1023,0.037578667203585304
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,32,8,128,0,1,float16,fp8,2047,0.05630399783452352
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,32,1,128,0,1,float16,float16,3,0.009312000125646591
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,32,8,128,0,1,float16,fp8,4095,0.09683733185132344
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,32,1,128,0,1,float16,float16,1,0.009503999724984169
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,32,8,128,0,1,float16,float16,8191,0.33348266283671063
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,32,1,128,0,1,float16,fp8,1,0.009525333220760027
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,32,8,128,0,1,float16,fp8,8191,0.17750400304794312
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,32,1,128,0,1,float16,fp8,3,0.009413333609700203
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,32,1,128,0,1,float16,float16,7,0.009029333169261614
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,32,1,128,0,1,float16,fp8,7,0.009381333366036415
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,32,1,128,0,1,float16,float16,15,0.009114666531483332
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,32,1,128,0,1,float16,fp8,15,0.009930666536092758
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,32,1,128,0,1,float16,float16,31,0.010378666842977205
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,32,1,128,0,1,float16,fp8,31,0.00949866697192192
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,32,1,128,0,1,float16,float16,63,0.011744000017642975
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,32,1,128,0,1,float16,fp8,63,0.009573333586255709
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,32,1,128,0,1,float16,fp8,511,0.013061333447694778
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,32,1,128,0,1,float16,float16,1023,0.030613332986831665
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,32,1,128,0,1,float16,float16,127,0.011663999408483505
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,32,1,128,0,1,float16,fp8,127,0.01080000028014183
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,32,1,128,0,1,float16,float16,255,0.011567999919255575
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,32,1,128,0,1,float16,fp8,255,0.012773333738247553
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,32,1,128,0,1,float16,float16,511,0.02647999922434489
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,32,1,128,0,1,float16,fp8,1023,0.01379199946920077
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,32,1,128,0,1,float16,float16,2047,0.019823999454577763
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,32,1,128,0,1,float16,fp8,2047,0.015872000406185787
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,32,1,128,0,1,float16,float16,4095,0.019962667177120846
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,32,1,128,0,1,float16,fp8,4095,0.01817600056529045
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,32,1,128,0,1,float16,float16,8191,0.024106666445732117
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,32,1,128,0,1,float16,fp8,8191,0.022330666581789654
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,32,1,128,0,1,float16,float16,16383,0.026954665780067444
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,32,1,128,0,1,float16,fp8,16383,0.027098665634791057
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,32,1,128,0,1,float16,float16,32767,0.04537599782148997
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,32,1,128,0,1,float16,fp8,32767,0.03963200002908707
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,32,2,128,0,1,float16,float16,1,0.009472000102202097
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,32,2,128,0,1,float16,fp8,1,0.0075573331365982694
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,32,2,128,0,1,float16,float16,3,0.009568000212311745
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,32,2,128,0,1,float16,fp8,3,0.007466666400432587
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,32,1,128,0,1,float16,fp8,65535,0.05045866469542185
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,32,2,128,0,1,float16,fp8,15,0.008416000132759413
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,32,1,128,0,1,float16,float16,65535,0.061749334136645
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,32,2,128,0,1,float16,float16,7,0.00897066667675972
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,32,2,128,0,1,float16,fp8,7,0.007605333502093951
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,32,2,128,0,1,float16,float16,15,0.009589333087205887
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,32,2,128,0,1,float16,float16,127,0.011477333803971609
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,32,2,128,0,1,float16,float16,31,0.010138666878143946
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,32,2,128,0,1,float16,fp8,31,0.007040000210205714
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,32,2,128,0,1,float16,float16,63,0.012069333344697952
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,32,2,128,0,1,float16,fp8,63,0.00754666638871034
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,32,2,128,0,1,float16,fp8,127,0.008181333541870117
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,32,2,128,0,1,float16,float16,255,0.012154666086037954
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,32,2,128,0,1,float16,fp8,255,0.009279999881982803
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,32,2,128,0,1,float16,float16,511,0.026341333985328674
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,32,2,128,0,1,float16,fp8,511,0.009503999724984169
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,32,2,128,0,1,float16,float16,1023,0.014533333480358124
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,32,2,128,0,1,float16,fp8,1023,0.010837333897749582
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,32,2,128,0,1,float16,float16,2047,0.017370666066805523
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,32,2,128,0,1,float16,fp8,2047,0.012213333199421564
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,32,2,128,0,1,float16,float16,4095,0.01793066660563151
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,32,2,128,0,1,float16,fp8,4095,0.013776000589132309
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,32,2,128,0,1,float16,fp8,16383,0.021898667017618816
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,32,2,128,0,1,float16,float16,8191,0.026416001220544178
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,32,2,128,0,1,float16,fp8,8191,0.018778666853904724
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,32,2,128,0,1,float16,float16,32767,0.05147733290990194
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,32,2,128,0,1,float16,float16,16383,0.033333333830038704
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,32,2,128,0,1,float16,fp8,32767,0.03200533241033554
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,32,4,128,0,1,float16,float16,1,0.009130666653315226
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,32,4,128,0,1,float16,fp8,1,0.008570666735370954
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,32,4,128,0,1,float16,float16,3,0.009359999870260557
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,32,4,128,0,1,float16,fp8,3,0.0069440001000960665
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,32,4,128,0,1,float16,float16,7,0.008869333192706108
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,32,4,128,0,1,float16,fp8,7,0.006079999729990959
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,32,2,128,0,1,float16,float16,65535,0.07660266757011414
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,32,4,128,0,1,float16,float16,15,0.009279999881982803
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,32,2,128,0,1,float16,fp8,65535,0.06115200122197469
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,32,4,128,0,1,float16,float16,127,0.011823999385039011
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,32,4,128,0,1,float16,fp8,15,0.008362666393319765
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,32,4,128,0,1,float16,float16,31,0.010288000106811523
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,32,4,128,0,1,float16,fp8,31,0.006981333096822103
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,32,4,128,0,1,float16,float16,63,0.011434666812419891
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,32,4,128,0,1,float16,fp8,63,0.00624533308049043
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,32,4,128,0,1,float16,fp8,127,0.006773333375652631
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,32,4,128,0,1,float16,float16,255,0.012432000289360682
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,32,4,128,0,1,float16,fp8,255,0.008565333361426989
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,32,4,128,0,1,float16,float16,511,0.013557333499193192
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,32,4,128,0,1,float16,fp8,511,0.008687999720374743
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,32,4,128,0,1,float16,fp8,4095,0.01302933320403099
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,32,4,128,0,1,float16,float16,1023,0.014165333161751429
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,32,4,128,0,1,float16,fp8,1023,0.00878399983048439
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,32,4,128,0,1,float16,float16,2047,0.02164799968401591
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,32,4,128,0,1,float16,fp8,2047,0.011018666128317514
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,32,4,128,0,1,float16,float16,4095,0.025397333006064098
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,32,4,128,0,1,float16,float16,8191,0.03649600098530451
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,32,4,128,0,1,float16,fp8,8191,0.017525333911180496
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,32,4,128,0,1,float16,float16,16383,0.05285866558551788
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,32,4,128,0,1,float16,fp8,16383,0.02934933453798294
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,32,4,128,0,1,float16,float16,32767,0.08009600142637889
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,32,4,128,0,1,float16,fp8,32767,0.04054400076468786
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,32,8,128,0,1,float16,float16,1,0.009578666960199675
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,32,8,128,0,1,float16,fp8,1,0.007050666958093643
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,32,8,128,0,1,float16,float16,3,0.009413333609700203
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,32,4,128,0,1,float16,float16,65535,0.1267626682917277
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,32,8,128,0,1,float16,fp8,3,0.007205333560705185
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,32,8,128,0,1,float16,float16,7,0.009039999917149544
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,32,8,128,0,1,float16,fp8,7,0.00596266674498717
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,32,4,128,0,1,float16,fp8,65535,0.0682773341735204
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,32,8,128,0,1,float16,float16,15,0.00972800018886725
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,32,8,128,0,1,float16,fp8,15,0.00731733317176501
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,32,8,128,0,1,float16,float16,31,0.01028266673286756
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,32,8,128,0,1,float16,fp8,31,0.005999999741713206
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,32,8,128,0,1,float16,float16,63,0.011877333124478659
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,32,8,128,0,1,float16,fp8,63,0.005722666780153911
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,32,8,128,0,1,float16,float16,127,0.011706666400035223
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,32,8,128,0,1,float16,fp8,127,0.007327999919652939
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,32,8,128,0,1,float16,float16,255,0.013274667163689932
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,32,8,128,0,1,float16,fp8,255,0.008143999924262365
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,32,8,128,0,1,float16,float16,511,0.0136266661187013
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,32,8,128,0,1,float16,fp8,511,0.008122666428486506
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,32,8,128,0,1,float16,float16,1023,0.019760000209013622
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,32,8,128,0,1,float16,fp8,1023,0.009189333145817121
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,32,8,128,0,1,float16,float16,2047,0.027957332630952198
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,32,8,128,0,1,float16,fp8,2047,0.012213333199421564
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,32,8,128,0,1,float16,float16,4095,0.040394666294256844
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,32,8,128,0,1,float16,fp8,16383,0.03968533376852671
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,32,8,128,0,1,float16,fp8,4095,0.019280000279347103
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,32,8,128,0,1,float16,float16,8191,0.05899733304977417
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,32,8,128,0,1,float16,fp8,8191,0.024282666544119518
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,32,8,128,0,1,float16,float16,16383,0.08693866928418477
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,32,8,128,0,1,float16,float16,32767,0.12285332878430684
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,32,8,128,0,1,float16,fp8,32767,0.06187200049559275
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,32,1,128,0,1,float16,float16,1,0.013818666338920593
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,32,1,128,0,1,float16,float16,3,0.013653332988421122
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,32,1,128,0,1,float16,fp8,1,0.009557333464423815
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,32,1,128,0,1,float16,fp8,3,0.009418666362762451
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,32,8,128,0,1,float16,float16,65535,0.20031466086705527
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,32,1,128,0,1,float16,float16,7,0.013776000589132309
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,32,1,128,0,1,float16,fp8,7,0.009861333295702934
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,32,8,128,0,1,float16,fp8,65535,0.1067573328812917
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,32,1,128,0,1,float16,float16,15,0.013546666751305262
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,32,1,128,0,1,float16,fp8,15,0.009514666472872099
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,32,1,128,0,1,float16,float16,31,0.013173333058754602
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,32,1,128,0,1,float16,fp8,31,0.009877333417534828
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,32,1,128,0,1,float16,float16,63,0.01368533323208491
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,32,1,128,0,1,float16,fp8,255,0.013674666484196981
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,32,1,128,0,1,float16,fp8,63,0.009797333429257074
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,32,1,128,0,1,float16,float16,127,0.013733333597580591
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,32,1,128,0,1,float16,fp8,127,0.010629333555698395
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,32,1,128,0,1,float16,float16,255,0.0262719988822937
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,32,1,128,0,1,float16,float16,511,0.03571200122435888
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,32,1,128,0,1,float16,fp8,511,0.025258667767047882
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,32,1,128,0,1,float16,float16,1023,0.05853333572546641
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,32,1,128,0,1,float16,fp8,1023,0.045082668463389076
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,32,1,128,0,1,float16,float16,4095,0.10850133498509724
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,32,1,128,0,1,float16,float16,2047,0.08449600140253703
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,32,1,128,0,1,float16,fp8,2047,0.06346133351325989
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,32,2,128,0,1,float16,float16,1,0.016864000509182613
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,32,2,128,0,1,float16,fp8,1,0.007882666463653246
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,32,1,128,0,1,float16,fp8,4095,0.08165866633256276
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,32,2,128,0,1,float16,float16,3,0.01653333380818367
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,32,2,128,0,1,float16,fp8,3,0.007871999715765318
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,32,2,128,0,1,float16,float16,7,0.016800000021855038
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,32,2,128,0,1,float16,fp8,7,0.008309333274761835
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,32,2,128,0,1,float16,float16,15,0.016741332908471424
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,32,2,128,0,1,float16,fp8,15,0.007893333211541176
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,32,2,128,0,1,float16,float16,31,0.017008000363906223
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,32,2,128,0,1,float16,fp8,31,0.008277333031098047
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,32,2,128,0,1,float16,float16,63,0.015978666643301647
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,32,2,128,0,1,float16,fp8,63,0.008309333274761835
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,32,2,128,0,1,float16,float16,127,0.01711999997496605
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,32,2,128,0,1,float16,fp8,127,0.009370666618148485
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,32,2,128,0,1,float16,float16,255,0.03166933357715607
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,32,2,128,0,1,float16,fp8,255,0.018911999960740406
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,32,2,128,0,1,float16,float16,511,0.048800001541773476
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,32,2,128,0,1,float16,fp8,511,0.02585600068171819
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,32,2,128,0,1,float16,float16,1023,0.06302399933338165
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,32,2,128,0,1,float16,fp8,1023,0.03381866713364919
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,32,2,128,0,1,float16,float16,2047,0.08640000224113464
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,32,2,128,0,1,float16,fp8,2047,0.05096533397833506
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,32,2,128,0,1,float16,float16,4095,0.12217600146929423
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,32,4,128,0,1,float16,fp8,1,0.010618666807810465
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,32,4,128,0,1,float16,float16,1,0.02624533325433731
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,32,2,128,0,1,float16,fp8,4095,0.0732479989528656
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,32,4,128,0,1,float16,float16,3,0.025674665967623394
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,32,4,128,0,1,float16,fp8,3,0.011306667079528173
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,32,4,128,0,1,float16,float16,7,0.026613332331180573
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,32,4,128,0,1,float16,fp8,7,0.01126933346192042
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,32,4,128,0,1,float16,float16,15,0.026122666895389557
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,32,4,128,0,1,float16,fp8,15,0.01062400018175443
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,32,4,128,0,1,float16,float16,31,0.026213333010673523
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,32,4,128,0,1,float16,float16,63,0.025578667720158894
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,32,4,128,0,1,float16,fp8,31,0.011418666690587997
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,32,4,128,0,1,float16,fp8,63,0.011264000087976456
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,32,4,128,0,1,float16,float16,127,0.026709333062171936
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,32,4,128,0,1,float16,fp8,127,0.01333333303531011
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,32,4,128,0,1,float16,float16,255,0.051072001457214355
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,32,4,128,0,1,float16,fp8,255,0.02128533273935318
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,32,4,128,0,1,float16,float16,511,0.06541333099206288
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,32,4,128,0,1,float16,fp8,511,0.027386667827765148
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,32,4,128,0,1,float16,float16,1023,0.08319999774297078
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,32,4,128,0,1,float16,fp8,1023,0.041093334555625916
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,32,4,128,0,1,float16,float16,2047,0.12221866846084595
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,32,4,128,0,1,float16,fp8,2047,0.059248000383377075
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,32,8,128,0,1,float16,fp8,3,0.014405333747466406
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,32,4,128,0,1,float16,float16,4095,0.19337600469589233
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,32,4,128,0,1,float16,fp8,4095,0.09825600186983745
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,32,8,128,0,1,float16,float16,1,0.04471466441949209
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,32,8,128,0,1,float16,fp8,1,0.013722666849692663
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,32,8,128,0,1,float16,float16,3,0.045141334335009255
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,32,8,128,0,1,float16,float16,7,0.04588800172011057
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,32,8,128,0,1,float16,fp8,7,0.014282666146755219
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,32,8,128,0,1,float16,float16,15,0.04515199859937032
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,32,8,128,0,1,float16,fp8,15,0.013967999567588171
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,32,8,128,0,1,float16,float16,31,0.044218664367993675
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,32,8,128,0,1,float16,fp8,31,0.014069333672523499
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,32,8,128,0,1,float16,float16,255,0.053674668073654175
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,32,8,128,0,1,float16,float16,63,0.043749332427978516
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,32,8,128,0,1,float16,fp8,63,0.014335999886194864
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,32,8,128,0,1,float16,float16,127,0.04636266827583313
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,32,8,128,0,1,float16,fp8,127,0.016821333517630894
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,32,8,128,0,1,float16,fp8,255,0.021840001145998638
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,32,8,128,0,1,float16,float16,511,0.06962133447329204
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,32,8,128,0,1,float16,fp8,511,0.033802665770053864
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,32,8,128,0,1,float16,float16,1023,0.10502933462460835
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,32,8,128,0,1,float16,fp8,1023,0.05288533369700114
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,32,8,128,0,1,float16,float16,2047,0.1765013337135315
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,32,8,128,0,1,float16,fp8,2047,0.09447999795277913
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,32,1,128,0,1,float16,float16,1,0.020293333878119785
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,32,8,128,0,1,float16,fp8,4095,0.17355199654897055
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,32,8,128,0,1,float16,float16,4095,0.311626672744751
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,32,1,128,0,1,float16,fp8,1,0.010645333677530289
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,32,1,128,0,1,float16,float16,3,0.021114667256673176
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,32,1,128,0,1,float16,fp8,3,0.010762666662534079
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,32,1,128,0,1,float16,float16,7,0.02143466720978419
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,32,1,128,0,1,float16,fp8,7,0.010122666756312052
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,32,1,128,0,1,float16,float16,15,0.020448000480731327
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,32,1,128,0,1,float16,fp8,15,0.010725333044926325
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,32,1,128,0,1,float16,float16,31,0.020975999534130096
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,32,1,128,0,1,float16,fp8,31,0.010832000523805618
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,32,1,128,0,1,float16,float16,63,0.02086399992307027
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,32,1,128,0,1,float16,fp8,63,0.010757333288590113
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,32,1,128,0,1,float16,float16,127,0.020047999918460846
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,32,1,128,0,1,float16,fp8,127,0.011498666057984034
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,32,1,128,0,1,float16,fp8,511,0.04364266494909922
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,32,1,128,0,1,float16,float16,255,0.039546666045983635
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,32,1,128,0,1,float16,fp8,1023,0.05981333553791046
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,32,1,128,0,1,float16,fp8,255,0.02383466561635335
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,32,1,128,0,1,float16,float16,511,0.0609440008799235
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,32,1,128,0,1,float16,float16,1023,0.08025600016117096
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,32,2,128,0,1,float16,float16,1,0.02703999976317088
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,32,2,128,0,1,float16,fp8,1,0.01341333364446958
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,32,2,128,0,1,float16,float16,3,0.02733866622050603
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,32,2,128,0,1,float16,fp8,3,0.012736000120639801
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,32,2,128,0,1,float16,float16,7,0.027482666075229645
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,32,2,128,0,1,float16,fp8,7,0.013338666409254074
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,32,2,128,0,1,float16,float16,15,0.027888000011444092
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,32,2,128,0,1,float16,fp8,15,0.012858666479587555
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,32,2,128,0,1,float16,float16,31,0.027456000447273254
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,32,2,128,0,1,float16,fp8,31,0.013264000415802002
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,32,2,128,0,1,float16,float16,63,0.026767998933792114
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,32,2,128,0,1,float16,fp8,63,0.013210666676362356
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,32,2,128,0,1,float16,float16,127,0.027642667293548584
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,32,2,128,0,1,float16,fp8,127,0.015637333194414776
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,32,2,128,0,1,float16,float16,255,0.05269866685072581
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,32,2,128,0,1,float16,fp8,255,0.02657066782315572
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,32,2,128,0,1,float16,float16,511,0.06673599779605865
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,32,4,128,0,1,float16,fp8,1,0.01570133368174235
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,32,2,128,0,1,float16,fp8,511,0.039701332648595176
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,32,2,128,0,1,float16,float16,1023,0.08406933148701985
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,32,2,128,0,1,float16,fp8,1023,0.051327998439470925
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,32,4,128,0,1,float16,float16,1,0.04632000128428141
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,32,4,128,0,1,float16,float16,3,0.04574400186538696
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,32,4,128,0,1,float16,fp8,3,0.015728000551462173
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,32,4,128,0,1,float16,float16,7,0.04577066500981649
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,32,4,128,0,1,float16,fp8,31,0.015685333559910457
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,32,4,128,0,1,float16,fp8,7,0.01580799991885821
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,32,4,128,0,1,float16,float16,15,0.046426668763160706
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,32,4,128,0,1,float16,fp8,15,0.01523200049996376
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,32,4,128,0,1,float16,float16,31,0.04646400113900503
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,32,4,128,0,1,float16,float16,63,0.04539733131726583
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,32,4,128,0,1,float16,fp8,63,0.015706667055686314
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,32,4,128,0,1,float16,float16,511,0.07149333258469899
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,32,4,128,0,1,float16,float16,127,0.04638933142026266
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,32,4,128,0,1,float16,fp8,127,0.0191040001809597
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,32,4,128,0,1,float16,float16,255,0.05579199890295664
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,32,4,128,0,1,float16,fp8,255,0.025098666548728943
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,32,4,128,0,1,float16,fp8,511,0.036933332681655884
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,32,4,128,0,1,float16,float16,1023,0.10635733604431152
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,32,8,128,0,1,float16,float16,1,0.08136533200740814
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,32,4,128,0,1,float16,fp8,1023,0.056176001826922096
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,32,8,128,0,1,float16,fp8,1,0.024661332368850708
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,32,8,128,0,1,float16,float16,3,0.0821919987599055
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,32,8,128,0,1,float16,fp8,3,0.024400000770886738
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,32,8,128,0,1,float16,float16,7,0.0819946676492691
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,32,8,128,0,1,float16,fp8,7,0.024186665813128155
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,32,8,128,0,1,float16,float16,15,0.08168533444404602
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,32,8,128,0,1,float16,fp8,15,0.023738667368888855
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,32,8,128,0,1,float16,float16,31,0.08237333099047343
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,32,8,128,0,1,float16,fp8,31,0.02380799998839696
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,32,8,128,0,1,float16,float16,63,0.08133333424727122
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,32,8,128,0,1,float16,fp8,63,0.02441066751877467
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,32,8,128,0,1,float16,float16,127,0.08333866794904073
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,32,8,128,0,1,float16,fp8,127,0.02921066681543986
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,32,8,128,0,1,float16,float16,255,0.09556800127029419
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,32,8,128,0,1,float16,fp8,255,0.04091733445723852
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,32,8,128,0,1,float16,fp8,511,0.06060799956321716
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,32,8,128,0,1,float16,float16,511,0.12845866878827414
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,32,8,128,0,1,float16,float16,1023,0.1968053380648295
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,32,8,128,0,1,float16,fp8,1023,0.10021866361300151
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,32,1,128,0,1,float16,float16,1,0.03544000039498011
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,32,1,128,0,1,float16,float16,3,0.03482133398453394
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,32,1,128,0,1,float16,fp8,1,0.017610666652520496
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,32,1,128,0,1,float16,fp8,3,0.017279999951521557
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,32,1,128,0,1,float16,float16,7,0.034874667723973594
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,32,1,128,0,1,float16,fp8,7,0.017850667238235474
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,32,1,128,0,1,float16,float16,15,0.03548266738653183
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,32,1,128,0,1,float16,fp8,15,0.017829333742459614
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,32,1,128,0,1,float16,fp8,31,0.017221332838137943
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,32,1,128,0,1,float16,float16,31,0.0352906659245491
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,32,1,128,0,1,float16,fp8,63,0.01722666621208191
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,32,1,128,0,1,float16,float16,63,0.034847999612490334
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,32,1,128,0,1,float16,float16,127,0.03450666616360346
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,32,1,128,0,1,float16,fp8,127,0.020080000162124634
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,32,1,128,0,1,float16,float16,255,0.06825600067774455
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,32,1,128,0,1,float16,float16,511,0.08367466926574707
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,32,1,128,0,1,float16,fp8,255,0.04702933132648468
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,32,2,128,0,1,float16,float16,1,0.04726399978001913
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,32,1,128,0,1,float16,fp8,511,0.06540266672770183
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,32,2,128,0,1,float16,fp8,1,0.018613333503405254
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,32,2,128,0,1,float16,float16,3,0.04718933502833048
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,32,2,128,0,1,float16,fp8,3,0.019039999693632126
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,32,2,128,0,1,float16,float16,7,0.0476800004641215
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,32,2,128,0,1,float16,float16,15,0.047040000557899475
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,32,2,128,0,1,float16,fp8,7,0.018751999984184902
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,32,2,128,0,1,float16,fp8,15,0.018709332992633183
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,32,2,128,0,1,float16,fp8,31,0.01820266619324684
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,32,2,128,0,1,float16,float16,31,0.04773333172003428
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,32,2,128,0,1,float16,float16,63,0.04769066472848257
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,32,2,128,0,1,float16,float16,127,0.047637333472569786
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,32,2,128,0,1,float16,fp8,63,0.01884799947341283
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,32,2,128,0,1,float16,fp8,127,0.023045333723227184
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,32,2,128,0,1,float16,fp8,255,0.030794667700926464
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,32,2,128,0,1,float16,float16,255,0.05728533367315928
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,32,4,128,0,1,float16,float16,1,0.08355200290679932
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,32,2,128,0,1,float16,float16,511,0.07497066756089528
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,32,2,128,0,1,float16,fp8,511,0.047653332352638245
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,32,4,128,0,1,float16,fp8,1,0.026704000929991405
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,32,4,128,0,1,float16,fp8,3,0.025973332424958546
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,32,4,128,0,1,float16,float16,3,0.08483733733495076
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,32,4,128,0,1,float16,fp8,7,0.025962665677070618
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,32,4,128,0,1,float16,float16,7,0.08435199658075969
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,32,4,128,0,1,float16,float16,15,0.084714670976003
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,32,4,128,0,1,float16,fp8,15,0.026698666314284008
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,32,4,128,0,1,float16,fp8,31,0.026213333010673523
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,32,4,128,0,1,float16,float16,31,0.0846720039844513
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,32,4,128,0,1,float16,fp8,63,0.0258240004380544
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,32,4,128,0,1,float16,float16,63,0.08563733100891113
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,32,4,128,0,1,float16,float16,127,0.0863146682580312
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,32,4,128,0,1,float16,fp8,127,0.03429866582155228
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,32,4,128,0,1,float16,fp8,255,0.04679466784000397
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,32,4,128,0,1,float16,float16,255,0.09965866804122925
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,32,4,128,0,1,float16,fp8,511,0.06555733581384023
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,32,4,128,0,1,float16,float16,511,0.13260799646377563
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,32,8,128,0,1,float16,float16,1,0.1537813345591227
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,32,8,128,0,1,float16,fp8,1,0.04614933331807455
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,32,8,128,0,1,float16,fp8,7,0.04580800235271454
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,32,8,128,0,1,float16,fp8,3,0.044879997769991554
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,32,8,128,0,1,float16,float16,3,0.1537493367989858
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,32,8,128,0,1,float16,fp8,15,0.04632533093293508
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,32,8,128,0,1,float16,float16,15,0.15217066804567972
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,32,8,128,0,1,float16,float16,7,0.15372799833615622
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,32,8,128,0,1,float16,float16,31,0.155130664507548
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,32,8,128,0,1,float16,fp8,31,0.04587199787298838
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,32,8,128,0,1,float16,float16,63,0.15491732954978943
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,32,8,128,0,1,float16,fp8,63,0.0452106644709905
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,32,8,128,0,1,float16,float16,127,0.15620799859364828
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,32,8,128,0,1,float16,fp8,127,0.05709866682688395
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,32,8,128,0,1,float16,float16,255,0.1788960099220276
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,32,1,128,0,1,float16,float16,3,0.009109333157539368
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,32,1,128,0,1,float16,float16,1,0.008581333483258883
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,32,8,128,0,1,float16,fp8,255,0.07498666644096375
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,32,1,128,0,1,float16,fp8,1,0.009077333534757296
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,32,8,128,0,1,float16,float16,511,0.24341332912445068
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,32,8,128,0,1,float16,fp8,511,0.11199999849001567
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,32,1,128,0,1,float16,fp8,3,0.00960533320903778
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,32,1,128,0,1,float16,float16,7,0.008559999987483025
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,32,1,128,0,1,float16,fp8,7,0.009850666547815004
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,32,1,128,0,1,float16,float16,15,0.009402666861812273
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,32,1,128,0,1,float16,fp8,15,0.009402666861812273
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,32,1,128,0,1,float16,float16,31,0.011098666737476984
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,32,1,128,0,1,float16,fp8,31,0.010005333150426546
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,32,1,128,0,1,float16,float16,63,0.011727999895811081
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,32,1,128,0,1,float16,fp8,63,0.00966933307548364
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,32,1,128,0,1,float16,float16,127,0.011109333485364914
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,32,1,128,0,1,float16,float16,1023,0.016261332978804905
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,32,1,128,0,1,float16,fp8,127,0.01062400018175443
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,32,1,128,0,1,float16,float16,255,0.013045333325862885
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,32,1,128,0,1,float16,fp8,255,0.01249066616098086
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,32,1,128,0,1,float16,float16,511,0.01817600056529045
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,32,1,128,0,1,float16,fp8,511,0.01321600005030632
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,32,1,128,0,1,float16,fp8,1023,0.014479999740918478
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,32,1,128,0,1,float16,float16,2047,0.019109333554903667
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,32,1,128,0,1,float16,fp8,2047,0.0161013330022494
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,32,1,128,0,1,float16,float16,4095,0.021530665457248688
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,32,1,128,0,1,float16,fp8,4095,0.01821333294113477
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,32,1,128,0,1,float16,float16,8191,0.033226666351159416
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,32,1,128,0,1,float16,fp8,8191,0.02513599892457326
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,32,1,128,0,1,float16,float16,16383,0.03929600119590759
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,32,1,128,0,1,float16,fp8,16383,0.03166399896144867
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,32,1,128,0,1,float16,float16,32767,0.06391466657320659
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,32,2,128,0,1,float16,float16,1,0.008922666932145754
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,32,2,128,0,1,float16,fp8,3,0.00766933336853981
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,32,2,128,0,1,float16,fp8,1,0.00707733320693175
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,32,1,128,0,1,float16,fp8,32767,0.05553600192070007
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,32,2,128,0,1,float16,float16,3,0.009082666908701261
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,32,2,128,0,1,float16,float16,7,0.008650666723648706
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,32,1,128,0,1,float16,fp8,65535,0.07666133344173431
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,32,2,128,0,1,float16,fp8,7,0.00854399986565113
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,32,2,128,0,1,float16,float16,15,0.009957333405812582
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,32,1,128,0,1,float16,float16,65535,0.09781866272290547
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,32,2,128,0,1,float16,float16,127,0.011754666765530905
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,32,2,128,0,1,float16,fp8,15,0.007034666836261749
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,32,2,128,0,1,float16,float16,31,0.011610666910807291
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,32,2,128,0,1,float16,fp8,255,0.010064000263810158
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,32,2,128,0,1,float16,fp8,31,0.007760000104705493
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,32,2,128,0,1,float16,float16,63,0.01108266661564509
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,32,2,128,0,1,float16,float16,1023,0.014138666292031607
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,32,2,128,0,1,float16,fp8,63,0.00696000022192796
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,32,2,128,0,1,float16,fp8,127,0.008645333349704742
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,32,2,128,0,1,float16,float16,255,0.013173333058754602
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,32,2,128,0,1,float16,float16,511,0.013130666067202887
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,32,2,128,0,1,float16,fp8,511,0.01032533310353756
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,32,2,128,0,1,float16,fp8,1023,0.010543999572594961
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,32,2,128,0,1,float16,float16,2047,0.02233600119749705
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,32,2,128,0,1,float16,fp8,2047,0.013082666943470636
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,32,2,128,0,1,float16,float16,4095,0.025642665723959606
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,32,2,128,0,1,float16,fp8,4095,0.015775999675194424
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,32,2,128,0,1,float16,float16,8191,0.036277333895365395
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,32,2,128,0,1,float16,fp8,8191,0.021722666919231415
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,32,2,128,0,1,float16,float16,16383,0.05301333467165629
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,32,2,128,0,1,float16,fp8,16383,0.036901332437992096
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,32,2,128,0,1,float16,float16,32767,0.08123200138409932
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,32,4,128,0,1,float16,float16,1,0.009066666786869368
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,32,2,128,0,1,float16,fp8,32767,0.05250666538874308
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,32,4,128,0,1,float16,fp8,1,0.0059199997534354525
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,32,4,128,0,1,float16,float16,3,0.009328000247478485
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,32,4,128,0,1,float16,fp8,3,0.006037333359320958
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,32,2,128,0,1,float16,fp8,65535,0.08636266986529033
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,32,4,128,0,1,float16,float16,7,0.00914666677514712
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,32,2,128,0,1,float16,float16,65535,0.12849066654841104
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,32,4,128,0,1,float16,fp8,7,0.007162666569153468
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,32,4,128,0,1,float16,float16,15,0.009253333633144697
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,32,4,128,0,1,float16,fp8,15,0.008256000156203905
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,32,4,128,0,1,float16,float16,31,0.011087999989589056
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,32,4,128,0,1,float16,fp8,31,0.006026666611433029
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,32,4,128,0,1,float16,float16,63,0.011616000284751257
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,32,4,128,0,1,float16,fp8,63,0.00690133310854435
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,32,4,128,0,1,float16,float16,127,0.011792000383138657
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,32,4,128,0,1,float16,fp8,127,0.006602666651209195
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,32,4,128,0,1,float16,float16,255,0.01340266689658165
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,32,4,128,0,1,float16,fp8,255,0.00790933333337307
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,32,4,128,0,1,float16,float16,511,0.01320533330241839
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,32,4,128,0,1,float16,fp8,511,0.008890666688481966
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,32,4,128,0,1,float16,float16,1023,0.019760000209013622
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,32,4,128,0,1,float16,fp8,1023,0.010373333469033241
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,32,4,128,0,1,float16,float16,2047,0.028042666614055634
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,32,4,128,0,1,float16,fp8,2047,0.01301866645614306
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,32,4,128,0,1,float16,float16,4095,0.040175999204317726
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,32,4,128,0,1,float16,float16,16383,0.08777599533398946
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,32,4,128,0,1,float16,fp8,4095,0.01972266659140587
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,32,4,128,0,1,float16,float16,8191,0.059114664793014526
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,32,4,128,0,1,float16,fp8,8191,0.025536000728607178
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,32,4,128,0,1,float16,fp8,16383,0.04382933179537455
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,32,4,128,0,1,float16,float16,32767,0.12274666627248128
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,32,8,128,0,1,float16,float16,1,0.010304000228643417
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,32,4,128,0,1,float16,fp8,32767,0.06512000163396199
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,32,8,128,0,1,float16,fp8,1,0.007418666655818622
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,32,8,128,0,1,float16,float16,3,0.0106133334338665
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,32,8,128,0,1,float16,fp8,3,0.007322666545708974
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,32,8,128,0,1,float16,float16,7,0.0103946669648091
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,32,4,128,0,1,float16,float16,65535,0.2005013426144918
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,32,8,128,0,1,float16,fp8,7,0.00595199999709924
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,32,8,128,0,1,float16,float16,15,0.011152000476916632
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,32,4,128,0,1,float16,fp8,65535,0.10925333698590596
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,32,8,128,0,1,float16,fp8,15,0.007231999809543292
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,32,8,128,0,1,float16,float16,31,0.010608000059922537
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,32,8,128,0,1,float16,fp8,31,0.007386666412154834
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,32,8,128,0,1,float16,fp8,255,0.008373333141207695
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,32,8,128,0,1,float16,float16,63,0.010533332824707031
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,32,8,128,0,1,float16,fp8,63,0.005989333614706993
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,32,8,128,0,1,float16,float16,127,0.010351999973257383
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,32,8,128,0,1,float16,fp8,127,0.008687999720374743
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,32,8,128,0,1,float16,float16,255,0.014005333185195923
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,32,8,128,0,1,float16,float16,511,0.019637333850065868
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,32,8,128,0,1,float16,fp8,511,0.009370666618148485
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,32,8,128,0,1,float16,float16,1023,0.028549333413441975
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,32,8,128,0,1,float16,fp8,1023,0.013962666193644205
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,32,8,128,0,1,float16,float16,2047,0.04256533086299896
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,32,8,128,0,1,float16,fp8,2047,0.01721599946419398
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,32,8,128,0,1,float16,float16,4095,0.0673226664463679
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,32,8,128,0,1,float16,fp8,4095,0.026320000489552815
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,32,8,128,0,1,float16,float16,8191,0.08588266372680664
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,32,8,128,0,1,float16,fp8,8191,0.03781333317359289
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,32,8,128,0,1,float16,float16,16383,0.11945600310961406
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,32,8,128,0,1,float16,fp8,16383,0.05704533557097117
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,32,8,128,0,1,float16,float16,32767,0.18991466363271078
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,32,8,128,0,1,float16,fp8,32767,0.09716266393661499
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,32,1,128,0,1,float16,float16,1,0.06381333371003468
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,32,1,128,0,1,float16,fp8,3,0.030458666384220123
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,32,1,128,0,1,float16,fp8,1,0.03164266546567281
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,32,8,128,0,1,float16,fp8,65535,0.17885865767796835
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,32,1,128,0,1,float16,float16,7,0.06366399923960368
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,32,1,128,0,1,float16,float16,3,0.06358933448791504
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,32,8,128,0,1,float16,float16,65535,0.33346664905548096
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,32,1,128,0,1,float16,fp8,7,0.03054933249950409
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,32,1,128,0,1,float16,fp8,15,0.03051200012365977
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,32,1,128,0,1,float16,float16,15,0.06387199958165486
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,32,1,128,0,1,float16,float16,63,0.0641546646753947
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,32,1,128,0,1,float16,float16,31,0.06437333424886067
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,32,1,128,0,1,float16,fp8,31,0.032618666688601174
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,32,1,128,0,1,float16,fp8,127,0.038592000802357994
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,32,1,128,0,1,float16,fp8,63,0.03126399964094162
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,32,1,128,0,1,float16,float16,255,0.0745600014925003
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,32,2,128,0,1,float16,float16,1,0.087226668993632
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,32,1,128,0,1,float16,float16,127,0.0662773350874583
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,32,2,128,0,1,float16,fp8,3,0.03399466723203659
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,32,1,128,0,1,float16,fp8,255,0.05859733124574026
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,32,2,128,0,1,float16,fp8,1,0.0335413341720899
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,32,2,128,0,1,float16,float16,3,0.08860266208648682
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,32,2,128,0,1,float16,fp8,7,0.033002667129039764
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,32,2,128,0,1,float16,float16,15,0.08725866675376892
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,32,2,128,0,1,float16,float16,7,0.08759466807047527
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,32,2,128,0,1,float16,fp8,15,0.0335413341720899
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,32,2,128,0,1,float16,fp8,63,0.0328053335348765
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,32,2,128,0,1,float16,float16,31,0.08829866846402486
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,32,2,128,0,1,float16,fp8,31,0.033701332906881966
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,32,2,128,0,1,float16,float16,127,0.09071466326713562
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,32,2,128,0,1,float16,float16,63,0.0890773336092631
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,32,2,128,0,1,float16,float16,255,0.1037493348121643
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,32,2,128,0,1,float16,fp8,127,0.05064000189304352
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,32,2,128,0,1,float16,fp8,255,0.061978667974472046
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,32,4,128,0,1,float16,float16,3,0.15741866827011108
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,32,4,128,0,1,float16,float16,1,0.1581546664237976
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,32,4,128,0,1,float16,fp8,1,0.056976000467936196
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,32,4,128,0,1,float16,float16,7,0.1570186714331309
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,32,4,128,0,1,float16,fp8,3,0.056741332014401756
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,32,4,128,0,1,float16,float16,15,0.1584106683731079
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,32,4,128,0,1,float16,fp8,15,0.05602133274078369
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,32,4,128,0,1,float16,fp8,7,0.056320001681645714
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,32,4,128,0,1,float16,float16,63,0.15980266531308493
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,32,4,128,0,1,float16,float16,31,0.161189337571462
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,32,4,128,0,1,float16,fp8,31,0.05705599983533224
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,32,4,128,0,1,float16,float16,127,0.1620213290055593
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,32,4,128,0,1,float16,fp8,127,0.06608533362547557
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,32,4,128,0,1,float16,fp8,63,0.05677333474159241
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,32,4,128,0,1,float16,fp8,255,0.08387200037638347
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,32,4,128,0,1,float16,float16,255,0.18623999754587808
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,32,8,128,0,1,float16,float16,3,0.29601067304611206
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,32,8,128,0,1,float16,fp8,3,0.09006399909655254
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,32,8,128,0,1,float16,float16,1,0.2977546652158101
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,32,8,128,0,1,float16,fp8,1,0.0904319981733958
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,32,8,128,0,1,float16,fp8,7,0.0899786651134491
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,32,8,128,0,1,float16,float16,7,0.30050132671991986
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,32,8,128,0,1,float16,fp8,15,0.08949866890907288
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,32,8,128,0,1,float16,float16,31,0.3033226728439331
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,32,8,128,0,1,float16,float16,15,0.2990666627883911
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,32,8,128,0,1,float16,fp8,63,0.08960533142089844
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,32,8,128,0,1,float16,fp8,31,0.09065600236256917
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,32,8,128,0,1,float16,float16,63,0.3015519976615906
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,32,8,128,0,1,float16,fp8,127,0.10433600346247356
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,32,8,128,0,1,float16,float16,255,0.3479573329289754
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,32,8,128,0,1,float16,float16,127,0.3025173346201579
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,32,8,128,0,1,float16,fp8,255,0.1406773328781128
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,32,1,128,0,1,float16,fp8,1,0.08700266480445862
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,32,1,128,0,1,float16,float16,7,0.12062399586041768
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,32,1,128,0,1,float16,fp8,7,0.08630399902661641
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,32,1,128,0,1,float16,float16,1,0.12098667025566101
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,32,1,128,0,1,float16,float16,3,0.12170666456222534
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,32,1,128,0,1,float16,fp8,3,0.08755200107892354
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,32,1,128,0,1,float16,float16,31,0.12264000376065572
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,32,1,128,0,1,float16,float16,15,0.12245866656303406
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,32,1,128,0,1,float16,fp8,31,0.08709866801897685
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,32,1,128,0,1,float16,fp8,15,0.08799999952316284
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,32,1,128,0,1,float16,float16,63,0.12196266651153564
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,32,2,128,0,1,float16,float16,1,0.1651946703592936
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,32,1,128,0,1,float16,fp8,63,0.08753066261609395
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,32,2,128,0,1,float16,float16,3,0.16485333442687988
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,32,1,128,0,1,float16,float16,127,0.12273066242535909
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,32,1,128,0,1,float16,fp8,127,0.09784533580144246
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,32,2,128,0,1,float16,float16,7,0.16621333360671997
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,32,2,128,0,1,float16,fp8,1,0.08029333253701527
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,32,2,128,0,1,float16,fp8,7,0.07893333335717519
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,32,2,128,0,1,float16,fp8,3,0.0796853353579839
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,32,2,128,0,1,float16,fp8,15,0.07884266475836436
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,32,2,128,0,1,float16,float16,63,0.1667733391125997
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,32,2,128,0,1,float16,float16,15,0.16807466745376587
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,32,2,128,0,1,float16,float16,127,0.1692319909731547
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,32,2,128,0,1,float16,float16,31,0.16880534092585245
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,32,2,128,0,1,float16,fp8,31,0.07937600215276082
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,32,4,128,0,1,float16,float16,1,0.3081439932187398
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,32,2,128,0,1,float16,fp8,63,0.08012266457080841
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,32,4,128,0,1,float16,fp8,1,0.10567466417948405
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,32,2,128,0,1,float16,fp8,127,0.09287466605504353
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,32,4,128,0,1,float16,fp8,3,0.1051573355992635
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,32,4,128,0,1,float16,float16,15,0.31059199571609497
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,32,4,128,0,1,float16,float16,3,0.31031467517217
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,32,4,128,0,1,float16,float16,31,0.3133866588274638
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,32,4,128,0,1,float16,float16,7,0.3115359942118327
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,32,4,128,0,1,float16,fp8,7,0.10565867026646932
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,32,4,128,0,1,float16,float16,63,0.3102933367093404
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,32,4,128,0,1,float16,fp8,15,0.10566400488217671
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,32,4,128,0,1,float16,fp8,31,0.10526399811108907
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,32,4,128,0,1,float16,fp8,63,0.10450133681297302
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,32,4,128,0,1,float16,fp8,127,0.12337066729863484
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,32,8,128,0,1,float16,float16,3,0.5914293527603149
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,32,4,128,0,1,float16,float16,127,0.3158666690190633
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,32,8,128,0,1,float16,fp8,7,0.17278399070103964
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,32,8,128,0,1,float16,float16,1,0.5915733178456625
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,32,8,128,0,1,float16,fp8,1,0.17357865969340006
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,32,8,128,0,1,float16,fp8,3,0.17429333925247192
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,32,8,128,0,1,float16,float16,15,0.5956960121790568
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,32,8,128,0,1,float16,float16,7,0.5916266838709513
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,32,8,128,0,1,float16,fp8,15,0.17273600896199545
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,32,1,128,0,1,float16,float16,1,0.01431999976436297
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,32,8,128,0,1,float16,fp8,31,0.1726400057474772
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,32,1,128,0,1,float16,fp8,1,0.009141333401203156
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,32,1,128,0,1,float16,float16,3,0.014501333236694336
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,32,1,128,0,1,float16,fp8,3,0.009103999783595404
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,32,1,128,0,1,float16,float16,7,0.014384000251690546
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,32,8,128,0,1,float16,fp8,127,0.20341867208480835
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,32,1,128,0,1,float16,fp8,7,0.009114666531483332
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,32,8,128,0,1,float16,float16,31,0.5997600158055624
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,32,1,128,0,1,float16,float16,15,0.015568000574906668
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,32,8,128,0,1,float16,float16,63,0.5920053323109945
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,32,8,128,0,1,float16,fp8,63,0.17375467220942178
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,32,1,128,0,1,float16,fp8,15,0.009141333401203156
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,32,1,128,0,1,float16,float16,31,0.01842133328318596
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,32,1,128,0,1,float16,float16,127,0.01912533367673556
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,32,8,128,0,1,float16,float16,127,0.5952746470769247
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,32,1,128,0,1,float16,fp8,31,0.009712000067035357
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,32,1,128,0,1,float16,float16,63,0.019002666076024372
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,32,1,128,0,1,float16,fp8,63,0.009306666751702627
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,32,1,128,0,1,float16,fp8,511,0.01328533391157786
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,32,1,128,0,1,float16,float16,1023,0.016042667130629223
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,32,1,128,0,1,float16,fp8,127,0.010101333260536194
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,32,1,128,0,1,float16,float16,255,0.022085333863894146
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,32,1,128,0,1,float16,fp8,255,0.012202666451533636
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,32,1,128,0,1,float16,float16,4095,0.029919999341169994
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,32,1,128,0,1,float16,float16,511,0.01562133307258288
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,32,1,128,0,1,float16,fp8,1023,0.01422400027513504
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,32,1,128,0,1,float16,float16,2047,0.02593066543340683
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,32,1,128,0,1,float16,fp8,2047,0.017840000490347546
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,32,1,128,0,1,float16,fp8,4095,0.021317332983016968
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,32,1,128,0,1,float16,fp8,8191,0.03604800005753835
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,32,1,128,0,1,float16,fp8,16383,0.04543999830881754
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,32,1,128,0,1,float16,float16,8191,0.04553600152333578
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,32,1,128,0,1,float16,float16,16383,0.06758399804433186
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,32,2,128,0,1,float16,float16,1,0.014138666292031607
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,32,2,128,0,1,float16,fp8,1,0.007071999832987785
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,32,2,128,0,1,float16,float16,3,0.014208000153303146
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,32,2,128,0,1,float16,fp8,3,0.007397333160042763
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,32,1,128,0,1,float16,float16,32767,0.1058240036169688
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,32,2,128,0,1,float16,fp8,15,0.007605333502093951
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,32,2,128,0,1,float16,float16,7,0.0144213338692983
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,32,1,128,0,1,float16,fp8,32767,0.08240533371766408
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,32,2,128,0,1,float16,float16,63,0.019098666807015736
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,32,2,128,0,1,float16,fp8,7,0.007685333490371704
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,32,2,128,0,1,float16,float16,15,0.01516266663869222
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,32,2,128,0,1,float16,float16,31,0.018698666244745255
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,32,2,128,0,1,float16,fp8,31,0.008549333239595095
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,32,2,128,0,1,float16,fp8,63,0.007461333026488622
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,32,2,128,0,1,float16,float16,127,0.019317333896954853
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,32,2,128,0,1,float16,fp8,127,0.008357333640257517
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,32,2,128,0,1,float16,float16,255,0.01351999988158544
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,32,2,128,0,1,float16,fp8,255,0.00927466650803884
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,32,2,128,0,1,float16,float16,511,0.013765333841244379
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,32,2,128,0,1,float16,fp8,511,0.009925333162148794
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,32,2,128,0,1,float16,fp8,4095,0.024800000091393787
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,32,2,128,0,1,float16,float16,1023,0.02046400060256322
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,32,2,128,0,1,float16,fp8,1023,0.012373333175977072
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,32,2,128,0,1,float16,float16,2047,0.029114666084448498
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,32,2,128,0,1,float16,fp8,2047,0.015477333217859268
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,32,2,128,0,1,float16,float16,4095,0.040661332507928215
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,32,2,128,0,1,float16,float16,8191,0.059664001067479454
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,32,2,128,0,1,float16,fp8,8191,0.03242133309443792
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,32,4,128,0,1,float16,fp8,1,0.007018666714429855
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,32,2,128,0,1,float16,float16,16383,0.08720533053080241
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,32,2,128,0,1,float16,fp8,16383,0.05525333185990652
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,32,4,128,0,1,float16,float16,1,0.01098666712641716
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,32,4,128,0,1,float16,float16,3,0.011018666128317514
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,32,2,128,0,1,float16,float16,32767,0.127018670241038
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,32,4,128,0,1,float16,fp8,3,0.006144000217318535
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,32,2,128,0,1,float16,fp8,32767,0.0820906658967336
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,32,4,128,0,1,float16,float16,7,0.010842667271693548
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,32,4,128,0,1,float16,fp8,7,0.006128000095486641
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,32,4,128,0,1,float16,float16,15,0.010954666882753372
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,32,4,128,0,1,float16,fp8,15,0.007189333438873291
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,32,4,128,0,1,float16,float16,31,0.010949333508809408
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,32,4,128,0,1,float16,fp8,31,0.007114666824539502
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,32,4,128,0,1,float16,float16,63,0.010527999450763067
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,32,4,128,0,1,float16,fp8,63,0.007429333403706551
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,32,4,128,0,1,float16,float16,127,0.010922666639089584
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,32,4,128,0,1,float16,fp8,127,0.006773333375652631
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,32,4,128,0,1,float16,float16,255,0.014090667168299357
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,32,4,128,0,1,float16,fp8,255,0.008597333605090777
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,32,4,128,0,1,float16,float16,511,0.020128000527620316
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,32,4,128,0,1,float16,fp8,511,0.01002133327225844
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,32,4,128,0,1,float16,float16,1023,0.02811199923356374
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,32,4,128,0,1,float16,fp8,1023,0.01632533346613248
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,32,4,128,0,1,float16,float16,2047,0.04305600126584371
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,32,4,128,0,1,float16,fp8,2047,0.018613333503405254
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,32,4,128,0,1,float16,float16,4095,0.06765333314736684
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,32,4,128,0,1,float16,fp8,4095,0.028768000503381092
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,32,4,128,0,1,float16,float16,8191,0.08590400218963623
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,32,4,128,0,1,float16,fp8,8191,0.04126933217048645
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,32,4,128,0,1,float16,float16,16383,0.11984533071517944
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,32,4,128,0,1,float16,fp8,16383,0.059936001896858215
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,32,8,128,0,1,float16,float16,1,0.011194666226704916
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,32,8,128,0,1,float16,float16,7,0.010832000523805618
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,32,8,128,0,1,float16,fp8,1,0.00697066696981589
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,32,4,128,0,1,float16,fp8,32767,0.0997920036315918
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,32,8,128,0,1,float16,float16,3,0.01137599969903628
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,32,8,128,0,1,float16,fp8,3,0.007631999750932057
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,32,4,128,0,1,float16,float16,32767,0.193557341893514
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,32,8,128,0,1,float16,fp8,7,0.007354666789372762
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,32,8,128,0,1,float16,float16,15,0.011274666835864386
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,32,8,128,0,1,float16,fp8,15,0.00590933362642924
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,32,8,128,0,1,float16,float16,31,0.011242666592200598
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,32,8,128,0,1,float16,fp8,127,0.008447999755541483
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,32,8,128,0,1,float16,fp8,31,0.006031999985376994
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,32,8,128,0,1,float16,float16,63,0.011301333705584208
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,32,8,128,0,1,float16,float16,511,0.02868266652027766
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,32,8,128,0,1,float16,fp8,63,0.007173333317041397
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,32,8,128,0,1,float16,float16,127,0.011055999745925268
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,32,8,128,0,1,float16,fp8,1023,0.020421333611011505
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,32,8,128,0,1,float16,float16,255,0.02093333254257838
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,32,8,128,0,1,float16,fp8,255,0.008901333436369896
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,32,8,128,0,1,float16,fp8,511,0.0136266661187013
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,32,8,128,0,1,float16,float16,1023,0.04622933268547058
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,32,8,128,0,1,float16,float16,2047,0.06571733454863231
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,32,8,128,0,1,float16,fp8,2047,0.025802666942278545
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,32,8,128,0,1,float16,float16,4095,0.08397333820660909
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,32,8,128,0,1,float16,fp8,4095,0.037834666669368744
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,32,8,128,0,1,float16,fp8,8191,0.057333335280418396
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,32,8,128,0,1,float16,float16,8191,0.11987732847531636
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,32,8,128,0,1,float16,float16,16383,0.18995199600855509
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,32,8,128,0,1,float16,fp8,16383,0.09776000181833903
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,16,1,64,128,1,float16,float16,1,0.013823999712864557
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,16,1,64,0,1,float16,float16,1,0.014064000298579534
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,32,8,128,0,1,float16,fp8,32767,0.17776000499725342
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,32,8,128,0,1,float16,float16,32767,0.3328373432159424
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,16,1,64,128,1,float16,fp8,1,0.00720000018676122
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,16,1,64,0,1,float16,fp8,1,0.007322666545708974
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,16,1,64,128,1,float16,float16,3,0.014549333602190018
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,16,1,64,0,1,float16,float16,3,0.013861333330472311
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,16,1,64,128,1,float16,fp8,3,0.006624000146985054
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,16,1,64,0,1,float16,fp8,3,0.009008000294367472
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,16,1,64,128,1,float16,float16,7,0.014639999717473984
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,16,1,64,0,1,float16,float16,7,0.014602666099866232
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,16,1,64,128,1,float16,fp8,7,0.00731733317176501
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,16,1,64,0,1,float16,fp8,7,0.00726400005320708
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,16,1,64,128,1,float16,float16,15,0.014111999422311783
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,16,1,64,0,1,float16,float16,15,0.014261333892742792
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,16,1,64,128,1,float16,fp8,15,0.008602666358153025
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,16,1,64,0,1,float16,fp8,15,0.007104000076651573
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,16,1,64,128,1,float16,float16,31,0.015615999698638916
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,16,1,64,0,1,float16,float16,31,0.015696000307798386
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,16,1,64,128,1,float16,fp8,31,0.007157333195209503
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,16,1,64,0,1,float16,fp8,31,0.006698666761318843
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,16,1,64,128,1,float16,float16,63,0.018698666244745255
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,16,1,64,0,1,float16,float16,63,0.01923199991385142
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,16,1,64,128,1,float16,fp8,63,0.00725333330531915
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,16,1,64,0,1,float16,fp8,63,0.007071999832987785
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,16,1,64,128,1,float16,float16,127,0.019152000546455383
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,16,1,64,0,1,float16,float16,127,0.019391999890406925
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,16,1,64,128,1,float16,fp8,127,0.007231999809543292
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,16,1,64,0,1,float16,fp8,127,0.007274666801095009
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,16,1,64,0,1,float16,float16,255,0.012133333832025528
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,16,1,64,128,1,float16,float16,255,0.011973333855470022
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,16,1,64,128,1,float16,fp8,255,0.008458666503429413
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,16,1,64,0,1,float16,fp8,255,0.009109333157539368
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,16,1,64,128,1,float16,float16,511,0.011973333855470022
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,16,1,64,0,1,float16,float16,511,0.01239466667175293
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,16,1,64,128,1,float16,fp8,511,0.007914666707317034
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,16,1,64,0,1,float16,fp8,511,0.009008000294367472
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,16,1,64,128,1,float16,float16,1023,0.016879999389251072
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,16,1,64,0,1,float16,float16,1023,0.01747200017174085
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,16,1,64,128,1,float16,fp8,1023,0.008778666456540426
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,16,1,64,0,1,float16,fp8,1023,0.010698666175206503
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,16,1,64,128,1,float16,float16,2047,0.019978666057189304
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,16,1,64,0,1,float16,float16,2047,0.025146665672461193
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,16,1,64,128,1,float16,fp8,2047,0.008762666955590248
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,16,1,64,0,1,float16,fp8,2047,0.013269333789745966
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,16,1,64,128,1,float16,float16,4095,0.024693332612514496
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,16,1,64,0,1,float16,float16,4095,0.033743999898433685
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,16,1,64,128,1,float16,fp8,4095,0.008874666566650072
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,16,1,64,0,1,float16,fp8,4095,0.01754666616519292
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,16,1,64,128,1,float16,float16,8191,0.02847466617822647
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,16,1,64,0,1,float16,float16,8191,0.04603200157483419
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,16,1,64,128,1,float16,fp8,8191,0.008463999877373377
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,16,1,64,0,1,float16,fp8,8191,0.02848000079393387
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,16,1,64,128,1,float16,float16,16383,0.035375999907652535
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,16,1,64,0,1,float16,float16,16383,0.06957333286603291
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,16,1,64,128,1,float16,fp8,16383,0.009482666850090027
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,16,1,64,0,1,float16,fp8,16383,0.03941866755485535
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,16,2,64,128,1,float16,float16,1,0.010629333555698395
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,16,2,64,0,1,float16,float16,1,0.010330666477481524
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,16,1,64,128,1,float16,float16,32767,0.03566399961709976
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,16,2,64,128,1,float16,fp8,1,0.006784000123540561
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,16,1,64,128,1,float16,fp8,32767,0.008863999818762144
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,16,2,64,0,1,float16,fp8,1,0.006773333375652631
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,16,1,64,0,1,float16,fp8,32767,0.06568533182144165
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,16,2,64,128,1,float16,float16,3,0.010527999450763067
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,16,1,64,0,1,float16,float16,32767,0.09386666615804036
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,16,2,64,0,1,float16,float16,3,0.010384000216921171
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,16,2,64,0,1,float16,fp8,3,0.0069866664707660675
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,16,2,64,128,1,float16,fp8,3,0.00707733320693175
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,16,2,64,128,1,float16,float16,7,0.010048000141978264
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,16,2,64,128,1,float16,fp8,15,0.006842666616042455
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,16,2,64,0,1,float16,float16,7,0.010133333504199982
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,16,2,64,128,1,float16,fp8,7,0.0058399997651577
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,16,2,64,0,1,float16,fp8,7,0.00847999999920527
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,16,2,64,128,1,float16,fp8,31,0.007157333195209503
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,16,2,64,128,1,float16,float16,15,0.010378666842977205
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,16,2,64,0,1,float16,float16,15,0.010293333480755487
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,16,2,64,0,1,float16,fp8,15,0.007098666702707608
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,16,2,64,128,1,float16,float16,31,0.010058666889866194
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,16,2,64,0,1,float16,float16,31,0.010549332946538925
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,16,2,64,0,1,float16,fp8,31,0.005850666513045629
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,16,2,64,128,1,float16,float16,63,0.010330666477481524
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,16,2,64,0,1,float16,float16,63,0.009952000031868616
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,16,2,64,128,1,float16,fp8,63,0.0069333333522081375
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,16,2,64,0,1,float16,fp8,63,0.0069973332186539965
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,16,2,64,128,1,float16,float16,127,0.009999999776482582
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,16,2,64,0,1,float16,float16,255,0.012608000387748083
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,16,2,64,0,1,float16,float16,127,0.010533332824707031
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,16,2,64,128,1,float16,fp8,127,0.006208000083764394
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,16,2,64,0,1,float16,fp8,127,0.008447999755541483
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,16,2,64,128,1,float16,float16,255,0.012138667205969492
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,16,2,64,128,1,float16,fp8,255,0.007674666742483775
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,16,2,64,0,1,float16,fp8,255,0.007658666620651881
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,16,2,64,128,1,float16,float16,511,0.01711999997496605
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,16,2,64,0,1,float16,float16,511,0.017616000026464462
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,16,2,64,128,1,float16,fp8,511,0.007333333293596904
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,16,2,64,0,1,float16,fp8,511,0.008901333436369896
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,16,2,64,128,1,float16,float16,1023,0.02162133405605952
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,16,2,64,0,1,float16,float16,1023,0.025029333929220837
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,16,2,64,128,1,float16,fp8,1023,0.0074879998962084455
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,16,2,64,0,1,float16,fp8,1023,0.011333333949247995
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,16,2,64,128,1,float16,float16,2047,0.027632000545660656
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,16,2,64,0,1,float16,float16,2047,0.036202666660149894
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,16,2,64,128,1,float16,fp8,2047,0.008656000097592672
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,16,2,64,0,1,float16,fp8,2047,0.01735466718673706
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,16,2,64,128,1,float16,float16,4095,0.034629332522551216
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,16,2,64,0,1,float16,float16,4095,0.05243733525276184
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,16,2,64,128,1,float16,fp8,4095,0.008789333204428354
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,16,2,64,0,1,float16,fp8,4095,0.021695998807748158
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,16,2,64,128,1,float16,float16,8191,0.03401066611210505
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,16,2,64,0,1,float16,float16,8191,0.06808533271153767
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,16,2,64,128,1,float16,fp8,8191,0.00874133345981439
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,16,2,64,0,1,float16,fp8,8191,0.030058667063713074
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,16,2,64,128,1,float16,float16,16383,0.033813332517941795
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,16,2,64,128,1,float16,fp8,16383,0.008789333204428354
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,16,2,64,0,1,float16,float16,16383,0.09038399656613667
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,16,2,64,0,1,float16,fp8,16383,0.04709866642951965
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,16,4,64,128,1,float16,float16,1,0.010389333590865135
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,16,4,64,0,1,float16,float16,1,0.010533332824707031
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,16,2,64,128,1,float16,float16,32767,0.03460799902677536
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,16,4,64,128,1,float16,fp8,1,0.007135999699433644
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,16,4,64,0,1,float16,fp8,1,0.006026666611433029
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,16,2,64,0,1,float16,float16,32767,0.13321600357691446
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,16,2,64,0,1,float16,fp8,32767,0.07992533346017201
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,16,4,64,128,1,float16,float16,3,0.010522666076819101
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,16,2,64,128,1,float16,fp8,32767,0.008650666723648706
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,16,4,64,0,1,float16,float16,3,0.010634666929642359
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,16,4,64,128,1,float16,fp8,3,0.008389333263039589
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,16,4,64,0,1,float16,fp8,3,0.0069919998447100324
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,16,4,64,128,1,float16,float16,7,0.010015999898314476
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,16,4,64,0,1,float16,float16,7,0.01062400018175443
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,16,4,64,128,1,float16,fp8,7,0.0060159998635451
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,16,4,64,0,1,float16,fp8,7,0.007040000210205714
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,16,4,64,0,1,float16,float16,15,0.010645333677530289
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,16,4,64,128,1,float16,float16,15,0.010485333700974783
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,16,4,64,128,1,float16,fp8,15,0.008400000010927519
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,16,4,64,0,1,float16,fp8,15,0.006981333096822103
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,16,4,64,128,1,float16,float16,31,0.01028266673286756
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,16,4,64,0,1,float16,float16,31,0.010586666564146677
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,16,4,64,128,1,float16,fp8,31,0.006026666611433029
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,16,4,64,0,1,float16,fp8,31,0.007242666557431221
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,16,4,64,128,1,float16,float16,63,0.010618666807810465
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,16,4,64,0,1,float16,float16,63,0.010389333590865135
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,16,4,64,0,1,float16,fp8,127,0.007061333085099856
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,16,4,64,128,1,float16,fp8,63,0.008389333263039589
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,16,4,64,0,1,float16,fp8,63,0.007045333584149678
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,16,4,64,128,1,float16,float16,127,0.010293333480755487
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,16,4,64,0,1,float16,float16,127,0.010538666198650995
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,16,4,64,128,1,float16,fp8,127,0.006175999840100606
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,16,4,64,128,1,float16,float16,255,0.018613333503405254
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,16,4,64,0,1,float16,float16,255,0.018063999712467194
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,16,4,64,128,1,float16,fp8,255,0.007674666742483775
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,16,4,64,0,1,float16,fp8,255,0.008976000050703684
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,16,4,64,128,1,float16,float16,511,0.023285334308942158
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,16,4,64,0,1,float16,float16,511,0.02495466669400533
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,16,4,64,128,1,float16,fp8,511,0.008522666369875273
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,16,4,64,0,1,float16,fp8,511,0.009381333366036415
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,16,4,64,128,1,float16,float16,1023,0.03314133236805598
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,16,4,64,0,1,float16,float16,1023,0.040074666341145836
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,16,4,64,128,1,float16,fp8,1023,0.008943999807039896
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,16,4,64,0,1,float16,fp8,1023,0.016783999900023144
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,16,4,64,128,1,float16,float16,2047,0.03573866685231527
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,16,4,64,0,1,float16,float16,2047,0.051327998439470925
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,16,4,64,128,1,float16,fp8,2047,0.009114666531483332
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,16,4,64,0,1,float16,fp8,4095,0.028549333413441975
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,16,4,64,0,1,float16,fp8,2047,0.019610666980346043
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,16,4,64,128,1,float16,float16,4095,0.03586666782697042
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,16,4,64,0,1,float16,float16,4095,0.06653866668542226
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,16,4,64,128,1,float16,fp8,4095,0.008949333180983862
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,16,4,64,0,1,float16,float16,8191,0.0885653297106425
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,16,4,64,128,1,float16,float16,8191,0.03530666728814443
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,16,4,64,128,1,float16,fp8,8191,0.00884799969693025
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,16,4,64,0,1,float16,fp8,8191,0.045978665351867676
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,16,4,64,128,1,float16,float16,16383,0.03473066786924998
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,16,4,64,0,1,float16,float16,16383,0.13183466593424478
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,16,4,64,128,1,float16,fp8,16383,0.009002666920423508
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,16,4,64,0,1,float16,fp8,16383,0.07784000039100647
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,16,4,64,128,1,float16,float16,32767,0.035349334279696144
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,16,4,64,0,1,float16,float16,32767,0.21954667568206787
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,16,4,64,128,1,float16,fp8,32767,0.008922666932145754
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,16,4,64,0,1,float16,fp8,32767,0.14321066935857138
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,16,8,64,0,1,float16,fp8,1,0.006501333167155583
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,16,8,64,128,1,float16,fp8,3,0.008383999889095625
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,16,8,64,128,1,float16,fp8,1,0.0064319999267657595
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,16,8,64,128,1,float16,fp8,7,0.0064106664309899015
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,16,8,64,0,1,float16,fp8,7,0.006389333556095759
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,16,8,64,0,1,float16,fp8,3,0.006298666819930077
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,16,8,64,128,1,float16,float16,1,0.014149333039919535
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,16,8,64,128,1,float16,float16,3,0.014554666976133982
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,16,8,64,128,1,float16,float16,15,0.014485333114862442
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,16,8,64,128,1,float16,float16,31,0.014314666390419006
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,16,8,64,0,1,float16,float16,15,0.014373333503802618
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,16,8,64,0,1,float16,float16,31,0.014479999740918478
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,16,8,64,128,1,float16,fp8,31,0.006288000072042148
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,16,8,64,0,1,float16,fp8,31,0.0063040001938740415
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,16,8,64,128,1,float16,float16,63,0.014373333503802618
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,16,8,64,0,1,float16,float16,63,0.014277332772811254
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,16,8,64,128,1,float16,fp8,63,0.00624533308049043
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,16,8,64,0,1,float16,fp8,63,0.006111999973654747
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,16,8,64,128,1,float16,float16,127,0.014357333381970724
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,16,8,64,0,1,float16,float16,127,0.01413333291808764
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,16,8,64,128,1,float16,fp8,127,0.00690133310854435
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,16,8,64,128,1,float16,float16,7,0.014501333236694336
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,16,8,64,0,1,float16,fp8,127,0.006911999856432279
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,16,8,64,128,1,float16,float16,255,0.026378666361172993
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,16,8,64,0,1,float16,float16,255,0.02649066597223282
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,16,8,64,128,1,float16,fp8,255,0.008799999952316284
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,16,8,64,0,1,float16,fp8,255,0.009088000282645226
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,16,8,64,128,1,float16,float16,511,0.03595199932654699
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,16,8,64,0,1,float16,float16,511,0.040789333482583366
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,16,8,64,128,1,float16,fp8,511,0.009701333319147428
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,16,8,64,0,1,float16,float16,7,0.01516266663869222
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,16,8,64,0,1,float16,fp8,511,0.015360000232855478
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,16,8,64,128,1,float16,float16,1023,0.036501333117485046
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,16,8,64,0,1,float16,float16,1023,0.04886400202910105
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,16,8,64,128,1,float16,fp8,1023,0.01032533310353756
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,16,8,64,0,1,float16,fp8,1023,0.019434666881958645
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,16,8,64,0,1,float16,float16,2047,0.06644799808661143
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,16,8,64,128,1,float16,float16,2047,0.038362666964530945
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,16,8,64,0,1,float16,float16,1,0.014453332871198654
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,16,8,64,128,1,float16,fp8,2047,0.010410666465759277
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,16,8,64,0,1,float16,fp8,2047,0.027429332335789997
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,16,8,64,128,1,float16,float16,4095,0.03851199895143509
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,16,8,64,0,1,float16,float16,4095,0.0886240005493164
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,16,8,64,128,1,float16,fp8,15,0.006069333602984746
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,16,8,64,0,1,float16,fp8,4095,0.04555733501911163
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,16,8,64,128,1,float16,float16,8191,0.03834133346875509
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,16,8,64,0,1,float16,fp8,15,0.006405333057045937
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,16,8,64,0,1,float16,float16,8191,0.13151466846466064
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,16,8,64,128,1,float16,fp8,8191,0.009690666571259499
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,16,8,64,0,1,float16,fp8,8191,0.07695466776688893
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,16,8,64,128,1,float16,float16,16383,0.03908266623814901
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,16,8,64,128,1,float16,fp8,16383,0.009808000177145004
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,16,8,64,0,1,float16,float16,16383,0.2186773419380188
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,16,8,64,0,1,float16,fp8,16383,0.14135467012723288
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,16,8,64,0,1,float16,float16,3,0.014773332824309668
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,16,1,64,128,1,float16,float16,1,0.007584000006318092
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,16,8,64,128,1,float16,float16,32767,0.03825066735347112
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,16,1,64,0,1,float16,float16,1,0.007690666864315669
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,16,1,64,128,1,float16,fp8,1,0.006965333595871925
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,16,8,64,0,1,float16,float16,32767,0.38970665136973065
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,16,1,64,0,1,float16,fp8,1,0.006517333288987477
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,16,8,64,128,1,float16,fp8,32767,0.010426666587591171
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,16,8,64,0,1,float16,fp8,32767,0.2715146740277608
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,16,1,64,128,1,float16,float16,3,0.007573333258430163
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,16,1,64,0,1,float16,float16,3,0.007749333356817563
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,16,1,64,128,1,float16,fp8,3,0.007034666836261749
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,16,1,64,0,1,float16,fp8,3,0.006351999938488007
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,16,1,64,128,1,float16,float16,7,0.007530666887760162
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,16,1,64,0,1,float16,float16,7,0.008341333518425623
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,16,1,64,128,1,float16,fp8,7,0.0063040001938740415
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,16,1,64,0,1,float16,fp8,7,0.006762666627764702
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,16,1,64,128,1,float16,float16,15,0.007711999739209811
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,16,1,64,0,1,float16,float16,15,0.007749333356817563
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,16,1,64,128,1,float16,fp8,15,0.007194666812817256
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,16,1,64,0,1,float16,fp8,15,0.0063040001938740415
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,16,1,64,128,1,float16,float16,31,0.008405333384871483
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,16,1,64,0,1,float16,float16,63,0.00902399979531765
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,16,1,64,0,1,float16,float16,31,0.007877333089709282
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,16,1,64,128,1,float16,fp8,31,0.006335999816656113
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,16,1,64,0,1,float16,fp8,31,0.006415999804933866
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,16,1,64,128,1,float16,float16,63,0.00873066671192646
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,16,1,64,128,1,float16,fp8,63,0.006266666576266289
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,16,1,64,0,1,float16,fp8,63,0.008367999767263731
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,16,8,64,128,1,float16,fp8,4095,0.00983466642598311
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,16,1,64,128,1,float16,float16,127,0.009130666653315226
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,16,1,64,0,1,float16,float16,127,0.00867733359336853
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,16,1,64,128,1,float16,fp8,127,0.007130666946371396
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,16,1,64,0,1,float16,fp8,127,0.007071999832987785
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,16,1,64,128,1,float16,float16,255,0.008896000062425932
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,16,1,64,0,1,float16,float16,255,0.0086666668454806
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,16,1,64,128,1,float16,fp8,255,0.00855466661353906
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,16,1,64,0,1,float16,fp8,255,0.008330666770537695
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,16,1,64,0,1,float16,float16,1023,0.015717333803574245
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,16,1,64,128,1,float16,float16,511,0.009226666763424873
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,16,1,64,0,1,float16,float16,511,0.009733333562811216
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,16,1,64,128,1,float16,fp8,511,0.007653333246707916
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,16,1,64,0,1,float16,fp8,511,0.00877333308259646
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,16,1,64,128,1,float16,float16,1023,0.013893333574136099
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,16,1,64,128,1,float16,fp8,1023,0.008682666967312494
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,16,1,64,0,1,float16,fp8,1023,0.009546666716535887
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,16,1,64,128,1,float16,float16,2047,0.01303999995191892
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,16,1,64,0,1,float16,float16,2047,0.017317333569129307
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,16,1,64,128,1,float16,fp8,2047,0.007818666597207388
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,16,1,64,0,1,float16,fp8,2047,0.010640000303586325
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,16,1,64,128,1,float16,float16,4095,0.013194666554530462
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,16,1,64,0,1,float16,float16,4095,0.017237332959969837
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,16,1,64,128,1,float16,fp8,4095,0.00795199970404307
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,16,1,64,0,1,float16,fp8,4095,0.012458667159080505
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,16,1,64,128,1,float16,float16,8191,0.013637332866589228
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,16,1,64,0,1,float16,float16,8191,0.014837333311637243
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,16,1,64,128,1,float16,fp8,8191,0.007776000226537387
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,16,1,64,0,1,float16,fp8,8191,0.01544533297419548
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,16,1,64,128,1,float16,float16,16383,0.014309333016475042
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,16,1,64,0,1,float16,float16,16383,0.01578666642308235
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,16,1,64,128,1,float16,fp8,16383,0.1613653302192688
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,16,1,64,0,1,float16,fp8,16383,0.018245333184798557
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,16,1,64,128,1,float16,float16,32767,0.015274666249752045
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,16,1,64,0,1,float16,float16,32767,0.01874133323629697
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,16,1,64,128,1,float16,fp8,32767,0.008266666904091835
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,16,1,64,0,1,float16,fp8,32767,0.022810667753219604
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,16,1,64,128,1,float16,float16,65535,0.016586666305859882
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,16,1,64,0,1,float16,float16,65535,0.022197333474953968
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,16,2,64,128,1,float16,float16,1,0.007567999884486198
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,16,1,64,0,1,float16,fp8,65535,0.028416000306606293
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,16,1,64,128,1,float16,fp8,65535,0.008389333263039589
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,16,2,64,0,1,float16,float16,1,0.0075040000180403394
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,16,2,64,128,1,float16,fp8,1,0.005765333150823911
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,16,2,64,0,1,float16,fp8,1,0.007274666801095009
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,16,2,64,128,1,float16,float16,3,0.007589333380262057
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,16,2,64,0,1,float16,float16,3,0.007690666864315669
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,16,2,64,128,1,float16,fp8,3,0.007327999919652939
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,16,1,64,128,1,float16,float16,131071,0.02141333371400833
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,16,2,64,0,1,float16,fp8,3,0.005482666815320651
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,16,2,64,0,1,float16,float16,7,0.00810666692753633
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,16,2,64,128,1,float16,float16,7,0.007706666365265846
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,16,1,64,0,1,float16,fp8,131071,0.03731200098991394
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,16,2,64,128,1,float16,fp8,7,0.005637333417932193
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,16,1,64,0,1,float16,float16,131071,0.029845332105954487
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,16,2,64,128,1,float16,fp8,15,0.005552000055710475
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,16,2,64,0,1,float16,fp8,7,0.005472000067432721
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,16,1,64,128,1,float16,fp8,131071,0.00874133345981439
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,16,2,64,128,1,float16,float16,15,0.008122666428486506
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,16,2,64,0,1,float16,float16,15,0.007733333234985669
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,16,2,64,0,1,float16,fp8,15,0.0058399997651577
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,16,2,64,128,1,float16,float16,31,0.008277333031098047
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,16,2,64,0,1,float16,float16,31,0.007925333455204964
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,16,2,64,128,1,float16,fp8,31,0.00549333356320858
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,16,2,64,128,1,float16,float16,127,0.009088000282645226
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,16,2,64,0,1,float16,float16,127,0.00890666681031386
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,16,2,64,0,1,float16,fp8,31,0.008256000156203905
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,16,2,64,128,1,float16,float16,63,0.008522666369875273
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,16,2,64,0,1,float16,float16,63,0.009056000038981438
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,16,2,64,128,1,float16,fp8,63,0.0058453331391016645
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,16,2,64,0,1,float16,fp8,63,0.005578666925430298
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,16,2,64,128,1,float16,fp8,127,0.006053333481152852
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,16,2,64,0,1,float16,fp8,127,0.007018666714429855
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,16,2,64,128,1,float16,float16,255,0.008613333106040955
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,16,2,64,0,1,float16,float16,255,0.00914666677514712
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,16,2,64,0,1,float16,fp8,511,0.007962666451931
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,16,2,64,128,1,float16,float16,1023,0.013616000612576803
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,16,2,64,128,1,float16,fp8,255,0.006810666372378667
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,16,2,64,0,1,float16,fp8,255,0.007776000226537387
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,16,2,64,128,1,float16,float16,511,0.009029333169261614
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,16,2,64,0,1,float16,float16,511,0.009679999823371569
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,16,2,64,128,1,float16,fp8,511,0.007205333560705185
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,16,2,64,0,1,float16,float16,1023,0.016197333733240765
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,16,2,64,128,1,float16,fp8,1023,0.007349333415428798
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,16,2,64,0,1,float16,fp8,1023,0.008496000121037165
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,16,2,64,128,1,float16,float16,2047,0.013045333325862885
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,16,2,64,0,1,float16,float16,2047,0.01710933322707812
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,16,2,64,128,1,float16,fp8,2047,0.007135999699433644
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,16,2,64,0,1,float16,fp8,2047,0.008992000172535578
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,16,2,64,128,1,float16,float16,4095,0.012410666793584824
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,16,2,64,0,1,float16,float16,4095,0.013151999562978745
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,16,2,64,128,1,float16,fp8,4095,0.007274666801095009
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,16,2,64,0,1,float16,fp8,4095,0.010149333626031876
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,16,2,64,128,1,float16,float16,8191,0.012944000462690989
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,16,2,64,0,1,float16,float16,8191,0.01568000018596649
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,16,2,64,128,1,float16,fp8,8191,0.007173333317041397
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,16,2,64,0,1,float16,fp8,8191,0.01191466674208641
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,16,2,64,128,1,float16,float16,16383,0.013674666484196981
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,16,2,64,0,1,float16,float16,16383,0.0163680004576842
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,16,2,64,128,1,float16,fp8,16383,0.007184000064929326
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,16,2,64,0,1,float16,fp8,16383,0.01414399966597557
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,16,2,64,128,1,float16,float16,32767,0.015647999942302704
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,16,2,64,0,1,float16,float16,32767,0.019173332800467808
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,16,2,64,128,1,float16,fp8,32767,0.007120000198483467
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,16,2,64,0,1,float16,fp8,32767,0.01800000046690305
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,16,2,64,128,1,float16,float16,65535,0.016800000021855038
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,16,2,64,0,1,float16,float16,65535,0.02292799949645996
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,16,2,64,0,1,float16,fp8,65535,0.022618666291236877
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,16,2,64,128,1,float16,fp8,65535,0.00744000015159448
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,16,4,64,128,1,float16,float16,1,0.00749333327015241
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,16,4,64,0,1,float16,float16,1,0.008538666491707167
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,16,4,64,128,1,float16,fp8,1,0.005669333040714264
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,16,4,64,0,1,float16,fp8,1,0.006874666859706243
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,16,4,64,0,1,float16,float16,3,0.008709333216150602
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,16,4,64,128,1,float16,float16,3,0.007525333513816197
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,16,4,64,128,1,float16,fp8,3,0.005658666913708051
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,16,4,64,0,1,float16,fp8,3,0.008890666688481966
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,16,2,64,128,1,float16,float16,131071,0.021295999487241108
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,16,4,64,128,1,float16,float16,7,0.0074879998962084455
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,16,2,64,128,1,float16,fp8,131071,0.007296000296870868
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,16,4,64,0,1,float16,float16,7,0.008090666805704435
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,16,4,64,128,1,float16,fp8,7,0.008432000254591307
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,16,2,64,0,1,float16,fp8,131071,0.03286399940649668
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,16,4,64,128,1,float16,fp8,15,0.00730666642387708
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,16,4,64,0,1,float16,fp8,15,0.008373333141207695
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,16,2,64,0,1,float16,float16,131071,0.0401653324564298
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,16,4,64,0,1,float16,fp8,7,0.005552000055710475
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,16,4,64,128,1,float16,float16,15,0.00749333327015241
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,16,4,64,0,1,float16,float16,15,0.007861333588759104
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,16,4,64,128,1,float16,float16,31,0.007791999727487564
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,16,4,64,0,1,float16,float16,31,0.008330666770537695
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,16,4,64,128,1,float16,fp8,31,0.008559999987483025
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,16,4,64,0,1,float16,fp8,31,0.0074453335255384445
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,16,4,64,128,1,float16,float16,63,0.008447999755541483
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,16,4,64,0,1,float16,float16,63,0.008336000144481659
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,16,4,64,128,1,float16,fp8,63,0.00600533311565717
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,16,4,64,0,1,float16,fp8,63,0.00559999980032444
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,16,4,64,128,1,float16,float16,127,0.009093333035707474
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,16,4,64,0,1,float16,float16,255,0.008762666955590248
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,16,4,64,0,1,float16,float16,127,0.009119999905427298
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,16,4,64,128,1,float16,fp8,127,0.007082666580875714
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,16,4,64,0,1,float16,fp8,127,0.0058559998869895935
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,16,4,64,128,1,float16,float16,255,0.008682666967312494
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,16,4,64,128,1,float16,fp8,255,0.006671999891599019
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,16,4,64,128,1,float16,float16,511,0.008869333192706108
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,16,4,64,0,1,float16,fp8,255,0.007680000116427739
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,16,4,64,0,1,float16,float16,511,0.010191999996701876
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,16,4,64,128,1,float16,fp8,511,0.007413333281874657
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,16,4,64,0,1,float16,fp8,511,0.00726400005320708
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,16,4,64,128,1,float16,float16,1023,0.013573333621025085
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,16,4,64,0,1,float16,float16,1023,0.015728000551462173
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,16,4,64,128,1,float16,fp8,1023,0.0068800002336502075
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,16,4,64,0,1,float16,fp8,1023,0.008469333251317343
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,16,4,64,128,1,float16,float16,2047,0.01303999995191892
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,16,4,64,0,1,float16,float16,2047,0.013178666432698568
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,16,4,64,128,1,float16,fp8,2047,0.006757333253820737
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,16,4,64,0,1,float16,fp8,2047,0.00903466654320558
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,16,4,64,128,1,float16,float16,4095,0.012560000022252401
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,16,4,64,0,1,float16,float16,4095,0.0129120002190272
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,16,4,64,128,1,float16,fp8,4095,0.007520000139872233
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,16,4,64,0,1,float16,fp8,4095,0.012170666207869848
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,16,4,64,128,1,float16,float16,8191,0.013584000368913015
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,16,4,64,0,1,float16,float16,8191,0.015008000036080679
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,16,4,64,128,1,float16,fp8,8191,0.00696000022192796
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,16,4,64,128,1,float16,fp8,16383,0.008416000132759413
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,16,4,64,0,1,float16,fp8,8191,0.011973333855470022
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,16,4,64,0,1,float16,fp8,16383,0.015008000036080679
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,16,4,64,128,1,float16,float16,16383,0.014368000129858652
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,16,4,64,0,1,float16,float16,16383,0.016976000120242436
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,16,4,64,128,1,float16,float16,32767,0.017162666966517765
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,16,4,64,0,1,float16,float16,32767,0.02605333427588145
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,16,4,64,128,1,float16,fp8,32767,0.008597333605090777
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,16,4,64,0,1,float16,fp8,32767,0.019551999866962433
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,16,4,64,128,1,float16,float16,65535,0.01911466692884763
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,16,4,64,128,1,float16,fp8,65535,0.007007999966541926
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,16,4,64,0,1,float16,float16,65535,0.03239466746648153
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,16,8,64,128,1,float16,float16,1,0.007727999861041705
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,16,4,64,0,1,float16,fp8,65535,0.024608001112937927
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,16,8,64,0,1,float16,float16,1,0.00766933336853981
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,16,8,64,128,1,float16,fp8,1,0.007258666679263115
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,16,8,64,0,1,float16,fp8,1,0.0058080001423756284
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,16,8,64,128,1,float16,float16,3,0.008112000301480293
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,16,8,64,0,1,float16,float16,3,0.007786666974425316
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,16,8,64,128,1,float16,fp8,3,0.007322666545708974
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,16,8,64,0,1,float16,fp8,3,0.0058026667684316635
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,16,4,64,128,1,float16,float16,131071,0.025248001019159954
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,16,4,64,0,1,float16,float16,131071,0.05366399884223938
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,16,8,64,128,1,float16,float16,7,0.008080000057816505
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,16,8,64,0,1,float16,float16,7,0.007738666608929634
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,16,8,64,128,1,float16,fp8,7,0.007386666412154834
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,16,8,64,0,1,float16,fp8,7,0.005994666367769241
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,16,8,64,128,1,float16,float16,15,0.007621333623925845
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,16,4,64,128,1,float16,fp8,131071,0.00842666688064734
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,16,8,64,0,1,float16,float16,15,0.007520000139872233
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,16,4,64,0,1,float16,fp8,131071,0.037674665451049805
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,16,8,64,128,1,float16,fp8,15,0.007274666801095009
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,16,8,64,0,1,float16,fp8,15,0.00596266674498717
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,16,8,64,128,1,float16,float16,31,0.008352000266313553
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,16,8,64,0,1,float16,float16,31,0.008133333176374435
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,16,8,64,128,1,float16,fp8,31,0.005642666791876157
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,16,8,64,0,1,float16,fp8,31,0.008447999755541483
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,16,8,64,128,1,float16,float16,63,0.008565333361426989
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,16,8,64,0,1,float16,float16,63,0.00879466657837232
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,16,8,64,128,1,float16,fp8,63,0.007541333635648091
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,16,8,64,0,1,float16,fp8,63,0.005984000240763028
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,16,8,64,128,1,float16,float16,127,0.009226666763424873
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,16,8,64,128,1,float16,fp8,255,0.007754666730761528
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,16,8,64,0,1,float16,fp8,255,0.007936000203092894
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,16,8,64,0,1,float16,float16,127,0.008767999708652496
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,16,8,64,128,1,float16,fp8,127,0.006095999851822853
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,16,8,64,0,1,float16,fp8,127,0.008506666868925095
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,16,8,64,128,1,float16,float16,255,0.008639999975760778
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,16,8,64,0,1,float16,float16,255,0.009050666665037474
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,16,8,64,128,1,float16,float16,511,0.009248000259200731
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,16,8,64,0,1,float16,float16,511,0.010202666744589806
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,16,8,64,128,1,float16,fp8,511,0.007418666655818622
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,16,8,64,0,1,float16,fp8,1023,0.008442666381597519
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,16,8,64,0,1,float16,fp8,511,0.007647999872763951
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,16,8,64,128,1,float16,float16,1023,0.011695999652147293
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,16,8,64,0,1,float16,float16,1023,0.012421333541472753
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,16,8,64,0,1,float16,fp8,2047,0.008986666798591614
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,16,8,64,128,1,float16,fp8,1023,0.00696000022192796
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,16,8,64,128,1,float16,float16,2047,0.013157332936922709
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,16,8,64,0,1,float16,float16,2047,0.012597333639860153
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,16,8,64,128,1,float16,fp8,2047,0.007391999786098798
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,16,8,64,128,1,float16,float16,4095,0.012560000022252401
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,16,8,64,0,1,float16,float16,4095,0.013701333353916803
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,16,8,64,128,1,float16,fp8,4095,0.0069440001000960665
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,16,8,64,0,1,float16,fp8,4095,0.009989333028594652
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,16,8,64,128,1,float16,float16,8191,0.01603200038274129
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,16,8,64,0,1,float16,float16,8191,0.0199946661790212
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,16,8,64,128,1,float16,fp8,8191,0.007653333246707916
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,16,8,64,0,1,float16,fp8,8191,0.013093333691358566
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,16,8,64,0,1,float16,float16,16383,0.02350933353106181
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,16,8,64,128,1,float16,float16,16383,0.016895999511082966
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,16,8,64,128,1,float16,fp8,16383,0.006837333242098491
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,16,8,64,0,1,float16,fp8,16383,0.01651200031240781
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,16,8,64,128,1,float16,float16,32767,0.0201706662774086
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,16,8,64,0,1,float16,float16,32767,0.03519999980926514
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,16,8,64,128,1,float16,fp8,32767,0.006965333595871925
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,16,8,64,0,1,float16,fp8,32767,0.022687998910744984
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,16,8,64,0,1,float16,float16,65535,0.051354666550954185
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,16,8,64,128,1,float16,float16,65535,0.024512000381946564
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,16,8,64,128,1,float16,fp8,65535,0.007701333612203598
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,16,1,64,128,1,float16,float16,1,0.008496000121037165
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,16,8,64,0,1,float16,fp8,65535,0.03344533344109853
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,16,1,64,0,1,float16,float16,1,0.008197333042820295
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,16,1,64,128,1,float16,fp8,1,0.008272000278035799
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,16,1,64,0,1,float16,fp8,1,0.006448000048597653
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,16,1,64,128,1,float16,float16,3,0.008240000034372011
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,16,1,64,0,1,float16,float16,3,0.00898133342464765
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,16,1,64,128,1,float16,fp8,3,0.0064319999267657595
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,16,1,64,0,1,float16,fp8,3,0.00726400005320708
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,16,8,64,0,1,float16,float16,131071,0.08714133501052856
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,16,1,64,128,1,float16,float16,7,0.008565333361426989
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,16,8,64,0,1,float16,fp8,131071,0.060138667623202004
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,16,8,64,128,1,float16,fp8,131071,0.007471999774376552
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,16,8,64,128,1,float16,float16,131071,0.03166399896144867
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,16,1,64,0,1,float16,float16,7,0.008133333176374435
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,16,1,64,128,1,float16,fp8,7,0.007333333293596904
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,16,1,64,0,1,float16,fp8,7,0.006277333324154218
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,16,1,64,128,1,float16,float16,15,0.008527999743819237
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,16,1,64,0,1,float16,float16,15,0.007989333321650824
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,16,1,64,128,1,float16,fp8,15,0.0063786668082078295
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,16,1,64,0,1,float16,fp8,15,0.006517333288987477
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,16,1,64,128,1,float16,float16,31,0.008341333518425623
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,16,1,64,0,1,float16,float16,31,0.008416000132759413
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,16,1,64,128,1,float16,fp8,31,0.007322666545708974
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,16,1,64,0,1,float16,fp8,31,0.0063040001938740415
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,16,1,64,128,1,float16,float16,63,0.009296000003814697
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,16,1,64,0,1,float16,float16,63,0.00890666681031386
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,16,1,64,128,1,float16,fp8,63,0.006570666407545407
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,16,1,64,0,1,float16,fp8,63,0.006522666662931442
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,16,1,64,0,1,float16,float16,255,0.009103999783595404
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,16,1,64,128,1,float16,float16,127,0.00919999989370505
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,16,1,64,0,1,float16,float16,127,0.00922133338948091
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,16,1,64,128,1,float16,float16,511,0.009109333157539368
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,16,1,64,128,1,float16,fp8,127,0.008965333302815756
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,16,1,64,0,1,float16,fp8,127,0.006810666372378667
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,16,1,64,128,1,float16,float16,255,0.00926399976015091
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,16,1,64,128,1,float16,fp8,255,0.007994666695594788
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,16,1,64,0,1,float16,fp8,255,0.008661333471536636
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,16,1,64,0,1,float16,float16,511,0.010239999741315842
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,16,1,64,128,1,float16,fp8,511,0.008485333373149237
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,16,1,64,0,1,float16,fp8,511,0.009354666496316591
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,16,1,64,128,1,float16,float16,1023,0.014277332772811254
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,16,1,64,0,1,float16,float16,1023,0.015594666202863058
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,16,1,64,128,1,float16,fp8,1023,0.008826666822036108
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,16,1,64,0,1,float16,fp8,1023,0.009621333330869675
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,16,1,64,128,1,float16,float16,2047,0.01320533330241839
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,16,1,64,0,1,float16,float16,2047,0.01736533393462499
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,16,1,64,128,1,float16,fp8,2047,0.008266666904091835
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,16,1,64,0,1,float16,fp8,2047,0.01128000020980835
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,16,1,64,128,1,float16,float16,4095,0.013653332988421122
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,16,1,64,0,1,float16,float16,4095,0.013023999830087027
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,16,1,64,128,1,float16,fp8,4095,0.008789333204428354
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,16,1,64,0,1,float16,fp8,4095,0.012698666503032049
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,16,1,64,128,1,float16,float16,8191,0.01341333364446958
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,16,1,64,0,1,float16,float16,8191,0.015925332903862
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,16,1,64,128,1,float16,fp8,8191,0.008229333286484083
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,16,1,64,0,1,float16,fp8,8191,0.014912000546852747
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,16,1,64,128,1,float16,float16,16383,0.015423999478419622
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,16,1,64,0,1,float16,float16,16383,0.016447999825080235
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,16,1,64,0,1,float16,fp8,16383,0.01748266691962878
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,16,1,64,128,1,float16,fp8,16383,0.00874133345981439
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,16,1,64,128,1,float16,float16,32767,0.015829333414634068
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,16,1,64,0,1,float16,float16,32767,0.02022933339079221
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,16,1,64,128,1,float16,fp8,32767,0.008298666526873907
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,16,1,64,0,1,float16,fp8,32767,0.022821334501107533
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,16,1,64,0,1,float16,float16,65535,0.022869333624839783
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,16,1,64,128,1,float16,float16,65535,0.01786133274435997
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,16,1,64,128,1,float16,fp8,65535,0.007765333478649457
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,16,2,64,128,1,float16,float16,1,0.007578666632374127
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,16,2,64,0,1,float16,float16,1,0.008240000034372011
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,16,1,64,0,1,float16,fp8,65535,0.028970666229724884
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,16,2,64,128,1,float16,fp8,1,0.005594666426380475
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,16,2,64,0,1,float16,fp8,1,0.007205333560705185
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,16,2,64,128,1,float16,float16,3,0.008842666943868002
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,16,2,64,0,1,float16,float16,3,0.007642666498819987
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,16,2,64,128,1,float16,fp8,3,0.007296000296870868
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,16,2,64,0,1,float16,fp8,3,0.008613333106040955
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,16,2,64,128,1,float16,float16,7,0.007637333124876022
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,16,2,64,0,1,float16,float16,7,0.008117333054542542
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,16,2,64,128,1,float16,fp8,7,0.008629333227872849
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,16,2,64,0,1,float16,fp8,7,0.005573333551486333
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,16,2,64,128,1,float16,float16,15,0.00808533343176047
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,16,1,64,128,1,float16,float16,131071,0.022624000906944275
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,16,2,64,128,1,float16,fp8,15,0.005626666670044263
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,16,2,64,0,1,float16,float16,15,0.00854399986565113
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,16,2,64,128,1,float16,float16,31,0.008383999889095625
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,16,2,64,0,1,float16,fp8,15,0.007093333328763644
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,16,1,64,0,1,float16,float16,131071,0.0407679999868075
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,16,1,64,0,1,float16,fp8,131071,0.041834667325019836
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,16,2,64,128,1,float16,float16,63,0.00903466654320558
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,16,1,64,128,1,float16,fp8,131071,0.008447999755541483
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,16,2,64,0,1,float16,float16,31,0.00797333319981893
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,16,2,64,128,1,float16,fp8,31,0.008309333274761835
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,16,2,64,0,1,float16,fp8,31,0.0069386667261521024
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,16,2,64,0,1,float16,float16,63,0.00922133338948091
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,16,2,64,128,1,float16,fp8,63,0.006981333096822103
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,16,2,64,0,1,float16,fp8,127,0.007002666592597961
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,16,2,64,0,1,float16,fp8,63,0.007205333560705185
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,16,2,64,128,1,float16,float16,127,0.009455999980370203
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,16,2,64,0,1,float16,float16,127,0.008821333448092142
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,16,2,64,128,1,float16,fp8,127,0.008378666515151659
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,16,2,64,128,1,float16,float16,255,0.009392000113924345
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,16,2,64,0,1,float16,float16,255,0.009232000137368837
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,16,2,64,128,1,float16,fp8,255,0.007301333049933116
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,16,2,64,0,1,float16,fp8,255,0.007791999727487564
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,16,2,64,128,1,float16,float16,511,0.009423999736706415
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,16,2,64,0,1,float16,float16,511,0.009765333185593287
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,16,2,64,128,1,float16,fp8,511,0.008570666735370954
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,16,2,64,0,1,float16,fp8,511,0.008330666770537695
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,16,2,64,128,1,float16,float16,1023,0.013999999811251959
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,16,2,64,0,1,float16,float16,1023,0.01621866722901662
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,16,2,64,128,1,float16,fp8,1023,0.0075519997626543045
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,16,2,64,0,1,float16,fp8,1023,0.008767999708652496
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,16,2,64,128,1,float16,float16,4095,0.01320533330241839
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,16,2,64,128,1,float16,float16,2047,0.01239466667175293
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,16,2,64,0,1,float16,float16,2047,0.013253333667914072
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,16,2,64,128,1,float16,fp8,2047,0.007386666412154834
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,16,2,64,0,1,float16,fp8,2047,0.009637333452701569
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,16,2,64,0,1,float16,float16,4095,0.014159999787807465
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,16,2,64,128,1,float16,fp8,4095,0.007647999872763951
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,16,2,64,0,1,float16,fp8,4095,0.01073066641887029
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,16,2,64,0,1,float16,float16,8191,0.016058667252461117
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,16,2,64,0,1,float16,float16,16383,0.017738666385412216
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,16,2,64,128,1,float16,float16,8191,0.013343999783198038
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,16,2,64,128,1,float16,fp8,8191,0.008672000219424566
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,16,2,64,0,1,float16,fp8,8191,0.012810666114091873
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,16,2,64,128,1,float16,float16,16383,0.01492799942692121
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,16,2,64,128,1,float16,fp8,16383,0.007653333246707916
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,16,2,64,0,1,float16,fp8,16383,0.014970666418472925
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,16,2,64,128,1,float16,float16,32767,0.01754666616519292
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,16,2,64,0,1,float16,fp8,32767,0.020453333854675293
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,16,2,64,0,1,float16,float16,32767,0.027285332481066387
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,16,2,64,128,1,float16,fp8,32767,0.0069386667261521024
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,16,2,64,0,1,float16,float16,65535,0.03292799989382426
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,16,2,64,128,1,float16,float16,65535,0.02011200040578842
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,16,2,64,128,1,float16,fp8,65535,0.007807999849319458
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,16,4,64,128,1,float16,float16,1,0.008074666683872541
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,16,4,64,0,1,float16,float16,1,0.008223999912540117
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,16,2,64,0,1,float16,fp8,65535,0.026026666164398193
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,16,4,64,0,1,float16,fp8,1,0.007962666451931
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,16,4,64,128,1,float16,fp8,1,0.006895999734600385
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,16,4,64,128,1,float16,float16,3,0.008272000278035799
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,16,4,64,0,1,float16,float16,3,0.008309333274761835
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,16,4,64,128,1,float16,fp8,3,0.007061333085099856
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,16,4,64,0,1,float16,fp8,3,0.007093333328763644
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,16,2,64,128,1,float16,float16,131071,0.026202666262785595
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,16,4,64,128,1,float16,float16,7,0.008314666648705801
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,16,2,64,0,1,float16,fp8,131071,0.03777066618204117
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,16,4,64,0,1,float16,float16,7,0.008272000278035799
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,16,4,64,0,1,float16,float16,15,0.008352000266313553
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,16,4,64,128,1,float16,fp8,7,0.007327999919652939
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,16,4,64,0,1,float16,fp8,7,0.007162666569153468
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,16,2,64,128,1,float16,fp8,131071,0.007807999849319458
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,16,2,64,0,1,float16,float16,131071,0.055871998270352684
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,16,4,64,128,1,float16,float16,15,0.007663999994595845
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,16,4,64,128,1,float16,fp8,15,0.007258666679263115
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,16,4,64,0,1,float16,fp8,15,0.00697066696981589
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,16,4,64,128,1,float16,float16,31,0.008453333129485449
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,16,4,64,0,1,float16,float16,31,0.00847999999920527
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,16,4,64,128,1,float16,fp8,31,0.007727999861041705
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,16,4,64,0,1,float16,fp8,31,0.007621333623925845
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,16,4,64,128,1,float16,float16,63,0.009103999783595404
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,16,4,64,0,1,float16,float16,63,0.008581333483258883
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,16,4,64,128,1,float16,fp8,63,0.005621333296100299
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,16,4,64,0,1,float16,fp8,63,0.007167999943097432
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,16,4,64,128,1,float16,float16,127,0.009216000015536943
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,16,4,64,0,1,float16,float16,127,0.009322666873534521
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,16,4,64,0,1,float16,fp8,255,0.007818666597207388
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,16,4,64,128,1,float16,fp8,127,0.007269333427151044
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,16,4,64,0,1,float16,float16,511,0.010144000252087912
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,16,4,64,0,1,float16,fp8,127,0.007354666789372762
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,16,4,64,128,1,float16,float16,255,0.009472000102202097
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,16,4,64,0,1,float16,float16,255,0.008778666456540426
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,16,4,64,128,1,float16,fp8,255,0.007029333462317784
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,16,4,64,128,1,float16,float16,511,0.009312000125646591
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,16,4,64,128,1,float16,fp8,511,0.007482666522264481
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,16,4,64,0,1,float16,fp8,511,0.008005333443482717
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,16,4,64,128,1,float16,float16,1023,0.012202666451533636
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,16,4,64,0,1,float16,float16,1023,0.012485332787036896
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,16,4,64,128,1,float16,fp8,1023,0.006858666737874349
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,16,4,64,0,1,float16,fp8,1023,0.007930666829148928
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,16,4,64,128,1,float16,float16,2047,0.013183999806642532
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,16,4,64,0,1,float16,float16,2047,0.01350933313369751
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,16,4,64,128,1,float16,fp8,2047,0.007280000175038974
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,16,4,64,0,1,float16,fp8,2047,0.00973866693675518
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,16,4,64,128,1,float16,float16,4095,0.013663999736309052
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,16,4,64,0,1,float16,float16,4095,0.013839999834696451
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,16,4,64,128,1,float16,fp8,4095,0.006741333131988843
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,16,4,64,0,1,float16,fp8,4095,0.010629333555698395
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,16,4,64,0,1,float16,fp8,8191,0.013701333353916803
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,16,4,64,128,1,float16,float16,8191,0.01646399994691213
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,16,4,64,0,1,float16,float16,8191,0.020666666328907013
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,16,4,64,128,1,float16,fp8,8191,0.007594666754206021
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,16,4,64,128,1,float16,float16,16383,0.0169813334941864
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,16,4,64,0,1,float16,float16,16383,0.023717333873112995
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,16,4,64,128,1,float16,fp8,16383,0.007567999884486198
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,16,4,64,0,1,float16,fp8,16383,0.016437333077192307
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,16,4,64,128,1,float16,float16,32767,0.021055998901526134
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,16,4,64,0,1,float16,float16,32767,0.03531199942032496
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,16,4,64,0,1,float16,fp8,32767,0.022389332453409832
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,16,4,64,128,1,float16,fp8,32767,0.0075573331365982694
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,16,4,64,128,1,float16,float16,65535,0.02418133368094762
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,16,4,64,0,1,float16,float16,65535,0.053183997670809426
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,16,4,64,128,1,float16,fp8,65535,0.009258666386206945
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,16,8,64,128,1,float16,float16,1,0.008223999912540117
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,16,4,64,0,1,float16,fp8,65535,0.03340800106525421
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,16,8,64,0,1,float16,float16,1,0.008687999720374743
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,16,8,64,128,1,float16,fp8,1,0.007407999907930692
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,16,8,64,0,1,float16,fp8,1,0.006837333242098491
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,16,8,64,128,1,float16,float16,3,0.008752000207702318
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,16,8,64,0,1,float16,float16,3,0.008293333152929941
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,16,8,64,128,1,float16,fp8,3,0.006800000245372455
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,16,8,64,0,1,float16,fp8,3,0.008512000242869059
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,16,8,64,128,1,float16,float16,7,0.008165333420038223
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,16,4,64,0,1,float16,float16,131071,0.08849066495895386
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,16,4,64,0,1,float16,fp8,131071,0.06132266422112783
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,16,8,64,0,1,float16,float16,7,0.00814933329820633
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,16,4,64,128,1,float16,float16,131071,0.03202133377393087
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,16,8,64,128,1,float16,fp8,7,0.008581333483258883
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,16,8,64,0,1,float16,fp8,7,0.007274666801095009
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,16,8,64,128,1,float16,float16,15,0.00761600024998188
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,16,4,64,128,1,float16,fp8,131071,0.007786666974425316
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,16,8,64,0,1,float16,float16,15,0.00772266648709774
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,16,8,64,128,1,float16,fp8,15,0.007621333623925845
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,16,8,64,128,1,float16,float16,31,0.00854399986565113
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,16,8,64,0,1,float16,float16,31,0.008623999853928884
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,16,8,64,128,1,float16,fp8,31,0.007231999809543292
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,16,8,64,0,1,float16,fp8,31,0.005584000299374263
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,16,8,64,128,1,float16,float16,63,0.009232000137368837
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,16,8,64,0,1,float16,float16,63,0.008618666479984919
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,16,8,64,128,1,float16,fp8,63,0.007050666958093643
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,16,8,64,0,1,float16,fp8,127,0.007567999884486198
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,16,8,64,128,1,float16,float16,255,0.008885333314538002
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,16,8,64,0,1,float16,fp8,63,0.008245333408315977
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,16,8,64,128,1,float16,float16,127,0.01027199998497963
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,16,8,64,0,1,float16,float16,127,0.009381333366036415
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,16,8,64,128,1,float16,fp8,127,0.005749333028992017
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,16,8,64,0,1,float16,float16,511,0.01210133358836174
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,16,8,64,0,1,float16,float16,255,0.00926399976015091
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,16,8,64,128,1,float16,fp8,255,0.007589333380262057
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,16,8,64,0,1,float16,fp8,255,0.007402666533986728
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,16,8,64,128,1,float16,float16,511,0.01227733368674914
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,16,8,64,128,1,float16,fp8,511,0.0074346667776505155
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,16,8,64,0,1,float16,fp8,511,0.007578666632374127
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,16,8,64,128,1,float16,float16,2047,0.015471999843915304
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,16,8,64,128,1,float16,float16,1023,0.01221866657336553
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,16,8,64,0,1,float16,float16,1023,0.012069333344697952
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,16,8,64,128,1,float16,fp8,1023,0.007530666887760162
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,16,8,64,128,1,float16,float16,4095,0.016085332880417507
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,16,8,64,0,1,float16,fp8,1023,0.008037333066264788
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,16,8,64,0,1,float16,float16,2047,0.01802666609485944
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,16,8,64,128,1,float16,fp8,2047,0.007082666580875714
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,16,8,64,0,1,float16,fp8,2047,0.01003200002014637
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,16,8,64,0,1,float16,float16,4095,0.01953599974513054
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,16,8,64,128,1,float16,fp8,4095,0.007520000139872233
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,16,8,64,0,1,float16,fp8,4095,0.011584000041087469
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,16,8,64,128,1,float16,float16,8191,0.01878400022784869
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,16,8,64,0,1,float16,float16,8191,0.0276853342851003
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,16,8,64,128,1,float16,fp8,8191,0.007055999711155891
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,16,8,64,0,1,float16,fp8,8191,0.014959999670584997
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,16,8,64,128,1,float16,float16,16383,0.022255999346574146
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,16,8,64,0,1,float16,float16,16383,0.03743999948104223
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,16,8,64,0,1,float16,fp8,16383,0.020015999674797058
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,16,8,64,128,1,float16,fp8,16383,0.007733333234985669
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,16,8,64,128,1,float16,float16,32767,0.02810666710138321
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,16,8,64,0,1,float16,float16,32767,0.05894933144251505
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,16,8,64,128,1,float16,fp8,32767,0.00860799973209699
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,16,8,64,0,1,float16,fp8,32767,0.03480000048875809
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,16,8,64,0,1,float16,float16,65535,0.09012266993522644
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,16,8,64,128,1,float16,float16,65535,0.03433600068092346
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,16,8,64,128,1,float16,fp8,65535,0.0081386665503184
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,16,8,64,0,1,float16,fp8,65535,0.05314133564631144
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,16,1,64,128,1,float16,float16,1,0.010581333190202713
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,16,1,64,0,1,float16,float16,1,0.010709332923094431
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,16,1,64,128,1,float16,fp8,1,0.007290666922926903
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,16,1,64,0,1,float16,fp8,1,0.007365333537260692
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,16,8,64,128,1,float16,float16,131071,0.035749333600203194
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,16,1,64,128,1,float16,float16,3,0.010666667173306147
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,16,1,64,0,1,float16,float16,3,0.010741333166758219
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,16,1,64,128,1,float16,fp8,3,0.006490666419267654
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,16,8,64,0,1,float16,float16,131071,0.14338133732477823
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,16,1,64,128,1,float16,float16,7,0.010735999792814255
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,16,1,64,0,1,float16,fp8,3,0.00720000018676122
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,16,8,64,128,1,float16,fp8,131071,0.008117333054542542
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,16,8,64,0,1,float16,fp8,131071,0.09299733241399129
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,16,1,64,0,1,float16,float16,7,0.0099093330403169
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,16,1,64,128,1,float16,fp8,7,0.008421333506703377
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,16,1,64,0,1,float16,fp8,7,0.007391999786098798
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,16,1,64,128,1,float16,float16,15,0.010575999816258749
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,16,1,64,0,1,float16,float16,15,0.010490667074918747
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,16,1,64,128,1,float16,fp8,15,0.0074986666440963745
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,16,1,64,0,1,float16,fp8,15,0.006400000303983688
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,16,1,64,128,1,float16,float16,31,0.01055466632048289
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,16,1,64,0,1,float16,float16,31,0.010527999450763067
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,16,1,64,128,1,float16,fp8,31,0.007327999919652939
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,16,8,64,0,1,float16,fp8,15,0.006751999879876773
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,16,1,64,0,1,float16,fp8,31,0.007424000029762586
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,16,1,64,128,1,float16,float16,63,0.010602666685978571
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,16,1,64,0,1,float16,float16,63,0.00983466642598311
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,16,1,64,128,1,float16,fp8,63,0.008325333396593729
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,16,1,64,0,1,float16,fp8,63,0.00725333330531915
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,16,1,64,128,1,float16,float16,127,0.010645333677530289
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,16,1,64,0,1,float16,float16,127,0.010234666367371878
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,16,1,64,128,1,float16,fp8,127,0.007861333588759104
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,16,1,64,0,1,float16,fp8,127,0.007813333223263422
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,16,1,64,128,1,float16,float16,255,0.012245333443085352
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,16,1,64,0,1,float16,float16,255,0.012634667257467905
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,16,1,64,0,1,float16,fp8,511,0.01007466639081637
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,16,1,64,128,1,float16,fp8,255,0.008447999755541483
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,16,1,64,0,1,float16,fp8,255,0.009418666362762451
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,16,1,64,128,1,float16,float16,511,0.01693333312869072
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,16,1,64,0,1,float16,float16,511,0.017685333887736004
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,16,1,64,128,1,float16,fp8,511,0.00842666688064734
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,16,1,64,128,1,float16,float16,1023,0.021583999196688335
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,16,1,64,128,1,float16,fp8,1023,0.00916800027092298
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,16,1,64,0,1,float16,float16,1023,0.025311999022960663
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,16,1,64,0,1,float16,fp8,1023,0.013104000439246496
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,16,1,64,128,1,float16,float16,2047,0.02845866729815801
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,16,1,64,0,1,float16,float16,2047,0.03687999894221624
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,16,1,64,128,1,float16,fp8,2047,0.009690666571259499
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,16,1,64,0,1,float16,fp8,2047,0.019909333437681198
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,16,1,64,128,1,float16,float16,4095,0.03469333300987879
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,16,1,64,0,1,float16,float16,4095,0.053616002202034
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,16,1,64,128,1,float16,fp8,4095,0.011050666371981302
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,16,1,64,0,1,float16,float16,8191,0.0689333329598109
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,16,1,64,0,1,float16,fp8,4095,0.026885333160559338
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,16,1,64,128,1,float16,float16,8191,0.035360001027584076
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,16,1,64,0,1,float16,fp8,8191,0.0352906659245491
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,16,1,64,128,1,float16,fp8,8191,0.00984533317387104
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,16,2,64,128,1,float16,float16,1,0.010869332899649939
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,16,1,64,128,1,float16,float16,16383,0.034527999659379326
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,16,2,64,0,1,float16,float16,1,0.01110400011142095
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,16,2,64,128,1,float16,fp8,1,0.005957333371043205
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,16,1,64,0,1,float16,float16,16383,0.09060266613960266
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,16,1,64,0,1,float16,fp8,16383,0.058335999647776283
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,16,2,64,0,1,float16,fp8,3,0.005973333492875099
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,16,2,64,0,1,float16,fp8,1,0.006864000111818314
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,16,1,64,128,1,float16,fp8,16383,0.009962666779756546
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,16,2,64,128,1,float16,float16,3,0.010762666662534079
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,16,2,64,0,1,float16,float16,3,0.010186666622757912
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,16,2,64,128,1,float16,fp8,3,0.007242666557431221
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,16,2,64,128,1,float16,float16,7,0.011018666128317514
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,16,2,64,128,1,float16,fp8,7,0.005989333614706993
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,16,2,64,0,1,float16,float16,7,0.010842667271693548
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,16,2,64,0,1,float16,fp8,7,0.006911999856432279
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,16,2,64,128,1,float16,float16,15,0.011136000355084738
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,16,2,64,0,1,float16,float16,15,0.010821333775917688
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,16,2,64,128,1,float16,fp8,15,0.005968000118931134
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,16,2,64,128,1,float16,float16,63,0.01097600037852923
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,16,2,64,0,1,float16,float16,63,0.01071999967098236
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,16,2,64,0,1,float16,fp8,15,0.00697066696981589
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,16,2,64,128,1,float16,float16,31,0.010981333752473196
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,16,2,64,0,1,float16,float16,31,0.01020800011853377
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,16,2,64,128,1,float16,fp8,31,0.00701333334048589
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,16,2,64,0,1,float16,fp8,31,0.007109333450595538
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,16,2,64,128,1,float16,fp8,63,0.005941333249211311
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,16,2,64,0,1,float16,fp8,63,0.008309333274761835
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,16,2,64,128,1,float16,float16,127,0.010762666662534079
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,16,2,64,0,1,float16,float16,127,0.010703999549150467
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,16,2,64,128,1,float16,fp8,127,0.007285333548982938
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,16,2,64,0,1,float16,fp8,127,0.006319999694824219
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,16,2,64,128,1,float16,float16,255,0.01869333287080129
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,16,2,64,128,1,float16,fp8,511,0.008746666833758354
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,16,2,64,0,1,float16,float16,255,0.018677332748969395
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,16,2,64,128,1,float16,fp8,255,0.007413333281874657
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,16,2,64,0,1,float16,fp8,255,0.008869333192706108
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,16,2,64,128,1,float16,float16,511,0.02330133318901062
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,16,2,64,0,1,float16,float16,511,0.025573333104451496
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,16,2,64,0,1,float16,fp8,511,0.009568000212311745
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,16,2,64,128,1,float16,float16,1023,0.03334933271010717
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,16,2,64,0,1,float16,float16,1023,0.0402453343073527
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,16,2,64,128,1,float16,fp8,1023,0.009402666861812273
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,16,2,64,0,1,float16,fp8,1023,0.016501333564519882
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,16,2,64,128,1,float16,float16,2047,0.03583466758330663
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,16,2,64,0,1,float16,float16,2047,0.052042668064435325
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,16,2,64,128,1,float16,fp8,2047,0.009349333122372627
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,16,2,64,0,1,float16,fp8,2047,0.02073066681623459
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,16,2,64,128,1,float16,float16,4095,0.035973332822322845
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,16,2,64,128,1,float16,fp8,4095,0.0086666668454806
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,16,2,64,0,1,float16,float16,4095,0.06726933519045512
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,16,2,64,0,1,float16,fp8,4095,0.029002666473388672
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,16,2,64,128,1,float16,float16,8191,0.03595733394225439
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,16,2,64,0,1,float16,float16,8191,0.08953600128491719
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,16,2,64,128,1,float16,fp8,8191,0.009039999917149544
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,16,2,64,0,1,float16,fp8,8191,0.046480000019073486
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,16,2,64,128,1,float16,float16,16383,0.03521066655715307
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,16,4,64,128,1,float16,float16,1,0.015311999867359797
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,16,4,64,0,1,float16,float16,1,0.014661333213249842
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,16,2,64,128,1,float16,fp8,16383,0.00938666673998038
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,16,2,64,0,1,float16,float16,16383,0.13221866885821024
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,16,4,64,128,1,float16,fp8,1,0.008176000167926153
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,16,2,64,0,1,float16,fp8,16383,0.07973866661389668
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,16,4,64,0,1,float16,fp8,1,0.006453333422541618
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,16,4,64,128,1,float16,float16,3,0.015295999745527903
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,16,4,64,0,1,float16,float16,3,0.014741333822409311
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,16,4,64,128,1,float16,fp8,3,0.008346666892369589
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,16,4,64,0,1,float16,fp8,3,0.007178666690985362
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,16,4,64,128,1,float16,float16,7,0.014783999572197596
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,16,4,64,0,1,float16,float16,7,0.015194666882356008
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,16,4,64,128,1,float16,fp8,7,0.0064213331788778305
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,16,4,64,0,1,float16,fp8,7,0.008613333106040955
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,16,4,64,128,1,float16,float16,15,0.014858666807413101
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,16,4,64,0,1,float16,float16,15,0.015024000157912573
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,16,4,64,128,1,float16,fp8,15,0.007178666690985362
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,16,4,64,0,1,float16,fp8,15,0.007125333572427432
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,16,4,64,128,1,float16,float16,31,0.015130666395028433
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,16,4,64,0,1,float16,float16,31,0.014597332725922266
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,16,4,64,128,1,float16,fp8,31,0.007226666435599327
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,16,4,64,0,1,float16,fp8,31,0.006906666482488315
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,16,4,64,128,1,float16,float16,63,0.014906667172908783
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,16,4,64,0,1,float16,float16,63,0.014202666779359182
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,16,4,64,128,1,float16,fp8,63,0.007834666719039282
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,16,4,64,0,1,float16,fp8,63,0.007370666911204656
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,16,4,64,0,1,float16,float16,255,0.02605866640806198
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,16,4,64,128,1,float16,float16,127,0.015061333775520325
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,16,4,64,0,1,float16,float16,127,0.014522666732470194
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,16,4,64,128,1,float16,fp8,127,0.007413333281874657
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,16,4,64,0,1,float16,fp8,127,0.007098666702707608
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,16,4,64,128,1,float16,float16,255,0.02720533311367035
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,16,4,64,128,1,float16,fp8,255,0.008687999720374743
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,16,4,64,0,1,float16,fp8,255,0.009653333574533463
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,16,4,64,128,1,float16,float16,511,0.035546667873859406
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,16,4,64,0,1,float16,float16,511,0.04074666649103165
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,16,4,64,128,1,float16,fp8,511,0.009935999910036722
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,16,4,64,0,1,float16,fp8,511,0.0161013330022494
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,16,4,64,128,1,float16,float16,1023,0.0366239994764328
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,16,4,64,0,1,float16,float16,2047,0.06743466854095459
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,16,4,64,0,1,float16,float16,1023,0.04993600149949392
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,16,4,64,128,1,float16,fp8,1023,0.01032533310353756
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,16,4,64,0,1,float16,fp8,1023,0.019968000551064808
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,16,4,64,128,1,float16,float16,2047,0.03791466603676478
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,16,4,64,128,1,float16,fp8,2047,0.009904000287254652
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,16,4,64,0,1,float16,fp8,2047,0.02812266598145167
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,16,4,64,128,1,float16,float16,4095,0.038805333276589714
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,16,4,64,0,1,float16,float16,4095,0.08919999996821086
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,16,4,64,128,1,float16,fp8,4095,0.010581333190202713
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,16,4,64,0,1,float16,fp8,4095,0.045328001181284584
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,16,4,64,0,1,float16,float16,8191,0.13197867075602213
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,16,4,64,128,1,float16,float16,8191,0.03862933317820231
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,16,4,64,128,1,float16,fp8,8191,0.009888000165422758
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,16,4,64,0,1,float16,fp8,8191,0.07809600234031677
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,16,8,64,128,1,float16,float16,1,0.022442666192849476
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,16,4,64,128,1,float16,float16,16383,0.03886933376391729
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,16,8,64,0,1,float16,float16,1,0.023056000471115112
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,16,8,64,128,1,float16,fp8,1,0.007002666592597961
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,16,4,64,0,1,float16,float16,16383,0.2200373411178589
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,16,4,64,128,1,float16,fp8,16383,0.011178666104873022
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,16,8,64,0,1,float16,fp8,1,0.0075093333919843035
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,16,8,64,128,1,float16,float16,3,0.02275199939807256
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,16,4,64,0,1,float16,fp8,16383,0.14202133814493814
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,16,8,64,128,1,float16,fp8,3,0.007567999884486198
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,16,8,64,0,1,float16,float16,3,0.023210667073726654
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,16,8,64,0,1,float16,fp8,3,0.006911999856432279
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,16,8,64,128,1,float16,float16,7,0.023215999205907185
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,16,8,64,0,1,float16,float16,7,0.023552000522613525
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,16,8,64,0,1,float16,fp8,15,0.006895999734600385
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,16,8,64,128,1,float16,fp8,7,0.00761600024998188
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,16,8,64,0,1,float16,fp8,7,0.0068693334857622785
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,16,8,64,128,1,float16,float16,15,0.023130667706330616
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,16,8,64,0,1,float16,float16,15,0.02313599983851115
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,16,8,64,128,1,float16,fp8,15,0.007365333537260692
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,16,8,64,128,1,float16,float16,31,0.02316266546646754
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,16,8,64,0,1,float16,float16,31,0.023573334018389385
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,16,8,64,128,1,float16,fp8,31,0.0069333333522081375
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,16,8,64,128,1,float16,float16,127,0.023317334552605946
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,16,8,64,0,1,float16,fp8,31,0.007578666632374127
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,16,8,64,128,1,float16,float16,63,0.022602667411168415
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,16,8,64,0,1,float16,float16,63,0.02316266546646754
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,16,8,64,128,1,float16,fp8,63,0.007466666400432587
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,16,8,64,0,1,float16,fp8,63,0.007290666922926903
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,16,8,64,0,1,float16,float16,127,0.02346666653951009
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,16,8,64,128,1,float16,fp8,127,0.007941333577036858
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,16,8,64,0,1,float16,fp8,127,0.008650666723648706
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,16,8,64,128,1,float16,float16,255,0.04409066836039225
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,16,8,64,0,1,float16,float16,255,0.04286933441956838
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,16,8,64,128,1,float16,fp8,255,0.013994666437307993
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,16,8,64,0,1,float16,fp8,255,0.01573333392540614
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,16,8,64,128,1,float16,float16,511,0.043920000394185386
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,16,8,64,0,1,float16,float16,511,0.051925331354141235
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,16,8,64,0,1,float16,float16,1023,0.06594133377075195
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,16,8,64,128,1,float16,fp8,511,0.013807999591032663
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,16,8,64,0,1,float16,fp8,1023,0.028090665737787884
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,16,8,64,0,1,float16,fp8,511,0.01998399943113327
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,16,8,64,128,1,float16,float16,1023,0.04331733286380768
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,16,8,64,128,1,float16,fp8,1023,0.01379199946920077
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,16,8,64,128,1,float16,float16,2047,0.04566933214664459
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,16,8,64,0,1,float16,float16,2047,0.09118933478991191
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,16,8,64,128,1,float16,fp8,2047,0.013983999689420065
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,16,8,64,0,1,float16,fp8,2047,0.04594666759173075
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,16,8,64,128,1,float16,float16,4095,0.04695466657479604
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,16,8,64,0,1,float16,float16,4095,0.13296533624331155
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,16,8,64,128,1,float16,fp8,4095,0.013834666460752487
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,16,8,64,0,1,float16,fp8,4095,0.07747200131416321
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,16,8,64,128,1,float16,float16,8191,0.04603200157483419
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,16,8,64,0,1,float16,float16,8191,0.22085332870483398
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,16,8,64,128,1,float16,fp8,8191,0.013925333817799887
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,16,8,64,0,1,float16,fp8,8191,0.14283733566602072
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,16,8,64,128,1,float16,float16,16383,0.04724800089995066
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,16,1,64,128,1,float16,float16,1,0.008245333408315977
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,16,1,64,0,1,float16,float16,1,0.008842666943868002
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,16,8,64,128,1,float16,fp8,16383,0.013274667163689932
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,16,1,64,128,1,float16,fp8,1,0.007397333160042763
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,16,8,64,0,1,float16,float16,16383,0.3938133319218953
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,16,1,64,0,1,float16,fp8,1,0.006405333057045937
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,16,1,64,128,1,float16,float16,3,0.008207999790708223
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,16,8,64,0,1,float16,fp8,16383,0.2702186703681946
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,16,1,64,0,1,float16,float16,3,0.008682666967312494
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,16,1,64,128,1,float16,fp8,3,0.00625599982837836
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,16,1,64,0,1,float16,fp8,3,0.007311999797821045
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,16,1,64,128,1,float16,float16,7,0.007674666742483775
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,16,1,64,0,1,float16,float16,7,0.007936000203092894
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,16,1,64,128,1,float16,fp8,7,0.007658666620651881
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,16,1,64,0,1,float16,fp8,7,0.007045333584149678
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,16,1,64,128,1,float16,float16,15,0.0085333331177632
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,16,1,64,0,1,float16,float16,15,0.00761600024998188
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,16,1,64,128,1,float16,fp8,15,0.007167999943097432
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,16,1,64,128,1,float16,float16,63,0.010133333504199982
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,16,1,64,0,1,float16,fp8,15,0.006384000182151794
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,16,1,64,128,1,float16,float16,31,0.008256000156203905
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,16,1,64,0,1,float16,float16,31,0.008330666770537695
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,16,1,64,128,1,float16,fp8,31,0.007370666911204656
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,16,1,64,0,1,float16,float16,127,0.009130666653315226
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,16,1,64,0,1,float16,fp8,31,0.007205333560705185
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,16,1,64,0,1,float16,float16,63,0.008538666491707167
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,16,1,64,128,1,float16,fp8,63,0.007407999907930692
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,16,1,64,0,1,float16,fp8,63,0.008512000242869059
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,16,1,64,128,1,float16,float16,127,0.008725333337982496
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,16,1,64,128,1,float16,fp8,127,0.007733333234985669
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,16,1,64,0,1,float16,fp8,127,0.007680000116427739
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,16,1,64,128,1,float16,float16,255,0.009114666531483332
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,16,1,64,0,1,float16,float16,255,0.009312000125646591
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,16,1,64,128,1,float16,fp8,255,0.0075093333919843035
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,16,1,64,128,1,float16,float16,1023,0.014271999398867289
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,16,1,64,0,1,float16,fp8,255,0.00897066667675972
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,16,1,64,128,1,float16,float16,511,0.009392000113924345
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,16,1,64,0,1,float16,float16,511,0.009898666913310686
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,16,1,64,128,1,float16,fp8,511,0.008192000289758047
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,16,1,64,0,1,float16,fp8,511,0.009701333319147428
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,16,1,64,0,1,float16,float16,1023,0.01646399994691213
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,16,1,64,128,1,float16,fp8,1023,0.007626666376988093
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,16,1,64,0,1,float16,fp8,1023,0.010138666878143946
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,16,1,64,0,1,float16,float16,4095,0.014959999670584997
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,16,1,64,128,1,float16,float16,2047,0.014085333794355392
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,16,1,64,0,1,float16,float16,2047,0.014576000471909841
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,16,1,64,128,1,float16,fp8,2047,0.007733333234985669
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,16,1,64,0,1,float16,fp8,2047,0.012058666596810022
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,16,1,64,128,1,float16,float16,4095,0.01471466695268949
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,16,1,64,128,1,float16,fp8,4095,0.007727999861041705
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,16,1,64,0,1,float16,fp8,4095,0.012485332787036896
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,16,1,64,128,1,float16,float16,16383,0.01579733317097028
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,16,1,64,128,1,float16,float16,8191,0.015077333897352219
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,16,1,64,0,1,float16,float16,8191,0.01759999990463257
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,16,1,64,128,1,float16,fp8,8191,0.009018666421373686
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,16,1,64,0,1,float16,fp8,8191,0.014458666245142618
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,16,1,64,0,1,float16,float16,16383,0.01883200059334437
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,16,1,64,128,1,float16,fp8,16383,0.007770666852593422
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,16,1,64,0,1,float16,fp8,16383,0.017877332866191864
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,16,1,64,128,1,float16,float16,32767,0.01921066641807556
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,16,1,64,0,1,float16,float16,32767,0.028138667345046997
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,16,1,64,128,1,float16,fp8,32767,0.008517333616813024
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,16,1,64,0,1,float16,fp8,32767,0.024586667617162068
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,16,1,64,128,1,float16,float16,65535,0.02161066730817159
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,16,1,64,0,1,float16,float16,65535,0.0345920001467069
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,16,2,64,128,1,float16,float16,1,0.007658666620651881
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,16,1,64,128,1,float16,fp8,65535,0.008176000167926153
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,16,1,64,0,1,float16,fp8,65535,0.031199999153614044
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,16,2,64,128,1,float16,fp8,1,0.006581333155433337
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,16,2,64,0,1,float16,float16,1,0.008346666892369589
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,16,2,64,0,1,float16,fp8,1,0.005535999933878581
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,16,2,64,0,1,float16,float16,3,0.007600000128149986
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,16,2,64,128,1,float16,float16,3,0.008016000191370646
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,16,2,64,128,1,float16,fp8,3,0.007285333548982938
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,16,2,64,0,1,float16,fp8,3,0.006837333242098491
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,16,2,64,128,1,float16,float16,7,0.008005333443482717
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,16,2,64,0,1,float16,float16,7,0.008346666892369589
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,16,2,64,128,1,float16,fp8,7,0.006618666773041089
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,16,2,64,0,1,float16,fp8,7,0.007135999699433644
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,16,2,64,128,1,float16,float16,15,0.008261333530147871
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,16,2,64,0,1,float16,float16,15,0.007941333577036858
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,16,1,64,128,1,float16,float16,131071,0.027834666272004444
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,16,2,64,128,1,float16,fp8,15,0.007050666958093643
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,16,1,64,0,1,float16,float16,131071,0.057248001297314964
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,16,2,64,0,1,float16,fp8,15,0.007296000296870868
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,16,1,64,128,1,float16,fp8,131071,0.008559999987483025
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,16,1,64,0,1,float16,fp8,131071,0.04987200101216634
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,16,2,64,128,1,float16,float16,31,0.008181333541870117
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,16,2,64,0,1,float16,float16,31,0.008458666503429413
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,16,2,64,128,1,float16,fp8,31,0.006949333474040031
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,16,2,64,0,1,float16,fp8,31,0.006965333595871925
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,16,2,64,128,1,float16,float16,63,0.00919999989370505
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,16,2,64,0,1,float16,float16,63,0.009183999771873156
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,16,2,64,128,1,float16,fp8,63,0.008367999767263731
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,16,2,64,0,1,float16,fp8,63,0.006544000158707301
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,16,2,64,128,1,float16,float16,127,0.009365333244204521
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,16,2,64,0,1,float16,float16,127,0.012784000486135483
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,16,2,64,128,1,float16,fp8,127,0.007141333073377609
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,16,2,64,0,1,float16,fp8,127,0.007397333160042763
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,16,2,64,128,1,float16,float16,255,0.009423999736706415
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,16,2,64,0,1,float16,float16,255,0.009392000113924345
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,16,2,64,128,1,float16,fp8,255,0.007189333438873291
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,16,2,64,0,1,float16,fp8,255,0.007882666463653246
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,16,2,64,128,1,float16,float16,511,0.009312000125646591
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,16,2,64,0,1,float16,float16,511,0.010373333469033241
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,16,2,64,128,1,float16,fp8,511,0.007696000238259633
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,16,2,64,0,1,float16,fp8,511,0.008314666648705801
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,16,2,64,128,1,float16,float16,1023,0.011989332735538483
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,16,2,64,0,1,float16,float16,2047,0.014896000425020853
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,16,2,64,0,1,float16,float16,1023,0.012117333710193634
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,16,2,64,128,1,float16,fp8,1023,0.0075573331365982694
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,16,2,64,0,1,float16,fp8,1023,0.008799999952316284
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,16,2,64,0,1,float16,float16,4095,0.015263999501864115
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,16,2,64,128,1,float16,float16,2047,0.014138666292031607
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,16,2,64,128,1,float16,fp8,2047,0.00772266648709774
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,16,2,64,0,1,float16,fp8,2047,0.009770666559537252
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,16,2,64,128,1,float16,float16,4095,0.014384000251690546
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,16,2,64,0,1,float16,fp8,4095,0.010581333190202713
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,16,2,64,128,1,float16,fp8,4095,0.007776000226537387
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,16,2,64,0,1,float16,float16,8191,0.021722666919231415
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,16,2,64,0,1,float16,float16,16383,0.02517866591612498
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,16,2,64,128,1,float16,float16,8191,0.01740266631046931
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,16,2,64,128,1,float16,fp8,8191,0.007685333490371704
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,16,2,64,0,1,float16,fp8,8191,0.014117332796255747
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,16,2,64,128,1,float16,float16,16383,0.018863999595244724
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,16,2,64,128,1,float16,fp8,16383,0.007471999774376552
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,16,2,64,0,1,float16,fp8,16383,0.016906666258970898
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,16,2,64,128,1,float16,float16,32767,0.02147199958562851
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,16,2,64,0,1,float16,float16,32767,0.03702933341264725
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,16,2,64,128,1,float16,fp8,32767,0.007584000006318092
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,16,2,64,0,1,float16,fp8,32767,0.023936000963052113
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,16,2,64,128,1,float16,float16,65535,0.026330667237440746
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,16,2,64,0,1,float16,float16,65535,0.055914665261904396
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,16,2,64,128,1,float16,fp8,65535,0.0075573331365982694
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,16,4,64,128,1,float16,float16,1,0.008218666538596153
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,16,2,64,0,1,float16,fp8,65535,0.03386666625738144
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,16,4,64,0,1,float16,float16,1,0.008645333349704742
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,16,4,64,128,1,float16,fp8,1,0.0069333333522081375
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,16,4,64,0,1,float16,fp8,1,0.006965333595871925
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,16,4,64,128,1,float16,float16,3,0.008469333251317343
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,16,4,64,0,1,float16,float16,3,0.008639999975760778
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,16,4,64,128,1,float16,fp8,3,0.009850666547815004
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,16,4,64,0,1,float16,fp8,3,0.007162666569153468
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,16,4,64,128,1,float16,float16,7,0.008341333518425623
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,16,4,64,0,1,float16,float16,7,0.008682666967312494
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,16,4,64,0,1,float16,fp8,7,0.007157333195209503
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,16,2,64,128,1,float16,float16,131071,0.03356266766786575
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,16,4,64,128,1,float16,fp8,7,0.005674666414658229
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,16,2,64,0,1,float16,float16,131071,0.09047466516494751
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,16,4,64,128,1,float16,float16,15,0.008357333640257517
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,16,4,64,0,1,float16,float16,15,0.009583999713261923
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,16,2,64,128,1,float16,fp8,131071,0.007936000203092894
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,16,4,64,128,1,float16,fp8,15,0.008623999853928884
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,16,2,64,0,1,float16,fp8,131071,0.06387733419736226
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,16,4,64,0,1,float16,fp8,15,0.00706666645904382
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,16,4,64,128,1,float16,float16,31,0.007983999947706858
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,16,4,64,0,1,float16,float16,31,0.008602666358153025
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,16,4,64,128,1,float16,fp8,31,0.007167999943097432
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,16,4,64,0,1,float16,fp8,31,0.008330666770537695
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,16,4,64,128,1,float16,float16,63,0.009066666786869368
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,16,4,64,0,1,float16,float16,63,0.009178666397929192
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,16,4,64,128,1,float16,fp8,63,0.007007999966541926
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,16,4,64,0,1,float16,fp8,63,0.0057920000205437345
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,16,4,64,128,1,float16,float16,127,0.008821333448092142
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,16,4,64,0,1,float16,float16,127,0.009461333354314169
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,16,4,64,128,1,float16,fp8,127,0.00725333330531915
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,16,4,64,0,1,float16,fp8,127,0.00854399986565113
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,16,4,64,0,1,float16,float16,511,0.01201066623131434
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,16,4,64,128,1,float16,float16,255,0.009205333267649015
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,16,4,64,0,1,float16,float16,255,0.009509333098928133
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,16,4,64,128,1,float16,float16,1023,0.01209066684047381
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,16,4,64,128,1,float16,fp8,255,0.007530666887760162
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,16,4,64,0,1,float16,fp8,255,0.007749333356817563
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,16,4,64,128,1,float16,float16,511,0.01139733319481214
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,16,4,64,128,1,float16,fp8,511,0.00696000022192796
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,16,4,64,0,1,float16,fp8,511,0.00867733359336853
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,16,4,64,0,1,float16,float16,1023,0.012159999459981918
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,16,4,64,128,1,float16,fp8,1023,0.007786666974425316
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,16,4,64,0,1,float16,fp8,1023,0.008592000231146812
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,16,4,64,128,1,float16,float16,2047,0.016069332758585613
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,16,4,64,0,1,float16,float16,2047,0.018533332894245785
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,16,4,64,128,1,float16,fp8,2047,0.00731733317176501
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,16,4,64,0,1,float16,fp8,2047,0.010213333492477735
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,16,4,64,128,1,float16,float16,4095,0.017162666966517765
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,16,4,64,0,1,float16,float16,4095,0.021061333517233532
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,16,4,64,128,1,float16,fp8,4095,0.007536000261704127
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,16,4,64,0,1,float16,fp8,4095,0.011221333096424738
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,16,4,64,128,1,float16,float16,8191,0.019626667102177937
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,16,4,64,0,1,float16,float16,8191,0.02845333268245061
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,16,4,64,128,1,float16,fp8,8191,0.008645333349704742
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,16,4,64,0,1,float16,fp8,8191,0.015295999745527903
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,16,4,64,128,1,float16,float16,16383,0.023584000766277313
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,16,4,64,0,1,float16,float16,16383,0.038704000413417816
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,16,4,64,128,1,float16,fp8,16383,0.0075093333919843035
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,16,4,64,0,1,float16,float16,32767,0.061466669042905174
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,16,4,64,0,1,float16,fp8,32767,0.03508266558249792
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,16,4,64,0,1,float16,fp8,16383,0.020143999407688778
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,16,4,64,128,1,float16,float16,32767,0.028165332973003387
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,16,4,64,128,1,float16,fp8,32767,0.007962666451931
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,16,4,64,128,1,float16,float16,65535,0.03517866631348928
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,16,4,64,0,1,float16,float16,65535,0.09244799613952637
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,16,8,64,128,1,float16,float16,1,0.010239999741315842
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,16,4,64,128,1,float16,fp8,65535,0.009770666559537252
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,16,8,64,0,1,float16,float16,1,0.009573333586255709
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,16,4,64,0,1,float16,fp8,65535,0.052282666166623436
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,16,8,64,128,1,float16,fp8,1,0.008650666723648706
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,16,8,64,0,1,float16,fp8,1,0.007471999774376552
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,16,8,64,128,1,float16,float16,3,0.008282666405042013
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,16,8,64,0,1,float16,float16,3,0.008821333448092142
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,16,8,64,128,1,float16,fp8,3,0.009509333098928133
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,16,8,64,0,1,float16,fp8,3,0.007322666545708974
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,16,8,64,128,1,float16,float16,7,0.008154666672150293
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,16,8,64,0,1,float16,float16,7,0.00878399983048439
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,16,8,64,128,1,float16,fp8,7,0.008586666857202848
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,16,4,64,0,1,float16,float16,131071,0.14524267117182413
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,16,4,64,128,1,float16,float16,131071,0.03702933341264725
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,16,8,64,0,1,float16,fp8,7,0.007381333038210869
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,16,8,64,128,1,float16,float16,15,0.008282666405042013
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,16,4,64,0,1,float16,fp8,131071,0.0942186713218689
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,16,8,64,0,1,float16,float16,15,0.008613333106040955
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,16,8,64,128,1,float16,fp8,15,0.005695999910434087
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,16,4,64,128,1,float16,fp8,131071,0.008016000191370646
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,16,8,64,0,1,float16,fp8,15,0.00721066693464915
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,16,8,64,128,1,float16,float16,31,0.007962666451931
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,16,8,64,0,1,float16,float16,31,0.00842666688064734
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,16,8,64,128,1,float16,fp8,31,0.007376000285148621
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,16,8,64,0,1,float16,fp8,31,0.00701333334048589
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,16,8,64,128,1,float16,float16,63,0.009136000027259191
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,16,8,64,0,1,float16,float16,63,0.00879466657837232
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,16,8,64,128,1,float16,fp8,63,0.0069226666043202085
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,16,8,64,0,1,float16,fp8,63,0.00721066693464915
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,16,8,64,128,1,float16,float16,127,0.008789333204428354
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,16,8,64,0,1,float16,float16,127,0.009408000235756239
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,16,8,64,0,1,float16,fp8,255,0.00784533346692721
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,16,8,64,128,1,float16,fp8,127,0.007237333183487256
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,16,8,64,0,1,float16,fp8,127,0.00707733320693175
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,16,8,64,128,1,float16,float16,255,0.012037333101034164
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,16,8,64,0,1,float16,float16,255,0.011535999675591787
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,16,8,64,128,1,float16,fp8,255,0.007402666533986728
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,16,8,64,0,1,float16,float16,1023,0.01788266624013583
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,16,8,64,128,1,float16,float16,511,0.011610666910807291
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,16,8,64,0,1,float16,float16,511,0.012149333953857422
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,16,8,64,128,1,float16,fp8,511,0.0074879998962084455
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,16,8,64,0,1,float16,fp8,511,0.008122666428486506
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,16,8,64,128,1,float16,float16,1023,0.01657066618402799
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,16,8,64,128,1,float16,fp8,1023,0.0069919998447100324
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,16,8,64,0,1,float16,fp8,1023,0.008997333546479544
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,16,8,64,128,1,float16,float16,2047,0.019573333362738293
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,16,8,64,128,1,float16,fp8,4095,0.007104000076651573
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,16,8,64,0,1,float16,float16,2047,0.023760000864664715
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,16,8,64,128,1,float16,fp8,2047,0.00789866658548514
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,16,8,64,0,1,float16,fp8,2047,0.011168000598748526
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,16,8,64,128,1,float16,float16,4095,0.02349333216746648
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,16,8,64,0,1,float16,float16,4095,0.032586666444937386
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,16,8,64,0,1,float16,fp8,4095,0.013823999712864557
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,16,8,64,128,1,float16,float16,8191,0.028229333460330963
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,16,8,64,0,1,float16,float16,8191,0.044981335600217186
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,16,8,64,128,1,float16,fp8,8191,0.007322666545708974
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,16,8,64,0,1,float16,fp8,8191,0.0229120006163915
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,16,8,64,128,1,float16,float16,16383,0.033957332372665405
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,16,8,64,0,1,float16,float16,16383,0.06817600131034851
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,16,8,64,128,1,float16,fp8,16383,0.008400000010927519
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,16,8,64,0,1,float16,fp8,16383,0.030821333328882854
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,16,8,64,128,1,float16,float16,32767,0.03502399971087774
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,16,8,64,0,1,float16,float16,32767,0.09210667014122009
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,16,8,64,128,1,float16,fp8,32767,0.007621333623925845
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,16,8,64,0,1,float16,fp8,32767,0.05147199829419454
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,16,8,64,128,1,float16,float16,65535,0.0346666673819224
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,16,8,64,0,1,float16,float16,65535,0.1390773355960846
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,16,8,64,128,1,float16,fp8,65535,0.008101333553592363
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,16,8,64,0,1,float16,fp8,65535,0.084906667470932
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,16,1,64,128,1,float16,float16,1,0.010911999891201654
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,16,1,64,0,1,float16,float16,1,0.011173332730929056
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,16,1,64,128,1,float16,fp8,1,0.007178666690985362
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,16,1,64,0,1,float16,fp8,1,0.0074560002734263735
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,16,1,64,128,1,float16,float16,3,0.011178666104873022
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,16,1,64,0,1,float16,float16,3,0.010853332777818045
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,16,1,64,128,1,float16,fp8,3,0.007354666789372762
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,16,1,64,0,1,float16,fp8,3,0.0074453335255384445
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,16,1,64,128,1,float16,float16,7,0.010778666784365972
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,16,1,64,0,1,float16,float16,7,0.010954666882753372
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,16,8,64,128,1,float16,float16,131071,0.036746665835380554
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,16,1,64,128,1,float16,fp8,7,0.007402666533986728
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,16,8,64,0,1,float16,fp8,131071,0.14617066582043967
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,16,1,64,0,1,float16,fp8,7,0.007237333183487256
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,16,1,64,128,1,float16,float16,15,0.010981333752473196
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,16,8,64,0,1,float16,float16,131071,0.23542400201161703
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,16,8,64,128,1,float16,fp8,131071,0.007610666876037915
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,16,1,64,0,1,float16,float16,15,0.010960000256697336
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,16,1,64,128,1,float16,fp8,15,0.007354666789372762
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,16,1,64,0,1,float16,fp8,15,0.0075519997626543045
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,16,1,64,128,1,float16,float16,31,0.010751999914646149
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,16,1,64,0,1,float16,float16,31,0.011114666859308878
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,16,1,64,128,1,float16,fp8,31,0.008336000144481659
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,16,1,64,0,1,float16,fp8,31,0.006549333532651265
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,16,1,64,128,1,float16,float16,63,0.010885333021481832
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,16,1,64,0,1,float16,float16,63,0.011055999745925268
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,16,1,64,128,1,float16,fp8,63,0.007642666498819987
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,16,1,64,0,1,float16,fp8,63,0.007424000029762586
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,16,1,64,128,1,float16,float16,127,0.011039999624093374
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,16,1,64,0,1,float16,float16,127,0.010346666599313417
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,16,1,64,128,1,float16,fp8,127,0.007242666557431221
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,16,1,64,0,1,float16,fp8,127,0.008559999987483025
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,16,1,64,128,1,float16,float16,255,0.01924266666173935
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,16,1,64,0,1,float16,float16,255,0.018719999740521114
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,16,1,64,128,1,float16,fp8,255,0.008933333059151968
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,16,1,64,128,1,float16,fp8,511,0.009877333417534828
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,16,1,64,0,1,float16,fp8,255,0.009893333539366722
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,16,1,64,128,1,float16,float16,511,0.022778667509555817
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,16,1,64,0,1,float16,float16,511,0.025045332809289295
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,16,1,64,0,1,float16,fp8,511,0.012117333710193634
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,16,1,64,128,1,float16,float16,1023,0.03379733363787333
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,16,1,64,0,1,float16,float16,1023,0.04062400013208389
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,16,1,64,128,1,float16,fp8,1023,0.010128000130256018
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,16,1,64,0,1,float16,fp8,1023,0.020506666352351505
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,16,1,64,128,1,float16,float16,2047,0.03547733277082443
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,16,1,64,0,1,float16,float16,2047,0.051962668697039284
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,16,1,64,128,1,float16,fp8,2047,0.010319999729593595
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,16,1,64,0,1,float16,fp8,2047,0.024874667326609295
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,16,1,64,128,1,float16,float16,4095,0.03621866554021835
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,16,1,64,0,1,float16,float16,4095,0.06840000053246816
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,16,1,64,0,1,float16,fp8,4095,0.03460799902677536
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,16,1,64,128,1,float16,fp8,4095,0.010053333515922228
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,16,2,64,128,1,float16,float16,1,0.015130666395028433
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,16,1,64,128,1,float16,float16,8191,0.0352960005402565
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,16,1,64,0,1,float16,float16,8191,0.08937600255012512
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,16,2,64,0,1,float16,float16,1,0.015173333386580149
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,16,1,64,128,1,float16,fp8,8191,0.0102186668664217
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,16,2,64,128,1,float16,fp8,1,0.007242666557431221
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,16,1,64,0,1,float16,fp8,8191,0.05609600245952606
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,16,2,64,0,1,float16,fp8,1,0.007055999711155891
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,16,2,64,128,1,float16,float16,3,0.015061333775520325
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,16,2,64,0,1,float16,float16,3,0.014576000471909841
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,16,2,64,128,1,float16,fp8,3,0.006261333202322324
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,16,2,64,0,1,float16,fp8,3,0.008127999802430471
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,16,2,64,128,1,float16,float16,7,0.015594666202863058
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,16,2,64,0,1,float16,float16,7,0.015098666151364645
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,16,2,64,128,1,float16,fp8,7,0.007280000175038974
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,16,2,64,0,1,float16,fp8,7,0.007637333124876022
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,16,2,64,128,1,float16,float16,15,0.014981333166360855
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,16,2,64,0,1,float16,float16,15,0.014511999984582266
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,16,2,64,128,1,float16,fp8,15,0.006239999706546466
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,16,2,64,0,1,float16,fp8,15,0.008277333031098047
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,16,2,64,128,1,float16,float16,31,0.01533866673707962
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,16,2,64,0,1,float16,float16,31,0.01509333277742068
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,16,2,64,128,1,float16,fp8,31,0.007333333293596904
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,16,2,64,0,1,float16,fp8,31,0.007098666702707608
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,16,2,64,128,1,float16,float16,63,0.01488000030318896
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,16,2,64,0,1,float16,float16,63,0.014565333724021912
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,16,2,64,128,1,float16,fp8,63,0.006277333324154218
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,16,2,64,0,1,float16,fp8,63,0.008416000132759413
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,16,2,64,0,1,float16,float16,255,0.026533332963784535
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,16,2,64,128,1,float16,float16,127,0.015285332997639975
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,16,2,64,0,1,float16,float16,127,0.01492799942692121
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,16,2,64,128,1,float16,fp8,127,0.007690666864315669
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,16,2,64,0,1,float16,fp8,127,0.0075040000180403394
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,16,2,64,128,1,float16,float16,255,0.02712533374627431
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,16,2,64,128,1,float16,fp8,511,0.010640000303586325
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,16,2,64,128,1,float16,fp8,255,0.00842666688064734
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,16,2,64,0,1,float16,fp8,255,0.010213333492477735
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,16,2,64,128,1,float16,float16,511,0.03621333340803782
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,16,2,64,0,1,float16,float16,511,0.04127466678619385
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,16,2,64,0,1,float16,fp8,511,0.01666133354107539
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,16,2,64,128,1,float16,float16,1023,0.036618667344252266
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,16,2,64,0,1,float16,float16,1023,0.04987200101216634
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,16,2,64,0,1,float16,fp8,1023,0.020224000016848247
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,16,2,64,128,1,float16,fp8,1023,0.010805333654085795
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,16,2,64,128,1,float16,float16,2047,0.03939199944337209
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,16,2,64,128,1,float16,float16,4095,0.03913066784540812
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,16,2,64,0,1,float16,float16,2047,0.06773333251476288
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,16,2,64,128,1,float16,fp8,2047,0.010634666929642359
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,16,2,64,0,1,float16,fp8,2047,0.02889599899450938
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,16,2,64,128,1,float16,fp8,4095,0.010128000130256018
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,16,2,64,0,1,float16,float16,4095,0.08990933497746785
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,16,2,64,128,1,float16,float16,8191,0.03929600119590759
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,16,2,64,0,1,float16,fp8,4095,0.04656533400217692
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,16,2,64,128,1,float16,fp8,8191,0.009999999776482582
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,16,2,64,0,1,float16,float16,8191,0.13357333342234293
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,16,2,64,0,1,float16,fp8,8191,0.07860266665617625
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,16,4,64,0,1,float16,fp8,1,0.007674666742483775
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,16,4,64,128,1,float16,float16,1,0.023541333774725597
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,16,4,64,0,1,float16,float16,1,0.023573334018389385
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,16,4,64,128,1,float16,fp8,1,0.00847999999920527
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,16,4,64,128,1,float16,float16,3,0.023050665855407715
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,16,4,64,0,1,float16,float16,3,0.023183998962243397
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,16,4,64,128,1,float16,fp8,3,0.007658666620651881
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,16,4,64,0,1,float16,fp8,3,0.007594666754206021
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,16,4,64,128,1,float16,float16,15,0.023157333334287006
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,16,4,64,128,1,float16,float16,7,0.023552000522613525
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,16,4,64,0,1,float16,float16,7,0.02348800003528595
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,16,4,64,128,1,float16,fp8,7,0.008618666479984919
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,16,4,64,0,1,float16,fp8,7,0.007098666702707608
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,16,4,64,0,1,float16,float16,15,0.02364266663789749
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,16,4,64,128,1,float16,fp8,15,0.007701333612203598
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,16,4,64,0,1,float16,fp8,15,0.007296000296870868
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,16,4,64,128,1,float16,float16,31,0.02359466751416524
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,16,4,64,0,1,float16,float16,31,0.0235359991590182
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,16,4,64,128,1,float16,fp8,31,0.009039999917149544
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,16,4,64,0,1,float16,fp8,31,0.007029333462317784
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,16,4,64,128,1,float16,float16,63,0.02348266790310542
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,16,4,64,0,1,float16,float16,63,0.023354666928450268
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,16,4,64,128,1,float16,fp8,63,0.00731733317176501
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,16,4,64,0,1,float16,fp8,63,0.00749333327015241
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,16,4,64,128,1,float16,float16,127,0.023317334552605946
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,16,4,64,0,1,float16,float16,127,0.023685333629449207
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,16,4,64,128,1,float16,fp8,127,0.0080960001796484
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,16,4,64,0,1,float16,fp8,127,0.00874133345981439
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,16,4,64,128,1,float16,float16,255,0.04369066655635834
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,16,4,64,0,1,float16,float16,255,0.04327466587225596
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,16,4,64,128,1,float16,fp8,255,0.013898666948080063
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,16,4,64,0,1,float16,fp8,255,0.015770666301250458
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,16,4,64,128,1,float16,float16,511,0.04377600053946177
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,16,4,64,0,1,float16,float16,511,0.05145066479841868
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,16,4,64,128,1,float16,fp8,511,0.014122666170199713
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,16,4,64,0,1,float16,fp8,511,0.019946667055288952
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,16,4,64,128,1,float16,float16,1023,0.043477331598599754
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,16,4,64,0,1,float16,float16,1023,0.06605333089828491
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,16,4,64,128,1,float16,fp8,1023,0.013882666826248169
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,16,4,64,0,1,float16,fp8,1023,0.028304000695546467
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,16,4,64,128,1,float16,float16,2047,0.045552000403404236
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,16,4,64,0,1,float16,float16,2047,0.09144533673922221
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,16,4,64,128,1,float16,fp8,2047,0.014042666802803675
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,16,4,64,0,1,float16,fp8,2047,0.04553600152333578
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,16,4,64,0,1,float16,fp8,4095,0.07824000219504039
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,16,4,64,128,1,float16,float16,4095,0.04586666822433472
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,16,4,64,0,1,float16,float16,4095,0.13480533162752786
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,16,4,64,128,1,float16,fp8,4095,0.01402666668097178
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,16,4,64,128,1,float16,float16,8191,0.04535466432571411
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,16,8,64,0,1,float16,float16,1,0.03950933367013931
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,16,4,64,128,1,float16,fp8,8191,0.014015999933083853
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,16,4,64,0,1,float16,float16,8191,0.2225546638170878
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,16,8,64,128,1,float16,float16,1,0.04005866746107737
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,16,8,64,128,1,float16,fp8,1,0.011333333949247995
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,16,4,64,0,1,float16,fp8,8191,0.14198399583498636
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,16,8,64,0,1,float16,fp8,1,0.011498666057984034
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,16,8,64,128,1,float16,float16,3,0.039103999733924866
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,16,8,64,0,1,float16,float16,3,0.039674667020638786
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,16,8,64,128,1,float16,fp8,3,0.011594666788975397
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,16,8,64,0,1,float16,fp8,3,0.011493333925803503
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,16,8,64,128,1,float16,float16,7,0.03982399900754293
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,16,8,64,0,1,float16,float16,7,0.03966933240493139
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,16,8,64,128,1,float16,fp8,7,0.011594666788975397
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,16,8,64,0,1,float16,fp8,7,0.011359999577204386
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,16,8,64,128,1,float16,float16,15,0.03908266623814901
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,16,8,64,0,1,float16,float16,15,0.039834665755430855
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,16,8,64,0,1,float16,fp8,31,0.011296000331640244
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,16,8,64,128,1,float16,fp8,15,0.011695999652147293
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,16,8,64,0,1,float16,fp8,15,0.01146666705608368
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,16,8,64,128,1,float16,float16,31,0.039936001102129616
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,16,8,64,0,1,float16,float16,31,0.03967999915281931
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,16,8,64,128,1,float16,fp8,31,0.011440000186363855
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,16,8,64,128,1,float16,float16,63,0.03860266755024592
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,16,8,64,0,1,float16,float16,63,0.03941333293914795
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,16,8,64,128,1,float16,fp8,63,0.011237333218256632
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,16,8,64,0,1,float16,fp8,63,0.011530666301647821
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,16,8,64,128,1,float16,float16,127,0.03997866561015447
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,16,8,64,0,1,float16,float16,127,0.03965333352486292
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,16,8,64,128,1,float16,fp8,127,0.013306666165590286
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,16,8,64,0,1,float16,fp8,127,0.01331199953953425
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,16,8,64,128,1,float16,float16,511,0.04196799794832865
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,16,8,64,128,1,float16,float16,255,0.040037333965301514
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,16,8,64,0,1,float16,float16,255,0.04087999959786733
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,16,8,64,128,1,float16,fp8,255,0.013237333546082178
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,16,8,64,0,1,float16,fp8,255,0.017423999806245167
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,16,8,64,0,1,float16,float16,511,0.05373333394527435
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,16,8,64,128,1,float16,fp8,1023,0.013162666310866674
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,16,8,64,0,1,float16,fp8,1023,0.042650664846102394
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,16,8,64,128,1,float16,fp8,511,0.013280000537633896
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,16,8,64,0,1,float16,fp8,511,0.025472000241279602
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,16,8,64,128,1,float16,float16,1023,0.04252266883850098
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,16,8,64,0,1,float16,float16,1023,0.0749066670735677
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,16,8,64,128,1,float16,float16,2047,0.0441599984963735
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,16,8,64,0,1,float16,float16,2047,0.12337600191434224
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,16,8,64,128,1,float16,fp8,2047,0.013280000537633896
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,16,8,64,0,1,float16,fp8,2047,0.07507733503977458
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,16,8,64,128,1,float16,float16,4095,0.044666667779286705
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,16,8,64,0,1,float16,float16,4095,0.20773865779240927
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,16,8,64,128,1,float16,fp8,4095,0.013541333377361298
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,16,8,64,0,1,float16,fp8,4095,0.14018133282661438
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,16,8,64,128,1,float16,float16,8191,0.04494933287302653
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,16,1,64,128,1,float16,float16,1,0.015386667102575302
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,16,8,64,128,1,float16,fp8,8191,0.013221333424250284
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,16,8,64,0,1,float16,float16,8191,0.3811359802881877
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,16,1,64,0,1,float16,float16,1,0.015872000406185787
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,16,1,64,128,1,float16,fp8,1,0.007727999861041705
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,16,8,64,0,1,float16,fp8,8191,0.2696160078048706
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,16,1,64,0,1,float16,fp8,1,0.008485333373149237
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,16,1,64,128,1,float16,float16,3,0.015301333119471868
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,16,1,64,128,1,float16,float16,7,0.015322666615247726
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,16,1,64,0,1,float16,float16,3,0.015279999623696009
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,16,1,64,128,1,float16,fp8,3,0.007578666632374127
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,16,1,64,0,1,float16,fp8,3,0.007733333234985669
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,16,1,64,0,1,float16,float16,7,0.015253332753976187
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,16,1,64,128,1,float16,fp8,7,0.0086666668454806
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,16,1,64,0,1,float16,fp8,7,0.007578666632374127
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,16,1,64,128,1,float16,float16,15,0.01526933287580808
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,16,1,64,0,1,float16,float16,15,0.01524266724785169
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,16,1,64,128,1,float16,fp8,15,0.007466666400432587
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,16,1,64,0,1,float16,fp8,15,0.007674666742483775
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,16,1,64,128,1,float16,float16,31,0.01515199989080429
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,16,1,64,0,1,float16,float16,31,0.015392000476519266
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,16,1,64,128,1,float16,fp8,31,0.007637333124876022
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,16,1,64,0,1,float16,fp8,31,0.007541333635648091
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,16,1,64,128,1,float16,float16,63,0.014981333166360855
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,16,1,64,0,1,float16,float16,63,0.015189333508412043
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,16,1,64,0,1,float16,float16,127,0.015157333264748255
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,16,1,64,128,1,float16,fp8,63,0.007706666365265846
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,16,1,64,0,1,float16,fp8,63,0.008698666468262672
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,16,1,64,128,1,float16,float16,127,0.015583999454975128
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,16,1,64,128,1,float16,fp8,127,0.008362666393319765
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,16,1,64,0,1,float16,fp8,127,0.008170666793982187
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,16,1,64,128,1,float16,float16,255,0.027621333797772724
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,16,1,64,0,1,float16,float16,255,0.02737066646416982
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,16,1,64,128,1,float16,fp8,255,0.010090666512648264
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,16,1,64,0,1,float16,fp8,255,0.011450666934251785
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,16,1,64,0,1,float16,fp8,511,0.019173332800467808
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,16,1,64,128,1,float16,float16,511,0.03628266602754593
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,16,1,64,0,1,float16,float16,511,0.04177066683769226
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,16,1,64,128,1,float16,fp8,511,0.011584000041087469
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,16,1,64,128,1,float16,float16,1023,0.03682666768630346
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,16,1,64,0,1,float16,float16,1023,0.05090666810671488
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,16,1,64,0,1,float16,float16,2047,0.06855999926726024
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,16,1,64,128,1,float16,fp8,1023,0.01181866725285848
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,16,1,64,0,1,float16,fp8,1023,0.024304000039895374
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,16,1,64,128,1,float16,float16,2047,0.03935466706752777
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,16,1,64,128,1,float16,fp8,2047,0.011829332758982977
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,16,2,64,128,1,float16,float16,1,0.023685333629449207
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,16,1,64,0,1,float16,fp8,2047,0.03442666679620743
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,16,2,64,0,1,float16,float16,1,0.023989332218964893
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,16,2,64,128,1,float16,fp8,1,0.00697066696981589
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,16,2,64,0,1,float16,fp8,1,0.0075519997626543045
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,16,2,64,128,1,float16,float16,3,0.023984000086784363
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,16,2,64,0,1,float16,float16,3,0.02385599911212921
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,16,2,64,128,1,float16,fp8,3,0.008442666381597519
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,16,2,64,0,1,float16,fp8,3,0.007829333345095316
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,16,2,64,128,1,float16,float16,7,0.023738667368888855
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,16,2,64,0,1,float16,float16,7,0.02402133246262868
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,16,2,64,128,1,float16,fp8,7,0.007018666714429855
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,16,2,64,0,1,float16,fp8,7,0.007600000128149986
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,16,2,64,128,1,float16,float16,15,0.023898666103680927
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,16,2,64,0,1,float16,float16,15,0.023887999355793
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,16,2,64,128,1,float16,fp8,15,0.0084906667470932
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,16,2,64,0,1,float16,fp8,15,0.007333333293596904
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,16,2,64,128,1,float16,float16,31,0.02387733260790507
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,16,2,64,0,1,float16,float16,31,0.024165332317352295
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,16,2,64,128,1,float16,fp8,31,0.006949333474040031
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,16,2,64,0,1,float16,fp8,31,0.007589333380262057
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,16,2,64,128,1,float16,float16,63,0.02372266600529353
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,16,2,64,0,1,float16,float16,63,0.023733332753181458
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,16,2,64,128,1,float16,fp8,63,0.008639999975760778
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,16,2,64,0,1,float16,fp8,63,0.007365333537260692
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,16,2,64,128,1,float16,float16,127,0.023754666248957317
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,16,2,64,0,1,float16,float16,127,0.02367466688156128
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,16,2,64,128,1,float16,fp8,127,0.008127999802430471
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,16,2,64,0,1,float16,fp8,127,0.00871999996403853
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,16,2,64,128,1,float16,float16,255,0.04428266485532125
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,16,2,64,0,1,float16,float16,255,0.043882668018341064
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,16,2,64,128,1,float16,fp8,255,0.014282666146755219
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,16,2,64,0,1,float16,fp8,255,0.016095999628305435
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,16,2,64,128,1,float16,float16,511,0.04433066646258036
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,16,2,64,128,1,float16,fp8,511,0.013679999858140945
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,16,2,64,0,1,float16,float16,511,0.0525493323802948
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,16,2,64,0,1,float16,fp8,511,0.02077866718173027
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,16,2,64,128,1,float16,float16,1023,0.04428266485532125
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,16,2,64,0,1,float16,float16,1023,0.06646400193373363
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,16,2,64,128,1,float16,fp8,1023,0.014298666268587112
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,16,2,64,0,1,float16,fp8,1023,0.028922667105992634
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,16,2,64,128,1,float16,float16,2047,0.04673600196838379
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,16,2,64,0,1,float16,float16,2047,0.09086400270462036
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,16,4,64,128,1,float16,fp8,1,0.013967999567588171
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,16,2,64,128,1,float16,fp8,2047,0.01404800017674764
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,16,4,64,128,1,float16,float16,1,0.04146133363246918
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,16,2,64,0,1,float16,fp8,2047,0.046944002310434975
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,16,4,64,0,1,float16,float16,1,0.04027733455101649
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,16,4,64,0,1,float16,fp8,1,0.01138666644692421
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,16,4,64,128,1,float16,float16,3,0.04041066765785217
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,16,4,64,0,1,float16,float16,3,0.03957333415746689
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,16,4,64,128,1,float16,float16,7,0.040864000717798867
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,16,4,64,128,1,float16,fp8,3,0.011648000528415045
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,16,4,64,0,1,float16,fp8,3,0.011477333803971609
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,16,4,64,0,1,float16,float16,7,0.04031999905904134
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,16,4,64,128,1,float16,fp8,7,0.011381333072980246
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,16,4,64,0,1,float16,fp8,7,0.0116799995303154
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,16,4,64,128,1,float16,float16,15,0.03958400090535482
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,16,4,64,0,1,float16,float16,15,0.04104000081618627
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,16,4,64,128,1,float16,fp8,15,0.011706666400035223
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,16,4,64,128,1,float16,fp8,31,0.011642667154471079
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,16,4,64,0,1,float16,fp8,15,0.011525332927703857
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,16,4,64,128,1,float16,float16,31,0.04020266731580099
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,16,4,64,0,1,float16,float16,31,0.0400693342089653
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,16,4,64,0,1,float16,fp8,31,0.011029332876205444
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,16,4,64,128,1,float16,float16,63,0.04044266790151596
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,16,4,64,0,1,float16,float16,63,0.04050133377313614
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,16,4,64,128,1,float16,fp8,63,0.011509332805871964
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,16,4,64,0,1,float16,fp8,63,0.011365332951148352
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,16,4,64,128,1,float16,float16,127,0.039647998909155525
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,16,4,64,0,1,float16,float16,127,0.039919999738534294
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,16,4,64,128,1,float16,fp8,127,0.012762666990359625
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,16,4,64,0,1,float16,fp8,127,0.013440000514189402
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,16,4,64,128,1,float16,float16,255,0.04177600145339966
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,16,4,64,0,1,float16,float16,255,0.04130133241415024
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,16,4,64,128,1,float16,fp8,255,0.01321600005030632
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,16,4,64,0,1,float16,fp8,255,0.017269333203633625
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,16,4,64,0,1,float16,float16,511,0.05389333268006643
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,16,4,64,128,1,float16,float16,511,0.04218133290608724
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,16,4,64,128,1,float16,float16,1023,0.04223466912905375
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,16,4,64,128,1,float16,fp8,511,0.01351999988158544
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,16,4,64,0,1,float16,fp8,511,0.025749333202838898
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,16,4,64,0,1,float16,float16,1023,0.07634133100509644
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,16,4,64,0,1,float16,fp8,1023,0.04228800038496653
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,16,4,64,128,1,float16,fp8,1023,0.013359999905029932
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,16,4,64,128,1,float16,float16,2047,0.04478399952252706
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,16,4,64,0,1,float16,float16,2047,0.12378133336702983
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,16,4,64,128,1,float16,fp8,2047,0.0136266661187013
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,16,8,64,128,1,float16,float16,1,0.07202133536338806
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,16,8,64,0,1,float16,float16,1,0.07171733180681865
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,16,4,64,0,1,float16,fp8,2047,0.075354665517807
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,16,8,64,128,1,float16,fp8,3,0.01921066641807556
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,16,8,64,0,1,float16,fp8,3,0.018992000569899876
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,16,8,64,128,1,float16,fp8,1,0.01893866683046023
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,16,8,64,128,1,float16,float16,3,0.07253333429495494
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,16,8,64,0,1,float16,fp8,1,0.019050666441520054
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,16,8,64,0,1,float16,float16,3,0.07246399919191997
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,16,8,64,128,1,float16,float16,7,0.07168533404668172
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,16,8,64,0,1,float16,float16,7,0.0721973329782486
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,16,8,64,128,1,float16,fp8,7,0.019274666905403137
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,16,8,64,0,1,float16,fp8,7,0.019215999792019527
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,16,8,64,128,1,float16,float16,15,0.07182933390140533
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,16,8,64,0,1,float16,float16,15,0.07253866891066234
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,16,8,64,128,1,float16,fp8,31,0.019258666783571243
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,16,8,64,128,1,float16,fp8,15,0.01903466631968816
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,16,8,64,0,1,float16,fp8,15,0.01918399954835574
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,16,8,64,128,1,float16,float16,31,0.07166400055090587
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,16,8,64,0,1,float16,float16,31,0.07233599821726482
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,16,8,64,0,1,float16,fp8,31,0.019173332800467808
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,16,8,64,128,1,float16,float16,63,0.07126399874687195
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,16,8,64,0,1,float16,float16,63,0.0719946672519048
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,16,8,64,128,1,float16,fp8,63,0.019194666296243668
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,16,8,64,0,1,float16,fp8,63,0.01897066707412402
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,16,8,64,128,1,float16,float16,127,0.0714026689529419
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,16,8,64,0,1,float16,float16,127,0.0710346649090449
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,16,8,64,128,1,float16,fp8,127,0.02237333357334137
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,16,8,64,0,1,float16,fp8,127,0.022282667458057404
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,16,8,64,128,1,float16,float16,255,0.07669333120187123
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,16,8,64,128,1,float16,float16,511,0.07704000174999237
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,16,8,64,0,1,float16,float16,255,0.0767626663049062
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,16,8,64,128,1,float16,fp8,255,0.022431999444961548
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,16,8,64,0,1,float16,fp8,255,0.030213333666324615
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,16,8,64,0,1,float16,float16,511,0.0962720016638438
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,16,8,64,128,1,float16,fp8,511,0.022426667312781017
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,16,8,64,0,1,float16,fp8,511,0.047541335225105286
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,16,8,64,128,1,float16,float16,1023,0.07734933495521545
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,16,8,64,0,1,float16,float16,1023,0.14201600352923074
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,16,8,64,128,1,float16,fp8,1023,0.02239466706911723
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,16,8,64,0,1,float16,fp8,1023,0.07981333136558533
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,16,8,64,0,1,float16,float16,2047,0.23007466395696005
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,16,8,64,128,1,float16,float16,2047,0.08040000001589458
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,16,1,64,0,1,float16,float16,1,0.024656000236670177
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,16,8,64,128,1,float16,fp8,2047,0.022463999688625336
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,16,8,64,0,1,float16,fp8,2047,0.1427893340587616
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,16,1,64,128,1,float16,float16,1,0.024666666984558105
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,16,1,64,128,1,float16,fp8,1,0.008559999987483025
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,16,1,64,0,1,float16,fp8,1,0.008496000121037165
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,16,1,64,128,1,float16,float16,3,0.02457600086927414
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,16,1,64,128,1,float16,fp8,3,0.00814933329820633
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,16,1,64,0,1,float16,float16,3,0.024671999116738636
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,16,1,64,128,1,float16,float16,7,0.024090667565663654
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,16,1,64,0,1,float16,fp8,3,0.009130666653315226
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,16,1,64,0,1,float16,float16,7,0.024458666642506916
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,16,1,64,128,1,float16,fp8,7,0.008485333373149237
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,16,1,64,0,1,float16,fp8,7,0.008469333251317343
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,16,1,64,128,1,float16,float16,15,0.02422933280467987
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,16,1,64,0,1,float16,float16,15,0.024192000428835552
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,16,1,64,0,1,float16,fp8,15,0.007920000081261
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,16,1,64,128,1,float16,fp8,15,0.008496000121037165
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,16,1,64,128,1,float16,float16,31,0.024458666642506916
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,16,1,64,0,1,float16,float16,31,0.024453334510326385
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,16,1,64,128,1,float16,fp8,31,0.008394666636983553
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,16,1,64,0,1,float16,fp8,31,0.007733333234985669
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,16,1,64,0,1,float16,float16,63,0.023413332800070446
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,16,1,64,128,1,float16,float16,63,0.024069334069887798
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,16,1,64,128,1,float16,fp8,63,0.008463999877373377
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,16,1,64,128,1,float16,float16,127,0.0239680012067159
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,16,1,64,128,1,float16,fp8,127,0.009141333401203156
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,16,1,64,0,1,float16,fp8,63,0.008672000219424566
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,16,1,64,0,1,float16,float16,127,0.02388266722361247
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,16,1,64,0,1,float16,fp8,127,0.009242666885256767
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,16,1,64,128,1,float16,float16,255,0.04472533365090688
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,16,1,64,0,1,float16,float16,255,0.04456533491611481
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,16,1,64,0,1,float16,fp8,255,0.01836799954374631
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,16,1,64,128,1,float16,fp8,255,0.016074666132529575
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,16,1,64,128,1,float16,float16,511,0.044639999667803444
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,16,1,64,128,1,float16,fp8,511,0.01562133307258288
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,16,1,64,0,1,float16,float16,511,0.054048001766204834
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,16,1,64,0,1,float16,fp8,511,0.024122667809327442
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,16,1,64,0,1,float16,float16,1023,0.06671999891599019
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,16,1,64,128,1,float16,float16,1023,0.04473066826661428
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,16,2,64,128,1,float16,float16,1,0.0403466671705246
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,16,1,64,128,1,float16,fp8,1023,0.016063999384641647
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,16,2,64,128,1,float16,fp8,1,0.011621333658695221
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,16,1,64,0,1,float16,fp8,1023,0.034847999612490334
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,16,2,64,0,1,float16,float16,1,0.040805332362651825
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,16,2,64,128,1,float16,float16,3,0.04114133367935816
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,16,2,64,0,1,float16,fp8,1,0.010965333630641302
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,16,2,64,0,1,float16,float16,3,0.040218666195869446
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,16,2,64,128,1,float16,fp8,3,0.011461333682139715
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,16,2,64,0,1,float16,fp8,3,0.01166933278242747
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,16,2,64,128,1,float16,float16,7,0.041365332901477814
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,16,2,64,0,1,float16,fp8,7,0.010960000256697336
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,16,2,64,0,1,float16,float16,7,0.040821333726247154
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,16,2,64,128,1,float16,fp8,7,0.011695999652147293
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,16,2,64,128,1,float16,float16,15,0.04038933416207632
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,16,2,64,0,1,float16,float16,15,0.04077333211898804
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,16,2,64,128,1,float16,fp8,15,0.011642667154471079
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,16,2,64,128,1,float16,float16,31,0.040234667559464775
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,16,2,64,0,1,float16,fp8,15,0.01166933278242747
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,16,2,64,0,1,float16,float16,31,0.0408746674656868
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,16,2,64,0,1,float16,fp8,31,0.01097600037852923
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,16,2,64,128,1,float16,fp8,31,0.0116799995303154
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,16,2,64,128,1,float16,float16,63,0.0403466671705246
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,16,2,64,0,1,float16,fp8,63,0.010832000523805618
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,16,2,64,128,1,float16,fp8,63,0.011546666423479715
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,16,2,64,0,1,float16,float16,63,0.04098133246103922
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,16,2,64,128,1,float16,float16,127,0.04080000023047129
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,16,2,64,128,1,float16,fp8,127,0.01302933320403099
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,16,2,64,0,1,float16,float16,127,0.04041599979003271
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,16,2,64,0,1,float16,fp8,127,0.013616000612576803
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,16,2,64,0,1,float16,float16,255,0.041002665956815086
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,16,2,64,128,1,float16,float16,255,0.04186133543650309
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,16,2,64,128,1,float16,fp8,255,0.013690666606028875
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,16,2,64,0,1,float16,fp8,255,0.017887999614079792
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,16,2,64,0,1,float16,float16,511,0.05444266895453135
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,16,2,64,128,1,float16,float16,511,0.04278400043646494
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,16,2,64,128,1,float16,fp8,511,0.013605333864688873
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,16,2,64,0,1,float16,fp8,511,0.026352000733216602
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,16,2,64,128,1,float16,float16,1023,0.04185600082079569
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,16,2,64,0,1,float16,float16,1023,0.07763733466466267
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,16,4,64,128,1,float16,float16,1,0.07264000177383423
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,16,2,64,128,1,float16,fp8,1023,0.013776000589132309
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,16,2,64,0,1,float16,fp8,1023,0.04381333291530609
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,16,4,64,0,1,float16,float16,1,0.07361599802970886
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,16,4,64,128,1,float16,fp8,1,0.019071999937295914
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,16,4,64,0,1,float16,fp8,1,0.019029332945744198
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,16,4,64,128,1,float16,float16,3,0.07294400036334991
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,16,4,64,128,1,float16,fp8,3,0.018618666877349217
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,16,4,64,0,1,float16,float16,3,0.07308266560236613
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,16,4,64,0,1,float16,fp8,3,0.019274666905403137
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,16,4,64,128,1,float16,float16,7,0.07375466823577881
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,16,4,64,0,1,float16,float16,7,0.07390933235486348
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,16,4,64,128,1,float16,fp8,7,0.019130667050679524
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,16,4,64,0,1,float16,fp8,7,0.019029332945744198
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,16,4,64,128,1,float16,float16,15,0.0729973316192627
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,16,4,64,0,1,float16,float16,15,0.07294933497905731
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,16,4,64,128,1,float16,fp8,15,0.019007999449968338
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,16,4,64,0,1,float16,fp8,15,0.019082666685183842
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,16,4,64,128,1,float16,float16,31,0.07394133508205414
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,16,4,64,0,1,float16,float16,31,0.07362133264541626
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,16,4,64,128,1,float16,fp8,31,0.01916266605257988
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,16,4,64,0,1,float16,fp8,31,0.01915733392039935
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,16,4,64,128,1,float16,float16,63,0.07221333185831706
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,16,4,64,0,1,float16,float16,63,0.07211199899514516
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,16,4,64,128,1,float16,fp8,63,0.019013332823912304
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,16,4,64,0,1,float16,fp8,63,0.019130667050679524
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,16,4,64,128,1,float16,float16,127,0.07299200197060902
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,16,4,64,0,1,float16,float16,127,0.07274666428565979
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,16,4,64,128,1,float16,float16,255,0.07821866869926453
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,16,4,64,128,1,float16,fp8,127,0.022330666581789654
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,16,4,64,0,1,float16,fp8,127,0.022154666483402252
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,16,4,64,0,1,float16,float16,255,0.07703466713428497
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,16,4,64,128,1,float16,fp8,255,0.022282667458057404
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,16,4,64,0,1,float16,fp8,255,0.03046933313210805
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,16,4,64,128,1,float16,fp8,511,0.021674667795499165
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,16,4,64,128,1,float16,float16,511,0.07795733213424683
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,16,4,64,0,1,float16,float16,511,0.09807466467221577
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,16,4,64,0,1,float16,fp8,511,0.047839999198913574
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,16,4,64,0,1,float16,float16,1023,0.14400532841682434
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,16,4,64,128,1,float16,float16,1023,0.07764266431331635
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,16,4,64,0,1,float16,fp8,1023,0.07919466495513916
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,16,4,64,128,1,float16,fp8,1023,0.02231466770172119
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,16,8,64,128,1,float16,float16,1,0.1349120040734609
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,16,8,64,0,1,float16,float16,1,0.13476799925168356
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,16,8,64,128,1,float16,fp8,1,0.03331733246644338
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,16,8,64,0,1,float16,fp8,1,0.03305600086847941
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,16,8,64,0,1,float16,float16,3,0.13320533434549967
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,16,8,64,128,1,float16,float16,3,0.13370666901270548
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,16,8,64,128,1,float16,fp8,3,0.03297599901755651
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,16,8,64,0,1,float16,fp8,3,0.03315199911594391
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,16,8,64,128,1,float16,float16,7,0.1346773306528727
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,16,8,64,0,1,float16,fp8,7,0.032399999598662056
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,16,8,64,0,1,float16,float16,7,0.13457600275675455
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,16,8,64,128,1,float16,float16,15,0.13355200489362082
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,16,8,64,128,1,float16,fp8,7,0.033615998923778534
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,16,8,64,0,1,float16,float16,15,0.13382933537165323
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,16,8,64,128,1,float16,fp8,15,0.03316800047953924
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,16,8,64,128,1,float16,float16,31,0.1332266628742218
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,16,8,64,0,1,float16,fp8,15,0.03316266586383184
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,16,8,64,0,1,float16,fp8,31,0.03315199911594391
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,16,8,64,0,1,float16,float16,31,0.13471999764442444
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,16,8,64,128,1,float16,fp8,31,0.03297599901755651
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,16,8,64,0,1,float16,float16,63,0.133733332157135
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,16,8,64,128,1,float16,float16,63,0.1325546701749166
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,16,8,64,128,1,float16,fp8,63,0.03224000086386999
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,16,8,64,0,1,float16,fp8,63,0.03299733251333237
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,16,8,64,128,1,float16,float16,127,0.13529066244761148
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,16,8,64,0,1,float16,float16,127,0.13553067048390707
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,16,8,64,128,1,float16,fp8,127,0.03949866692225138
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,16,8,64,128,1,float16,float16,255,0.1423466702302297
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,16,8,64,0,1,float16,fp8,127,0.03993066648642222
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,16,8,64,0,1,float16,float16,255,0.14145066340764365
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,16,8,64,128,1,float16,fp8,255,0.040448000033696495
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,16,8,64,0,1,float16,fp8,255,0.05724266668160757
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,16,8,64,128,1,float16,float16,511,0.14411733547846475
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,16,8,64,0,1,float16,float16,511,0.18106667200724283
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,16,8,64,128,1,float16,fp8,511,0.04051200052102407
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,16,8,64,128,1,float16,float16,1023,0.14289066195487976
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,16,8,64,0,1,float16,fp8,511,0.08822932839393616
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,16,1,64,128,1,float16,float16,1,0.008400000010927519
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,16,1,64,0,1,float16,float16,1,0.00842666688064734
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,16,8,64,128,1,float16,fp8,1023,0.041077333192030586
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,16,1,64,128,1,float16,fp8,1,0.006175999840100606
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,16,1,64,0,1,float16,fp8,1,0.007280000175038974
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,16,8,64,0,1,float16,float16,1023,0.2702080011367798
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,16,1,64,128,1,float16,float16,3,0.011792000383138657
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,16,8,64,0,1,float16,fp8,1023,0.1498240033785502
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,16,1,64,128,1,float16,fp8,3,0.007269333427151044
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,16,1,64,0,1,float16,float16,3,0.008320000022649765
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,16,1,64,0,1,float16,fp8,3,0.00620266670982043
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,16,1,64,128,1,float16,float16,7,0.008527999743819237
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,16,1,64,0,1,float16,float16,7,0.008453333129485449
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,16,1,64,128,1,float16,fp8,7,0.007567999884486198
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,16,1,64,0,1,float16,fp8,7,0.007429333403706551
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,16,1,64,128,1,float16,float16,15,0.008463999877373377
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,16,1,64,128,1,float16,fp8,31,0.007738666608929634
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,16,1,64,0,1,float16,fp8,31,0.007402666533986728
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,16,1,64,0,1,float16,float16,15,0.008207999790708223
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,16,1,64,128,1,float16,fp8,15,0.007237333183487256
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,16,1,64,0,1,float16,fp8,15,0.006224000205596288
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,16,1,64,128,1,float16,float16,31,0.008687999720374743
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,16,1,64,0,1,float16,float16,31,0.00903466654320558
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,16,1,64,128,1,float16,float16,63,0.009237333511312803
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,16,1,64,128,1,float16,fp8,127,0.00789866658548514
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,16,1,64,0,1,float16,float16,63,0.009418666362762451
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,16,1,64,128,1,float16,fp8,63,0.0074506668994824094
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,16,1,64,0,1,float16,fp8,63,0.00749333327015241
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,16,1,64,128,1,float16,float16,127,0.009461333354314169
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,16,1,64,0,1,float16,float16,127,0.00891733355820179
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,16,1,64,0,1,float16,fp8,127,0.008586666857202848
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,16,1,64,128,1,float16,float16,255,0.009535999968647957
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,16,1,64,0,1,float16,float16,255,0.009663999701539675
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,16,1,64,128,1,float16,fp8,255,0.008576000109314919
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,16,1,64,0,1,float16,fp8,255,0.00915733352303505
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,16,1,64,128,1,float16,float16,511,0.009312000125646591
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,16,1,64,0,1,float16,float16,511,0.010458666831254959
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,16,1,64,128,1,float16,fp8,511,0.007871999715765318
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,16,1,64,0,1,float16,fp8,511,0.009472000102202097
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,16,1,64,0,1,float16,float16,2047,0.015114666273196539
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,16,1,64,128,1,float16,float16,1023,0.01210133358836174
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,16,1,64,0,1,float16,float16,1023,0.012304000556468964
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,16,1,64,128,1,float16,fp8,1023,0.008336000144481659
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,16,1,64,0,1,float16,fp8,1023,0.010202666744589806
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,16,1,64,128,1,float16,float16,2047,0.014602666099866232
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,16,1,64,128,1,float16,fp8,2047,0.007920000081261
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,16,1,64,0,1,float16,fp8,2047,0.011285333583752314
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,16,1,64,128,1,float16,float16,4095,0.014655999839305878
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,16,1,64,0,1,float16,float16,4095,0.015450666348139444
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,16,1,64,128,1,float16,fp8,4095,0.00854399986565113
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,16,1,64,0,1,float16,fp8,4095,0.01293333371480306
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,16,1,64,128,1,float16,float16,8191,0.017957333475351334
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,16,1,64,0,1,float16,float16,8191,0.02199466774861018
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,16,1,64,128,1,float16,fp8,8191,0.008047999814152718
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,16,1,64,0,1,float16,fp8,8191,0.01706133286158244
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,16,1,64,128,1,float16,float16,16383,0.01886933296918869
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,16,1,64,0,1,float16,float16,16383,0.025626666843891144
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,16,1,64,128,1,float16,fp8,16383,0.008853333070874214
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,16,1,64,0,1,float16,fp8,16383,0.020549333343903225
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,16,1,64,128,1,float16,float16,32767,0.02258133391539256
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,16,1,64,0,1,float16,float16,32767,0.037061333656311035
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,16,1,64,128,1,float16,fp8,32767,0.008005333443482717
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,16,1,64,0,1,float16,fp8,32767,0.028410665690898895
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,16,1,64,128,1,float16,float16,65535,0.026186667382717133
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,16,1,64,0,1,float16,float16,65535,0.05620799958705902
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,16,1,64,128,1,float16,fp8,65535,0.008816000074148178
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,16,1,64,0,1,float16,fp8,65535,0.04333333174387614
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,16,2,64,128,1,float16,float16,1,0.008421333506703377
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,16,2,64,0,1,float16,float16,1,0.008576000109314919
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,16,2,64,128,1,float16,fp8,1,0.005834666391213735
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,16,2,64,0,1,float16,fp8,1,0.007194666812817256
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,16,2,64,128,1,float16,float16,3,0.008442666381597519
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,16,2,64,0,1,float16,float16,3,0.007914666707317034
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,16,2,64,128,1,float16,fp8,3,0.00730666642387708
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,16,2,64,0,1,float16,fp8,3,0.007280000175038974
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,16,2,64,128,1,float16,float16,7,0.007978666573762894
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,16,2,64,0,1,float16,float16,7,0.008383999889095625
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,16,1,64,0,1,float16,float16,131071,0.09101866682370503
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,16,2,64,128,1,float16,fp8,7,0.011301333705584208
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,16,2,64,0,1,float16,fp8,7,0.0058026667684316635
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,16,1,64,128,1,float16,float16,131071,0.033813332517941795
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,16,2,64,128,1,float16,float16,15,0.008447999755541483
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,16,2,64,0,1,float16,float16,15,0.007850666840871176
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,16,1,64,128,1,float16,fp8,131071,0.008703999842206636
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,16,2,64,0,1,float16,float16,31,0.00850133349498113
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,16,2,64,128,1,float16,float16,31,0.008714666590094566
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,16,2,64,128,1,float16,fp8,15,0.007301333049933116
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,16,2,64,0,1,float16,fp8,15,0.005877333382765452
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,16,1,64,0,1,float16,fp8,131071,0.0813920001188914
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,16,2,64,128,1,float16,fp8,31,0.005701333284378052
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,16,2,64,0,1,float16,fp8,31,0.007589333380262057
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,16,2,64,128,1,float16,float16,63,0.009189333145817121
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,16,2,64,0,1,float16,float16,63,0.009392000113924345
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,16,2,64,128,1,float16,fp8,63,0.007162666569153468
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,16,2,64,0,1,float16,fp8,63,0.005754666402935982
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,16,2,64,128,1,float16,float16,255,0.009546666716535887
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,16,2,64,128,1,float16,float16,127,0.009477333476146063
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,16,2,64,0,1,float16,float16,127,0.008901333436369896
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,16,2,64,128,1,float16,fp8,127,0.00706666645904382
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,16,2,64,0,1,float16,fp8,127,0.007397333160042763
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,16,2,64,0,1,float16,float16,255,0.009594666461149851
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,16,2,64,128,1,float16,fp8,255,0.00721066693464915
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,16,2,64,0,1,float16,fp8,255,0.008005333443482717
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,16,2,64,128,1,float16,float16,511,0.011776000261306763
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,16,2,64,0,1,float16,float16,511,0.011338666081428528
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,16,2,64,128,1,float16,fp8,511,0.007482666522264481
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,16,2,64,0,1,float16,fp8,511,0.008229333286484083
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,16,2,64,128,1,float16,float16,1023,0.012351999680201212
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,16,2,64,0,1,float16,float16,1023,0.012319999436537424
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,16,2,64,128,1,float16,fp8,1023,0.007770666852593422
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,16,2,64,0,1,float16,fp8,1023,0.008805333326260248
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,16,2,64,128,1,float16,float16,2047,0.016549333930015564
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,16,2,64,0,1,float16,float16,2047,0.018570666511853535
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,16,2,64,128,1,float16,fp8,2047,0.007781333600481351
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,16,2,64,0,1,float16,fp8,2047,0.010410666465759277
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,16,2,64,128,1,float16,float16,4095,0.0183999997874101
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,16,2,64,0,1,float16,float16,4095,0.0210506667693456
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,16,2,64,128,1,float16,fp8,4095,0.0074346667776505155
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,16,2,64,0,1,float16,fp8,4095,0.011893333246310553
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,16,2,64,128,1,float16,float16,8191,0.020256000260512035
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,16,2,64,0,1,float16,float16,8191,0.028234665592511494
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,16,2,64,128,1,float16,fp8,8191,0.008031999692320824
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,16,2,64,0,1,float16,fp8,8191,0.01573866605758667
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,16,2,64,128,1,float16,float16,16383,0.023813332120577495
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,16,2,64,0,1,float16,float16,16383,0.03882666677236557
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,16,2,64,128,1,float16,fp8,16383,0.007386666412154834
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,16,2,64,0,1,float16,fp8,16383,0.02170666555563609
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,16,2,64,128,1,float16,float16,32767,0.028832000990708668
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,16,2,64,0,1,float16,float16,32767,0.06092800199985504
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,16,2,64,128,1,float16,fp8,32767,0.008037333066264788
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,16,2,64,0,1,float16,fp8,32767,0.03555200000603994
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,16,2,64,128,1,float16,fp8,65535,0.007829333345095316
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,16,2,64,128,1,float16,float16,65535,0.035877334574858345
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,16,2,64,0,1,float16,float16,65535,0.09261866410573323
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,16,4,64,128,1,float16,float16,1,0.007930666829148928
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,16,2,64,0,1,float16,fp8,65535,0.05398400127887726
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,16,4,64,0,1,float16,float16,1,0.00871999996403853
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,16,4,64,128,1,float16,fp8,1,0.00725333330531915
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,16,4,64,0,1,float16,fp8,1,0.0058453331391016645
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,16,4,64,128,1,float16,float16,3,0.008410666758815447
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,16,4,64,0,1,float16,float16,3,0.008623999853928884
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,16,4,64,0,1,float16,fp8,3,0.005877333382765452
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,16,4,64,128,1,float16,fp8,3,0.007391999786098798
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,16,4,64,0,1,float16,float16,7,0.008885333314538002
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,16,4,64,128,1,float16,float16,7,0.008522666369875273
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,16,4,64,128,1,float16,fp8,7,0.005872000008821487
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,16,2,64,0,1,float16,float16,131071,0.14501333236694336
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,16,4,64,0,1,float16,fp8,7,0.007301333049933116
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,16,4,64,128,1,float16,float16,15,0.00860799973209699
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,16,2,64,128,1,float16,float16,131071,0.03793066740036011
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,16,4,64,0,1,float16,float16,15,0.008581333483258883
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,16,4,64,0,1,float16,float16,31,0.008858666444818178
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,16,4,64,128,1,float16,fp8,15,0.0058133335163195925
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,16,4,64,0,1,float16,fp8,15,0.005573333551486333
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,16,2,64,128,1,float16,fp8,131071,0.008352000266313553
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,16,4,64,128,1,float16,float16,31,0.00879466657837232
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,16,2,64,0,1,float16,fp8,131071,0.09713066617647807
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,16,4,64,0,1,float16,fp8,31,0.007104000076651573
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,16,4,64,128,1,float16,fp8,31,0.00679466687142849
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,16,4,64,128,1,float16,float16,63,0.009594666461149851
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,16,4,64,128,1,float16,float16,127,0.009648000200589498
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,16,4,64,0,1,float16,float16,127,0.00955200009047985
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,16,4,64,0,1,float16,float16,63,0.008879999940594038
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,16,4,64,128,1,float16,fp8,63,0.005642666791876157
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,16,4,64,0,1,float16,fp8,63,0.006021333237489064
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,16,4,64,128,1,float16,fp8,127,0.006954666847983996
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,16,4,64,0,1,float16,fp8,127,0.008650666723648706
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,16,4,64,128,1,float16,float16,255,0.012133333832025528
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,16,4,64,0,1,float16,float16,255,0.011909333368142446
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,16,4,64,128,1,float16,fp8,255,0.006874666859706243
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,16,4,64,0,1,float16,fp8,255,0.007141333073377609
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,16,4,64,128,1,float16,float16,511,0.011589333415031433
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,16,4,64,0,1,float16,float16,511,0.012240000069141388
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,16,4,64,128,1,float16,fp8,511,0.008559999987483025
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,16,4,64,0,1,float16,fp8,511,0.008421333506703377
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,16,4,64,128,1,float16,float16,1023,0.016261332978804905
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,16,4,64,0,1,float16,float16,1023,0.01685333376129468
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,16,4,64,128,1,float16,fp8,1023,0.0069386667261521024
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,16,4,64,0,1,float16,fp8,1023,0.008576000109314919
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,16,4,64,128,1,float16,float16,2047,0.019306667149066925
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,16,4,64,0,1,float16,float16,2047,0.024879999458789825
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,16,4,64,128,1,float16,fp8,2047,0.008133333176374435
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,16,4,64,0,1,float16,fp8,2047,0.010773333410422007
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,16,4,64,128,1,float16,float16,4095,0.02316266546646754
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,16,4,64,0,1,float16,float16,4095,0.032405334214369454
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,16,4,64,128,1,float16,fp8,4095,0.007258666679263115
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,16,4,64,0,1,float16,fp8,4095,0.014010666559139887
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,16,4,64,128,1,float16,float16,8191,0.027866666515668232
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,16,4,64,128,1,float16,float16,16383,0.03349333256483078
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,16,4,64,0,1,float16,float16,8191,0.04463466505209605
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,16,4,64,128,1,float16,fp8,8191,0.007957333077987036
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,16,4,64,0,1,float16,fp8,8191,0.022848000129063923
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,16,4,64,0,1,float16,float16,16383,0.06666666766007741
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,16,4,64,128,1,float16,fp8,16383,0.00821333316465219
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,16,4,64,0,1,float16,fp8,16383,0.03146666785081228
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,16,4,64,128,1,float16,float16,32767,0.03436266630887985
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,16,4,64,0,1,float16,float16,32767,0.09186666210492452
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,16,4,64,128,1,float16,fp8,32767,0.008266666904091835
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,16,4,64,0,1,float16,fp8,32767,0.05130666494369507
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,16,4,64,128,1,float16,float16,65535,0.03421866645415624
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,16,4,64,0,1,float16,float16,65535,0.13920000195503235
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,16,4,64,128,1,float16,fp8,65535,0.009237333511312803
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,16,4,64,0,1,float16,fp8,65535,0.08437333504358928
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,16,8,64,128,1,float16,float16,1,0.010197333370645842
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,16,8,64,0,1,float16,float16,1,0.01071999967098236
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,16,8,64,128,1,float16,fp8,1,0.0057920000205437345
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,16,8,64,0,1,float16,fp8,1,0.006725333631038666
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,16,8,64,128,1,float16,float16,3,0.010656000425418219
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,16,8,64,0,1,float16,float16,3,0.009994666402538618
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,16,8,64,128,1,float16,fp8,3,0.007029333462317784
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,16,8,64,0,1,float16,fp8,3,0.008389333263039589
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,16,8,64,128,1,float16,float16,7,0.009984000275532404
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,16,8,64,128,1,float16,fp8,7,0.0068693334857622785
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,16,8,64,0,1,float16,float16,7,0.010522666076819101
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,16,4,64,128,1,float16,fp8,131071,0.00789866658548514
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,16,8,64,0,1,float16,fp8,7,0.008421333506703377
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,16,8,64,128,1,float16,float16,15,0.010010666524370512
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,16,8,64,0,1,float16,float16,15,0.010421333213647207
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,16,4,64,128,1,float16,float16,131071,0.03651199986537298
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,16,4,64,0,1,float16,fp8,131071,0.14524267117182413
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,16,8,64,128,1,float16,fp8,15,0.0069759997228781385
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,16,4,64,0,1,float16,float16,131071,0.2372373342514038
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,16,8,64,0,1,float16,fp8,15,0.0060159998635451
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,16,8,64,128,1,float16,float16,31,0.010640000303586325
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,16,8,64,0,1,float16,float16,31,0.009957333405812582
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,16,8,64,128,1,float16,fp8,31,0.0069226666043202085
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,16,8,64,0,1,float16,fp8,31,0.007135999699433644
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,16,8,64,128,1,float16,float16,63,0.009775999933481216
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,16,8,64,0,1,float16,float16,63,0.010549332946538925
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,16,8,64,0,1,float16,fp8,127,0.00697066696981589
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,16,8,64,128,1,float16,fp8,63,0.007280000175038974
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,16,8,64,0,1,float16,fp8,63,0.006208000083764394
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,16,8,64,128,1,float16,float16,127,0.010431999961535135
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,16,8,64,0,1,float16,float16,127,0.010005333150426546
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,16,8,64,128,1,float16,fp8,127,0.0069973332186539965
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,16,8,64,0,1,float16,float16,511,0.017749333133300144
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,16,8,64,128,1,float16,float16,255,0.012085333466529846
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,16,8,64,0,1,float16,float16,255,0.01258133351802826
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,16,8,64,128,1,float16,fp8,255,0.007621333623925845
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,16,8,64,0,1,float16,fp8,255,0.007877333089709282
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,16,8,64,128,1,float16,float16,511,0.01670933390657107
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,16,8,64,128,1,float16,fp8,511,0.007104000076651573
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,16,8,64,0,1,float16,fp8,511,0.008693333094318708
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,16,8,64,128,1,float16,float16,1023,0.021935999393463135
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,16,8,64,0,1,float16,float16,1023,0.024698667228221893
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,16,8,64,128,1,float16,fp8,1023,0.008112000301480293
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,16,8,64,0,1,float16,fp8,1023,0.010751999914646149
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,16,8,64,128,1,float16,float16,2047,0.027600000301996868
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,16,8,64,0,1,float16,float16,2047,0.03581333408753077
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,16,8,64,128,1,float16,fp8,2047,0.007813333223263422
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,16,8,64,0,1,float16,fp8,2047,0.016597333053747814
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,16,8,64,128,1,float16,float16,4095,0.03414933383464813
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,16,8,64,0,1,float16,float16,4095,0.05190399785836538
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,16,8,64,128,1,float16,fp8,4095,0.008581333483258883
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,16,8,64,0,1,float16,fp8,4095,0.021397332350413006
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,16,8,64,128,1,float16,float16,8191,0.034389334420363106
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,16,8,64,0,1,float16,float16,8191,0.06668266654014587
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,16,8,64,128,1,float16,fp8,8191,0.007994666695594788
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,16,8,64,0,1,float16,fp8,8191,0.029306667546431225
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,16,8,64,128,1,float16,float16,16383,0.0340639998515447
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,16,8,64,0,1,float16,float16,16383,0.08790399630864461
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,16,8,64,128,1,float16,fp8,16383,0.008485333373149237
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,16,8,64,0,1,float16,fp8,16383,0.046725332736968994
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,16,8,64,128,1,float16,float16,32767,0.03379733363787333
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,16,8,64,0,1,float16,float16,32767,0.13100799918174744
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,16,8,64,128,1,float16,fp8,32767,0.00874133345981439
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,16,8,64,0,1,float16,fp8,32767,0.07978133360544841
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,16,8,64,128,1,float16,float16,65535,0.03379199902216593
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,16,8,64,0,1,float16,float16,65535,0.21799999475479126
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,16,8,64,128,1,float16,fp8,65535,0.00871999996403853
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,16,8,64,0,1,float16,fp8,65535,0.1439466675122579
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,16,1,64,128,1,float16,fp8,1,0.01259200026591619
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,16,1,64,128,1,float16,float16,1,0.042064001162846885
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,16,8,64,128,1,float16,float16,131071,0.03526400029659271
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,16,1,64,0,1,float16,float16,1,0.0423573354880015
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,16,1,64,0,1,float16,fp8,1,0.012645332763592402
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,16,8,64,0,1,float16,fp8,131071,0.27163734038670856
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,16,1,64,0,1,float16,float16,3,0.0415040006240209
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,16,1,64,128,1,float16,float16,3,0.04188799858093262
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,16,8,64,0,1,float16,float16,131071,0.3940639893213908
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,16,8,64,128,1,float16,fp8,131071,0.008714666590094566
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,16,1,64,0,1,float16,fp8,3,0.012597333639860153
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,16,1,64,128,1,float16,fp8,3,0.013072000195582708
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,16,1,64,128,1,float16,float16,7,0.041738669077555336
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,16,1,64,0,1,float16,float16,7,0.04148799926042557
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,16,1,64,128,1,float16,fp8,7,0.012810666114091873
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,16,1,64,0,1,float16,float16,15,0.04164266586303711
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,16,1,64,0,1,float16,fp8,15,0.01267733300725619
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,16,1,64,0,1,float16,fp8,7,0.01313599944114685
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,16,1,64,128,1,float16,float16,31,0.04139200101296107
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,16,1,64,128,1,float16,fp8,15,0.013077333569526672
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,16,1,64,128,1,float16,float16,15,0.04229333500067393
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,16,1,64,0,1,float16,fp8,31,0.01249066616098086
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,16,1,64,128,1,float16,float16,63,0.040896000961462654
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,16,1,64,0,1,float16,float16,31,0.041834667325019836
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,16,1,64,128,1,float16,fp8,31,0.013194666554530462
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,16,1,64,128,1,float16,fp8,63,0.012266666938861212
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,16,1,64,0,1,float16,float16,63,0.04144533226887385
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,16,1,64,0,1,float16,float16,127,0.04068800061941147
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,16,1,64,128,1,float16,fp8,127,0.015306666493415833
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,16,1,64,0,1,float16,fp8,63,0.012885333349307379
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,16,1,64,128,1,float16,float16,127,0.04139200101296107
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,16,1,64,0,1,float16,float16,255,0.04238399863243103
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,16,1,64,0,1,float16,fp8,127,0.015552000453074774
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,16,1,64,0,1,float16,fp8,255,0.02025066688656807
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,16,1,64,128,1,float16,float16,255,0.044309332966804504
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,16,1,64,128,1,float16,float16,511,0.043791999419530235
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,16,1,64,128,1,float16,fp8,255,0.01578666642308235
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,16,1,64,0,1,float16,fp8,511,0.03075733284155528
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,16,2,64,0,1,float16,float16,1,0.07364266614119212
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,16,1,64,0,1,float16,float16,511,0.0569653312365214
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,16,2,64,0,1,float16,fp8,1,0.019050666441520054
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,16,1,64,128,1,float16,fp8,511,0.015717333803574245
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,16,2,64,0,1,float16,float16,3,0.07344000041484833
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,16,2,64,128,1,float16,float16,1,0.07377600173155467
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,16,2,64,128,1,float16,fp8,1,0.019541333119074505
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,16,2,64,128,1,float16,fp8,3,0.018778666853904724
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,16,2,64,128,1,float16,float16,3,0.07412800192832947
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,16,2,64,0,1,float16,fp8,3,0.019413333386182785
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,16,2,64,128,1,float16,float16,7,0.0735999991496404
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,16,2,64,128,1,float16,fp8,7,0.018842666099468868
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,16,2,64,0,1,float16,float16,7,0.07460799813270569
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,16,2,64,0,1,float16,float16,15,0.07362666726112366
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,16,2,64,0,1,float16,fp8,7,0.01915733392039935
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,16,2,64,128,1,float16,float16,15,0.0749066670735677
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,16,2,64,128,1,float16,float16,31,0.07364800075689952
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,16,2,64,128,1,float16,fp8,15,0.01923199991385142
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,16,2,64,0,1,float16,float16,31,0.07338666419188182
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,16,2,64,0,1,float16,fp8,15,0.019424000134070713
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,16,2,64,0,1,float16,fp8,31,0.018805333723624546
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,16,2,64,128,1,float16,fp8,31,0.01952533299724261
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,16,2,64,128,1,float16,fp8,63,0.018853332847356796
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,16,2,64,128,1,float16,float16,63,0.07297066847483318
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,16,2,64,0,1,float16,float16,127,0.0737013320128123
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,16,2,64,0,1,float16,float16,63,0.0738506664832433
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,16,2,64,128,1,float16,float16,127,0.0730506678422292
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,16,2,64,128,1,float16,fp8,127,0.022448000808556873
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,16,2,64,0,1,float16,fp8,63,0.019274666905403137
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,16,2,64,128,1,float16,float16,255,0.07877866427103679
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,16,2,64,0,1,float16,fp8,127,0.023168000082174938
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,16,2,64,0,1,float16,float16,255,0.07903466622034709
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,16,2,64,128,1,float16,float16,511,0.07923733194669087
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,16,2,64,0,1,float16,float16,511,0.0999893347422282
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,16,2,64,128,1,float16,fp8,255,0.022970666488011677
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,16,2,64,0,1,float16,fp8,255,0.03163733333349228
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,16,2,64,0,1,float16,fp8,511,0.04846400022506714
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,16,2,64,128,1,float16,fp8,511,0.02306666721900304
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,16,4,64,128,1,float16,fp8,1,0.032501332461833954
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,16,4,64,0,1,float16,fp8,1,0.03266666581233343
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,16,4,64,128,1,float16,float16,1,0.13717866937319437
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,16,4,64,0,1,float16,float16,1,0.13565333684285483
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,16,4,64,128,1,float16,fp8,3,0.03268266717592875
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,16,4,64,128,1,float16,float16,3,0.13645333051681519
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,16,4,64,0,1,float16,float16,3,0.13750933607419333
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,16,4,64,128,1,float16,float16,7,0.13547733426094055
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,16,4,64,0,1,float16,float16,7,0.13634133338928223
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,16,4,64,0,1,float16,fp8,3,0.033301333586374916
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,16,4,64,128,1,float16,fp8,7,0.03335466732581457
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,16,4,64,128,1,float16,float16,15,0.13565867145856222
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,16,4,64,0,1,float16,fp8,7,0.03310933212439219
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,16,4,64,0,1,float16,float16,15,0.13652799526850382
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,16,4,64,128,1,float16,fp8,15,0.03356799980004629
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,16,4,64,0,1,float16,fp8,15,0.03288000077009201
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,16,4,64,128,1,float16,fp8,31,0.03267733256022135
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,16,4,64,128,1,float16,float16,31,0.1365120013554891
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,16,4,64,0,1,float16,float16,31,0.13725866874059042
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,16,4,64,0,1,float16,fp8,31,0.033157333731651306
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,16,4,64,128,1,float16,float16,63,0.13594133655230203
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,16,4,64,128,1,float16,fp8,63,0.03272533416748047
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,16,4,64,0,1,float16,float16,63,0.13671466708183289
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,16,4,64,0,1,float16,fp8,63,0.032560000816980995
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,16,4,64,128,1,float16,float16,127,0.1381600002447764
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,16,4,64,0,1,float16,float16,127,0.1383946637312571
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,16,4,64,128,1,float16,fp8,127,0.039994666973749794
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,16,4,64,128,1,float16,float16,255,0.1448319951693217
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,16,4,64,0,1,float16,fp8,127,0.04061333338419596
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,16,4,64,128,1,float16,fp8,255,0.04032533367474874
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,16,4,64,0,1,float16,float16,255,0.1449066698551178
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,16,4,64,0,1,float16,fp8,255,0.0576853354771932
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,16,4,64,128,1,float16,fp8,511,0.04104000081618627
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,16,8,64,128,1,float16,float16,1,0.2576746741930644
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,16,4,64,128,1,float16,float16,511,0.14631999532381693
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,16,4,64,0,1,float16,float16,511,0.18420799573262533
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,16,4,64,0,1,float16,fp8,511,0.0881866713364919
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,16,8,64,0,1,float16,fp8,1,0.06366933385531108
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,16,8,64,0,1,float16,float16,1,0.26200000445048016
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,16,8,64,128,1,float16,fp8,1,0.06503466765085857
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,16,8,64,128,1,float16,float16,3,0.25785066684087116
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,16,8,64,0,1,float16,float16,3,0.2606079975763957
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,16,8,64,128,1,float16,float16,7,0.2593653400739034
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,16,8,64,128,1,float16,fp8,3,0.06293866535027821
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,16,8,64,0,1,float16,fp8,3,0.06367999811967213
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,16,8,64,0,1,float16,fp8,7,0.06386666496594746
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,16,8,64,0,1,float16,float16,7,0.26153600215911865
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,16,8,64,128,1,float16,fp8,7,0.06498133142789204
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,16,8,64,128,1,float16,float16,15,0.25775466362635296
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,16,8,64,128,1,float16,fp8,15,0.06348800162474315
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,16,8,64,0,1,float16,float16,15,0.26013867060343426
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,16,8,64,0,1,float16,fp8,15,0.06322666505972545
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,16,8,64,0,1,float16,float16,31,0.25965867439905804
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,16,8,64,128,1,float16,float16,31,0.2613919973373413
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,16,8,64,128,1,float16,fp8,31,0.06391466657320659
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,16,8,64,128,1,float16,float16,63,0.2617119948069255
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,16,8,64,0,1,float16,fp8,31,0.06485866506894429
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,16,8,64,0,1,float16,float16,63,0.26415467262268066
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,16,8,64,128,1,float16,fp8,63,0.06436266501744588
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,16,8,64,128,1,float16,float16,127,0.2598346670468648
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,16,8,64,0,1,float16,fp8,63,0.06303466856479645
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,16,8,64,0,1,float16,fp8,127,0.07795199751853943
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,16,8,64,0,1,float16,float16,127,0.2614186604817708
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,16,8,64,128,1,float16,fp8,127,0.0790773332118988
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,16,8,64,0,1,float16,float16,255,0.2749386628468831
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,16,8,64,128,1,float16,float16,255,0.2764959931373596
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,16,8,64,128,1,float16,fp8,255,0.07916800181070964
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,16,8,64,0,1,float16,fp8,255,0.10653332869211833
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,16,8,64,0,1,float16,float16,511,0.3502720197041829
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,16,8,64,128,1,float16,float16,511,0.2765653332074483
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,16,8,64,128,1,float16,fp8,511,0.07924266656239827
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,16,1,64,128,1,float16,float16,1,0.0763626645008723
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,16,8,64,0,1,float16,fp8,511,0.1673706571261088
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,16,1,64,0,1,float16,fp8,1,0.022005334496498108
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,16,1,64,128,1,float16,float16,3,0.07569066683451335
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,16,1,64,0,1,float16,float16,1,0.07677866518497467
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,16,1,64,128,1,float16,fp8,1,0.022341333329677582
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,16,1,64,0,1,float16,fp8,3,0.021770666042963665
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,16,1,64,0,1,float16,float16,3,0.0761599987745285
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,16,1,64,0,1,float16,float16,7,0.07594133416811626
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,16,1,64,128,1,float16,fp8,7,0.021685334543387096
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,16,1,64,128,1,float16,fp8,3,0.02237333357334137
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,16,1,64,128,1,float16,float16,7,0.07704533139864604
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,16,1,64,0,1,float16,float16,15,0.07607466479142506
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,16,1,64,0,1,float16,fp8,7,0.022346665461858112
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,16,1,64,0,1,float16,fp8,15,0.021920000513394673
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,16,1,64,128,1,float16,float16,15,0.07641066610813141
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,16,1,64,128,1,float16,float16,31,0.07573866844177246
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,16,1,64,128,1,float16,fp8,15,0.0222080002228419
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,16,1,64,0,1,float16,fp8,31,0.021781332790851593
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,16,1,64,0,1,float16,float16,31,0.07670400043328603
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,16,1,64,0,1,float16,float16,63,0.07442666590213776
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,16,1,64,128,1,float16,fp8,31,0.022442666192849476
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,16,1,64,0,1,float16,fp8,63,0.021557333568731945
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,16,1,64,128,1,float16,float16,63,0.07530133426189423
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,16,1,64,128,1,float16,fp8,63,0.022128000855445862
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,16,1,64,0,1,float16,float16,127,0.07533866663773854
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,16,1,64,0,1,float16,fp8,127,0.026682667434215546
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,16,1,64,128,1,float16,float16,127,0.07674133280913036
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,16,1,64,128,1,float16,fp8,127,0.027189334233601887
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,16,1,64,0,1,float16,float16,255,0.08051733175913493
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,16,1,64,128,1,float16,float16,255,0.0817493349313736
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,16,1,64,128,1,float16,fp8,255,0.027503999571005504
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,16,2,64,0,1,float16,float16,1,0.13700800140698752
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,16,2,64,128,1,float16,float16,1,0.13728533188501993
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,16,2,64,128,1,float16,float16,3,0.13661332925160727
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,16,1,64,0,1,float16,fp8,255,0.03938133269548416
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,16,2,64,128,1,float16,fp8,1,0.03364266703526179
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,16,2,64,0,1,float16,fp8,3,0.03349333256483078
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,16,2,64,0,1,float16,fp8,1,0.033957332372665405
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,16,2,64,128,1,float16,float16,7,0.13727466265360513
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,16,2,64,0,1,float16,float16,7,0.13673599561055502
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,16,2,64,128,1,float16,fp8,3,0.0340639998515447
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,16,2,64,0,1,float16,float16,3,0.13870400190353394
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,16,2,64,0,1,float16,fp8,7,0.0335359995563825
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,16,2,64,128,1,float16,fp8,7,0.03409600009520849
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,16,2,64,0,1,float16,float16,15,0.13729066650072733
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,16,2,64,128,1,float16,fp8,15,0.03322133421897888
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,16,2,64,128,1,float16,float16,15,0.1383039951324463
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,16,2,64,0,1,float16,float16,31,0.1369706690311432
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,16,2,64,0,1,float16,fp8,15,0.0340693344672521
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,16,2,64,0,1,float16,fp8,31,0.03358400116364161
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,16,2,64,128,1,float16,float16,31,0.13893866539001465
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,16,2,64,128,1,float16,float16,63,0.13741866747538248
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,16,2,64,128,1,float16,fp8,31,0.034058667719364166
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,16,2,64,128,1,float16,fp8,63,0.03310399999221166
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,16,2,64,128,1,float16,float16,127,0.1402400036652883
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,16,2,64,0,1,float16,float16,63,0.1381280024846395
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,16,2,64,128,1,float16,fp8,127,0.042437334855397545
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,16,2,64,0,1,float16,fp8,63,0.033887999753157295
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,16,2,64,128,1,float16,float16,255,0.1483519971370697
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,16,2,64,0,1,float16,float16,127,0.14149866501490274
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,16,2,64,0,1,float16,fp8,127,0.04378666480382284
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,16,2,64,0,1,float16,float16,255,0.14818132917086282
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,16,2,64,0,1,float16,fp8,255,0.05923733115196228
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,16,4,64,0,1,float16,float16,1,0.26229333877563477
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,16,2,64,128,1,float16,fp8,255,0.043365334471066795
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,16,4,64,128,1,float16,float16,1,0.2651840051015218
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,16,4,64,128,1,float16,float16,3,0.26467732588450116
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,16,4,64,0,1,float16,float16,3,0.26331732670466107
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,16,4,64,128,1,float16,fp8,1,0.06709333260854085
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,16,4,64,0,1,float16,fp8,1,0.0670826683441798
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,16,4,64,0,1,float16,fp8,3,0.06607999900976817
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,16,4,64,128,1,float16,fp8,3,0.06631466746330261
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,16,4,64,0,1,float16,fp8,7,0.06596800188223521
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,16,4,64,128,1,float16,float16,7,0.26499199867248535
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,16,4,64,0,1,float16,float16,7,0.2648640076319377
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,16,4,64,128,1,float16,float16,15,0.2675146659215291
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,16,4,64,128,1,float16,float16,31,0.2658933401107788
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,16,4,64,0,1,float16,float16,15,0.26654932896296185
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,16,4,64,0,1,float16,fp8,15,0.06592000027497609
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,16,4,64,0,1,float16,float16,31,0.26712532838185626
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,16,4,64,128,1,float16,fp8,31,0.06668800115585327
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,16,4,64,128,1,float16,float16,63,0.26789865891138714
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,16,4,64,0,1,float16,fp8,31,0.06709866722424825
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,16,4,64,0,1,float16,float16,63,0.26921600103378296
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,16,4,64,128,1,float16,fp8,63,0.06582933167616527
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,16,4,64,0,1,float16,float16,127,0.26586665709813434
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,16,4,64,0,1,float16,fp8,63,0.06681066751480103
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,16,4,64,128,1,float16,float16,127,0.26768000920613605
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,16,4,64,128,1,float16,fp8,127,0.07964266836643219
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,16,4,64,128,1,float16,fp8,15,0.06596800188223521
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,16,4,64,0,1,float16,float16,255,0.27823466062545776
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,16,4,64,128,1,float16,fp8,7,0.06683733562628429
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,16,4,64,0,1,float16,fp8,127,0.07961066563924153
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,16,4,64,128,1,float16,float16,255,0.2794933319091797
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,16,4,64,128,1,float16,fp8,255,0.08006933331489563
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,16,8,64,128,1,float16,fp8,1,0.1302826702594757
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,16,8,64,0,1,float16,fp8,1,0.13110933701197305
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,16,8,64,128,1,float16,float16,1,0.5152746836344401
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,16,8,64,0,1,float16,float16,1,0.5077173312505087
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,16,8,64,0,1,float16,fp8,3,0.1301866670449575
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,16,8,64,128,1,float16,float16,3,0.5122933387756348
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,16,8,64,0,1,float16,float16,3,0.5079520146052042
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,16,8,64,128,1,float16,float16,7,0.5105493466059366
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,16,8,64,0,1,float16,float16,7,0.5150613387425741
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,16,8,64,128,1,float16,fp8,7,0.13057066996892294
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,16,8,64,128,1,float16,fp8,15,0.1309279998143514
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,16,8,64,128,1,float16,float16,15,0.5150186618169149
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,16,8,64,0,1,float16,float16,15,0.5112053155899048
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,16,8,64,0,1,float16,float16,31,0.5211999813715616
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,16,8,64,0,1,float16,fp8,15,0.1317813297112783
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,16,8,64,128,1,float16,float16,31,0.5182613531748453
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,16,8,64,128,1,float16,fp8,31,0.13146133224169412
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,16,8,64,0,1,float16,fp8,31,0.13064000010490417
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,16,8,64,128,1,float16,float16,63,0.5237866640090942
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,16,4,64,0,1,float16,fp8,255,0.10816533366839091
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,16,8,64,0,1,float16,float16,63,0.5248159964879354
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,16,8,64,128,1,float16,fp8,63,0.13034666577974954
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,16,8,64,0,1,float16,float16,127,0.5127093394597372
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,16,8,64,128,1,float16,float16,127,0.5103946526845297
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,16,8,64,0,1,float16,fp8,7,0.13024000326792398
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,16,8,64,128,1,float16,fp8,127,0.15151466925938925
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,16,8,64,0,1,float16,float16,255,0.5394879976908366
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,16,1,64,128,1,float16,float16,1,0.009082666908701261
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,16,8,64,0,1,float16,fp8,255,0.2057653268178304
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,16,8,64,128,1,float16,float16,255,0.5386026700337728
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,16,1,64,0,1,float16,float16,1,0.008623999853928884
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,16,1,64,128,1,float16,fp8,1,0.00697066696981589
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,16,1,64,0,1,float16,fp8,1,0.007242666557431221
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,16,1,64,128,1,float16,float16,3,0.00892800030608972
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,16,8,64,128,1,float16,fp8,255,0.15085867047309875
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,16,1,64,0,1,float16,float16,3,0.008709333216150602
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,16,1,64,128,1,float16,fp8,3,0.007221333061655362
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,16,1,64,0,1,float16,float16,7,0.009114666531483332
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,16,8,64,128,1,float16,fp8,3,0.1313973367214203
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,16,1,64,0,1,float16,fp8,3,0.006789333497484525
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,16,1,64,128,1,float16,float16,7,0.009141333401203156
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,16,1,64,128,1,float16,fp8,7,0.006693333387374878
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,16,1,64,0,1,float16,fp8,7,0.007221333061655362
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,16,1,64,128,1,float16,float16,15,0.009119999905427298
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,16,1,64,0,1,float16,float16,15,0.009130666653315226
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,16,1,64,128,1,float16,fp8,15,0.0063680000603199005
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,16,1,64,0,1,float16,fp8,15,0.006698666761318843
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,16,1,64,128,1,float16,float16,31,0.009824000298976898
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,16,1,64,0,1,float16,float16,31,0.009786666681369146
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,16,1,64,128,1,float16,fp8,31,0.007194666812817256
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,16,1,64,0,1,float16,fp8,31,0.006309333567818006
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,16,1,64,128,1,float16,fp8,63,0.00720000018676122
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,16,8,64,0,1,float16,fp8,63,0.13140799601872763
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,16,1,64,128,1,float16,float16,63,0.011061333119869232
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,16,1,64,0,1,float16,float16,63,0.011472000430027643
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,16,1,64,0,1,float16,fp8,63,0.007365333537260692
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,16,1,64,128,1,float16,float16,127,0.011141333729028702
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,16,1,64,0,1,float16,float16,127,0.01163200040658315
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,16,1,64,128,1,float16,fp8,127,0.008650666723648706
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,16,8,64,0,1,float16,fp8,127,0.15154133240381876
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,16,1,64,0,1,float16,fp8,127,0.007418666655818622
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,16,1,64,128,1,float16,float16,255,0.011823999385039011
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,16,1,64,0,1,float16,float16,255,0.011653333902359009
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,16,1,64,128,1,float16,fp8,255,0.007738666608929634
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,16,1,64,0,1,float16,fp8,255,0.00921066664159298
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,16,1,64,128,1,float16,float16,511,0.01198400060335795
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,16,1,64,0,1,float16,float16,511,0.011871999750534693
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,16,1,64,128,1,float16,fp8,511,0.007781333600481351
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,16,1,64,0,1,float16,fp8,511,0.009679999823371569
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,16,1,64,128,1,float16,float16,1023,0.011781333635250727
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,16,1,64,0,1,float16,float16,1023,0.011525332927703857
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,16,1,64,128,1,float16,fp8,1023,0.008373333141207695
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,16,1,64,0,1,float16,fp8,1023,0.01007466639081637
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,16,1,64,128,1,float16,float16,2047,0.016832000265518825
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,16,1,64,0,1,float16,float16,2047,0.018906666586796444
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,16,1,64,128,1,float16,fp8,2047,0.008805333326260248
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,16,1,64,0,1,float16,fp8,2047,0.012063999970753988
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,16,1,64,128,1,float16,float16,4095,0.01699200024207433
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,16,1,64,0,1,float16,float16,4095,0.02111999938885371
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,16,1,64,128,1,float16,fp8,4095,0.008597333605090777
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,16,1,64,0,1,float16,fp8,4095,0.014192000031471252
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,16,1,64,128,1,float16,float16,8191,0.020746666938066483
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,16,1,64,0,1,float16,float16,8191,0.02871999889612198
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,16,1,64,128,1,float16,fp8,8191,0.00877333308259646
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,16,1,64,0,1,float16,fp8,8191,0.018922666708628338
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,16,1,64,128,1,float16,float16,16383,0.023290666441122692
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,16,1,64,0,1,float16,float16,16383,0.039034667114416756
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,16,1,64,128,1,float16,fp8,16383,0.009759999811649323
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,16,1,64,0,1,float16,fp8,16383,0.026288000245889027
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,16,1,64,0,1,float16,float16,32767,0.06192533175150553
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,16,1,64,128,1,float16,float16,32767,0.029530666768550873
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,16,1,64,128,1,float16,fp8,32767,0.008896000062425932
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,16,1,64,0,1,float16,fp8,32767,0.04469866553942362
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,16,1,64,128,1,float16,float16,65535,0.03586133321126302
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,16,2,64,128,1,float16,float16,1,0.008714666590094566
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,16,2,64,0,1,float16,float16,1,0.00921066664159298
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,16,2,64,128,1,float16,fp8,1,0.006815999746322632
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,16,1,64,0,1,float16,float16,65535,0.09355733791987102
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,16,2,64,0,1,float16,fp8,1,0.00730666642387708
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,16,2,64,128,1,float16,float16,3,0.008586666857202848
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,16,1,64,128,1,float16,fp8,65535,0.009039999917149544
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,16,1,64,0,1,float16,fp8,65535,0.06810666620731354
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,16,2,64,0,1,float16,float16,3,0.008703999842206636
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,16,2,64,128,1,float16,fp8,3,0.007333333293596904
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,16,2,64,0,1,float16,fp8,3,0.007120000198483467
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,16,2,64,128,1,float16,float16,7,0.009141333401203156
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,16,2,64,0,1,float16,float16,7,0.009232000137368837
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,16,2,64,0,1,float16,fp8,15,0.006831999868154526
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,16,2,64,128,1,float16,fp8,7,0.00578666664659977
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,16,2,64,0,1,float16,fp8,7,0.00706666645904382
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,16,2,64,128,1,float16,float16,15,0.009338666374484697
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,16,2,64,0,1,float16,float16,15,0.008832000195980072
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,16,2,64,128,1,float16,fp8,15,0.00701333334048589
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,16,2,64,128,1,float16,float16,31,0.009706666693091393
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,16,2,64,0,1,float16,float16,31,0.009935999910036722
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,16,2,64,128,1,float16,fp8,31,0.00706666645904382
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,16,2,64,0,1,float16,fp8,31,0.005749333028992017
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,16,2,64,128,1,float16,float16,63,0.011482667177915573
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,16,2,64,128,1,float16,fp8,127,0.007322666545708974
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,16,2,64,0,1,float16,float16,63,0.011616000284751257
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,16,2,64,128,1,float16,fp8,63,0.005754666402935982
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,16,2,64,0,1,float16,fp8,63,0.007114666824539502
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,16,2,64,128,1,float16,fp8,255,0.008517333616813024
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,16,2,64,128,1,float16,float16,127,0.011551999797423681
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,16,2,64,0,1,float16,float16,127,0.011663999408483505
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,16,2,64,0,1,float16,float16,511,0.01251199965675672
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,16,2,64,0,1,float16,fp8,127,0.006181333214044571
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,16,2,64,128,1,float16,float16,255,0.012122667084137598
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,16,2,64,0,1,float16,float16,255,0.011701333026091257
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,16,2,64,0,1,float16,fp8,255,0.00749333327015241
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,16,2,64,128,1,float16,float16,511,0.012144000579913458
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,16,2,64,128,1,float16,fp8,511,0.007482666522264481
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,16,2,64,0,1,float16,fp8,511,0.008383999889095625
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,16,2,64,128,1,float16,float16,1023,0.016783999900023144
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,16,2,64,128,1,float16,fp8,2047,0.00795199970404307
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,16,2,64,0,1,float16,float16,1023,0.016906666258970898
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,16,2,64,128,1,float16,fp8,1023,0.007770666852593422
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,16,2,64,0,1,float16,fp8,1023,0.009312000125646591
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,16,2,64,128,1,float16,float16,2047,0.019120000302791595
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,16,2,64,0,1,float16,float16,2047,0.024874667326609295
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,16,2,64,0,1,float16,fp8,2047,0.011194666226704916
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,16,2,64,128,1,float16,float16,4095,0.02439466615517934
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,16,2,64,0,1,float16,float16,4095,0.0330826664964358
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,16,2,64,128,1,float16,fp8,4095,0.007658666620651881
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,16,2,64,0,1,float16,fp8,4095,0.014538666854302088
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,16,2,64,0,1,float16,float16,8191,0.044341335693995156
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,16,2,64,128,1,float16,float16,8191,0.027855999767780304
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,16,2,64,128,1,float16,fp8,8191,0.008016000191370646
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,16,2,64,0,1,float16,fp8,8191,0.023599999646345775
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,16,2,64,0,1,float16,float16,16383,0.06745600203673045
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,16,2,64,128,1,float16,float16,16383,0.034287999073664345
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,16,2,64,128,1,float16,fp8,16383,0.008223999912540117
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,16,2,64,0,1,float16,fp8,16383,0.031328000128269196
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,16,2,64,128,1,float16,float16,32767,0.034858666360378265
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,16,2,64,0,1,float16,float16,32767,0.09272000193595886
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,16,2,64,128,1,float16,fp8,32767,0.008709333216150602
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,16,2,64,0,1,float16,fp8,32767,0.05243200063705444
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,16,4,64,128,1,float16,float16,1,0.0103946669648091
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,16,4,64,0,1,float16,float16,1,0.010565333068370819
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,16,2,64,128,1,float16,float16,65535,0.035018667578697205
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,16,4,64,128,1,float16,fp8,1,0.00573333352804184
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,16,2,64,128,1,float16,fp8,65535,0.007786666974425316
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,16,2,64,0,1,float16,float16,65535,0.139082670211792
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,16,4,64,128,1,float16,fp8,3,0.0057386669019858045
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,16,4,64,0,1,float16,fp8,1,0.00726400005320708
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,16,4,64,128,1,float16,float16,3,0.010645333677530289
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,16,4,64,0,1,float16,float16,3,0.010368000095089277
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,16,4,64,128,1,float16,fp8,7,0.007151999821265538
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,16,2,64,0,1,float16,fp8,65535,0.08707200487454732
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,16,4,64,0,1,float16,fp8,3,0.0057920000205437345
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,16,4,64,128,1,float16,float16,7,0.01051733394463857
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,16,4,64,0,1,float16,float16,7,0.00983466642598311
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,16,4,64,0,1,float16,fp8,7,0.008336000144481659
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,16,4,64,0,1,float16,float16,31,0.010170666500926018
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,16,4,64,128,1,float16,float16,15,0.010490667074918747
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,16,4,64,0,1,float16,float16,15,0.010362666721145311
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,16,4,64,128,1,float16,fp8,15,0.007071999832987785
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,16,4,64,128,1,float16,fp8,63,0.007322666545708974
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,16,4,64,0,1,float16,fp8,15,0.00589866687854131
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,16,4,64,128,1,float16,float16,127,0.01033599985142549
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,16,4,64,128,1,float16,float16,31,0.01027199998497963
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,16,4,64,128,1,float16,fp8,31,0.00578666664659977
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,16,4,64,0,1,float16,fp8,31,0.008250666782259941
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,16,4,64,128,1,float16,float16,63,0.010117333382368088
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,16,4,64,0,1,float16,float16,63,0.010415999839703241
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,16,4,64,0,1,float16,fp8,63,0.007274666801095009
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,16,4,64,0,1,float16,float16,127,0.009610666582981745
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,16,4,64,128,1,float16,fp8,127,0.007237333183487256
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,16,4,64,0,1,float16,fp8,127,0.007477333148320516
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,16,4,64,128,1,float16,float16,255,0.011962667107582092
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,16,4,64,0,1,float16,float16,255,0.012613333761692047
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,16,4,64,128,1,float16,fp8,255,0.007605333502093951
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,16,4,64,0,1,float16,fp8,255,0.007877333089709282
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,16,4,64,128,1,float16,float16,511,0.016677333662907284
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,16,4,64,0,1,float16,float16,511,0.016927999754746754
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,16,4,64,0,1,float16,fp8,1023,0.010634666929642359
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,16,4,64,128,1,float16,fp8,511,0.007754666730761528
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,16,4,64,0,1,float16,fp8,511,0.008527999743819237
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,16,4,64,128,1,float16,float16,1023,0.021429332594076794
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,16,4,64,0,1,float16,fp8,2047,0.01666133354107539
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,16,4,64,0,1,float16,float16,1023,0.024986666937669117
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,16,4,64,0,1,float16,float16,4095,0.05222400029500326
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,16,4,64,128,1,float16,fp8,1023,0.008047999814152718
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,16,4,64,128,1,float16,float16,2047,0.028064000109831493
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,16,4,64,0,1,float16,float16,2047,0.03552533437808355
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,16,4,64,128,1,float16,fp8,2047,0.008362666393319765
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,16,4,64,0,1,float16,float16,8191,0.06692799925804138
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,16,4,64,128,1,float16,float16,4095,0.03405333310365677
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,16,4,64,128,1,float16,fp8,4095,0.008826666822036108
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,16,4,64,0,1,float16,fp8,4095,0.021242665747801464
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,16,4,64,128,1,float16,float16,8191,0.033786666889985405
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,16,4,64,128,1,float16,fp8,8191,0.008901333436369896
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,16,4,64,0,1,float16,fp8,8191,0.02908266584078471
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,16,4,64,128,1,float16,float16,16383,0.035088000198205314
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,16,4,64,0,1,float16,float16,16383,0.0888213316599528
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,16,4,64,128,1,float16,fp8,16383,0.008303999900817871
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,16,4,64,128,1,float16,float16,32767,0.034389334420363106
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,16,4,64,128,1,float16,fp8,32767,0.008725333337982496
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,16,4,64,0,1,float16,fp8,16383,0.04710400104522705
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,16,4,64,0,1,float16,float16,32767,0.1312266687552134
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,16,4,64,0,1,float16,fp8,32767,0.07901333272457123
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,16,8,64,0,1,float16,float16,1,0.01081066702802976
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,16,8,64,128,1,float16,float16,1,0.010645333677530289
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,16,8,64,128,1,float16,fp8,1,0.006090666477878888
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,16,8,64,0,1,float16,fp8,1,0.006170666466156642
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,16,8,64,128,1,float16,float16,3,0.010847999403874079
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,16,4,64,128,1,float16,float16,65535,0.033946665624777474
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,16,4,64,0,1,float16,float16,65535,0.21977599461873373
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,16,8,64,0,1,float16,float16,3,0.010703999549150467
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,16,8,64,128,1,float16,fp8,3,0.006058666855096817
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,16,4,64,128,1,float16,fp8,65535,0.008506666868925095
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,16,8,64,0,1,float16,fp8,3,0.005930666501323382
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,16,4,64,0,1,float16,fp8,65535,0.14436266819636026
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,16,8,64,0,1,float16,float16,15,0.010832000523805618
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,16,8,64,128,1,float16,fp8,15,0.005994666367769241
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,16,8,64,128,1,float16,float16,7,0.010746666540702185
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,16,8,64,0,1,float16,float16,7,0.010405333091815313
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,16,8,64,128,1,float16,fp8,7,0.007311999797821045
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,16,8,64,0,1,float16,fp8,7,0.007120000198483467
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,16,8,64,128,1,float16,float16,15,0.010288000106811523
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,16,8,64,128,1,float16,float16,63,0.010384000216921171
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,16,8,64,128,1,float16,float16,31,0.010421333213647207
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,16,8,64,0,1,float16,fp8,15,0.008517333616813024
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,16,8,64,0,1,float16,float16,31,0.010277333358923594
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,16,8,64,128,1,float16,fp8,31,0.00720000018676122
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,16,8,64,0,1,float16,fp8,31,0.007098666702707608
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,16,8,64,0,1,float16,float16,63,0.010656000425418219
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,16,8,64,128,1,float16,fp8,63,0.0058399997651577
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,16,8,64,0,1,float16,fp8,63,0.007333333293596904
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,16,8,64,128,1,float16,float16,127,0.010570666442314783
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,16,8,64,0,1,float16,float16,127,0.010128000130256018
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,16,8,64,128,1,float16,fp8,127,0.007344000041484833
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,16,8,64,0,1,float16,fp8,127,0.007109333450595538
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,16,8,64,128,1,float16,float16,255,0.018735999862353008
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,16,8,64,0,1,float16,float16,255,0.017845333864291508
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,16,8,64,128,1,float16,fp8,255,0.007701333612203598
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,16,8,64,0,1,float16,fp8,511,0.010053333515922228
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,16,8,64,128,1,float16,float16,1023,0.03269333392381668
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,16,8,64,0,1,float16,fp8,255,0.008736000085870424
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,16,8,64,128,1,float16,float16,511,0.022874665757020313
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,16,8,64,0,1,float16,float16,511,0.025610665480295818
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,16,8,64,128,1,float16,fp8,511,0.008474666625261307
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,16,8,64,0,1,float16,float16,2047,0.050698667764663696
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,16,8,64,0,1,float16,float16,1023,0.039834665755430855
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,16,8,64,128,1,float16,fp8,1023,0.009082666908701261
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,16,8,64,0,1,float16,fp8,1023,0.015861333658297855
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,16,8,64,128,1,float16,float16,2047,0.03544000039498011
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,16,8,64,128,1,float16,fp8,2047,0.00915733352303505
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,16,8,64,0,1,float16,fp8,2047,0.020031999796628952
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,16,8,64,128,1,float16,float16,4095,0.03498666733503342
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,16,8,64,0,1,float16,float16,4095,0.06601066887378693
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,16,8,64,0,1,float16,fp8,4095,0.027621333797772724
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,16,8,64,128,1,float16,fp8,4095,0.009056000038981438
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,16,8,64,0,1,float16,float16,8191,0.0881173312664032
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,16,8,64,128,1,float16,float16,8191,0.0354720006386439
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,16,8,64,128,1,float16,fp8,8191,0.009194666519761086
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,16,8,64,0,1,float16,fp8,8191,0.045365333557128906
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,16,8,64,128,1,float16,float16,16383,0.035018667578697205
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,16,8,64,0,1,float16,float16,16383,0.13105600078900656
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,16,8,64,128,1,float16,fp8,16383,0.008639999975760778
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,16,8,64,0,1,float16,fp8,16383,0.07853866616884868
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,16,8,64,0,1,float16,fp8,32767,0.14202666282653809
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,16,8,64,128,1,float16,float16,32767,0.03566933423280716
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,16,8,64,0,1,float16,float16,32767,0.21858133872350058
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,16,8,64,128,1,float16,fp8,32767,0.008986666798591614
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,16,8,64,128,1,float16,float16,65535,0.03473600000143051
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,16,1,128,0,1,float16,float16,1,0.014069333672523499
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,16,1,128,0,1,float16,fp8,1,0.00860799973209699
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,16,8,64,128,1,float16,fp8,65535,0.008320000022649765
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,16,1,128,0,1,float16,float16,3,0.014479999740918478
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,16,8,64,0,1,float16,float16,65535,0.3891893227895101
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,16,1,128,0,1,float16,fp8,3,0.007658666620651881
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,16,1,128,0,1,float16,float16,7,0.013829333086808523
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,16,1,128,0,1,float16,fp8,7,0.007386666412154834
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,16,8,64,0,1,float16,fp8,65535,0.2730293273925781
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,16,1,128,0,1,float16,float16,15,0.015685333559910457
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,16,1,128,0,1,float16,fp8,15,0.006842666616042455
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,16,1,128,0,1,float16,float16,31,0.018853332847356796
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,16,1,128,0,1,float16,float16,63,0.019146667172511418
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,16,1,128,0,1,float16,fp8,31,0.007829333345095316
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,16,1,128,0,1,float16,fp8,63,0.007258666679263115
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,16,1,128,0,1,float16,float16,127,0.01905599981546402
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,16,1,128,0,1,float16,fp8,127,0.008272000278035799
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,16,1,128,0,1,float16,float16,255,0.013568000247081121
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,16,1,128,0,1,float16,fp8,255,0.00921066664159298
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,16,1,128,0,1,float16,float16,511,0.013904000322024027
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,16,1,128,0,1,float16,fp8,511,0.010415999839703241
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,16,1,128,0,1,float16,float16,1023,0.020090666910012562
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,16,1,128,0,1,float16,fp8,1023,0.011823999385039011
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,16,1,128,0,1,float16,float16,2047,0.029146666328112285
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,16,1,128,0,1,float16,fp8,2047,0.015541333705186844
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,16,1,128,0,1,float16,float16,16383,0.08943999807039897
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,16,1,128,0,1,float16,fp8,16383,0.05417599777380625
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,16,1,128,0,1,float16,float16,4095,0.04187199970086416
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,16,1,128,0,1,float16,fp8,4095,0.024666666984558105
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,16,1,128,0,1,float16,float16,8191,0.06053866446018219
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,16,1,128,0,1,float16,float16,32767,0.12692800164222717
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,16,1,128,0,1,float16,fp8,8191,0.032357332607110344
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,16,2,128,0,1,float16,float16,1,0.010757333288590113
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,16,2,128,0,1,float16,fp8,1,0.00589866687854131
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,16,2,128,0,1,float16,float16,3,0.011050666371981302
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,16,2,128,0,1,float16,fp8,3,0.005984000240763028
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,16,1,128,0,1,float16,fp8,32767,0.0806933343410492
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,16,2,128,0,1,float16,float16,7,0.011029332876205444
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,16,2,128,0,1,float16,fp8,7,0.007050666958093643
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,16,2,128,0,1,float16,float16,15,0.011141333729028702
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,16,2,128,0,1,float16,fp8,15,0.007407999907930692
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,16,2,128,0,1,float16,float16,31,0.010565333068370819
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,16,2,128,0,1,float16,fp8,31,0.008549333239595095
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,16,2,128,0,1,float16,fp8,255,0.008442666381597519
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,16,2,128,0,1,float16,float16,63,0.010266666611035665
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,16,2,128,0,1,float16,fp8,63,0.007120000198483467
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,16,2,128,0,1,float16,float16,127,0.01099733387430509
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,16,2,128,0,1,float16,fp8,127,0.007221333061655362
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,16,2,128,0,1,float16,float16,255,0.014005333185195923
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,16,2,128,0,1,float16,float16,511,0.019546666493018467
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,16,2,128,0,1,float16,fp8,511,0.009893333539366722
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,16,2,128,0,1,float16,float16,4095,0.06756266454855601
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,16,2,128,0,1,float16,float16,1023,0.028394666810830433
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,16,2,128,0,1,float16,fp8,1023,0.015429332852363586
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,16,2,128,0,1,float16,float16,2047,0.04331733286380768
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,16,2,128,0,1,float16,fp8,2047,0.01915733392039935
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,16,2,128,0,1,float16,fp8,4095,0.028714666763941448
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,16,2,128,0,1,float16,float16,8191,0.08501332998275757
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,16,2,128,0,1,float16,fp8,8191,0.04066666712363561
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,16,2,128,0,1,float16,float16,16383,0.12223999698956807
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,16,2,128,0,1,float16,fp8,16383,0.06012799839178721
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,16,2,128,0,1,float16,float16,32767,0.19187732537587485
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,16,4,128,0,1,float16,float16,1,0.01126933346192042
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,16,4,128,0,1,float16,fp8,1,0.006021333237489064
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,16,4,128,0,1,float16,float16,3,0.01128000020980835
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,16,4,128,0,1,float16,fp8,3,0.00684799998998642
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,16,2,128,0,1,float16,fp8,32767,0.10026133060455322
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,16,4,128,0,1,float16,float16,7,0.011306667079528173
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,16,4,128,0,1,float16,fp8,7,0.006010666489601135
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,16,4,128,0,1,float16,float16,15,0.011173332730929056
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,16,4,128,0,1,float16,fp8,15,0.00590933362642924
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,16,4,128,0,1,float16,float16,31,0.011109333485364914
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,16,4,128,0,1,float16,fp8,31,0.00702400008837382
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,16,4,128,0,1,float16,float16,63,0.011152000476916632
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,16,4,128,0,1,float16,fp8,63,0.007360000163316727
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,16,4,128,0,1,float16,float16,127,0.010960000256697336
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,16,4,128,0,1,float16,fp8,127,0.00720000018676122
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,16,4,128,0,1,float16,float16,255,0.020960000654061634
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,16,4,128,0,1,float16,fp8,255,0.009232000137368837
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,16,4,128,0,1,float16,float16,511,0.028751999139785767
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,16,4,128,0,1,float16,fp8,511,0.013888000200192133
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,16,4,128,0,1,float16,float16,1023,0.04638933142026266
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,16,4,128,0,1,float16,fp8,1023,0.019706666469573975
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,16,4,128,0,1,float16,float16,2047,0.06609066824118297
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,16,4,128,0,1,float16,fp8,2047,0.025418666501839954
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,16,4,128,0,1,float16,float16,4095,0.0844586690266927
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,16,4,128,0,1,float16,fp8,4095,0.0384853333234787
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,16,4,128,0,1,float16,float16,8191,0.12040533622105916
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,16,4,128,0,1,float16,fp8,8191,0.05694933235645294
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,16,4,128,0,1,float16,float16,16383,0.19022399187088013
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,16,4,128,0,1,float16,fp8,16383,0.09753066301345825
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,16,4,128,0,1,float16,float16,32767,0.33269866307576496
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,16,4,128,0,1,float16,fp8,32767,0.17808000246683756
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,16,8,128,0,1,float16,fp8,3,0.007520000139872233
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,16,8,128,0,1,float16,fp8,7,0.0075093333919843035
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,16,8,128,0,1,float16,fp8,1,0.007184000064929326
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,16,8,128,0,1,float16,fp8,31,0.007285333548982938
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,16,8,128,0,1,float16,fp8,15,0.008383999889095625
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,16,8,128,0,1,float16,fp8,63,0.00730666642387708
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,16,8,128,0,1,float16,fp8,127,0.007658666620651881
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,16,8,128,0,1,float16,float16,7,0.015583999454975128
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,16,8,128,0,1,float16,float16,15,0.01526933287580808
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,16,8,128,0,1,float16,float16,511,0.046522667010625206
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,16,8,128,0,1,float16,float16,3,0.015893333901961643
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,16,8,128,0,1,float16,float16,1,0.016042667130629223
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,16,8,128,0,1,float16,float16,1023,0.062650665640831
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,16,8,128,0,1,float16,fp8,255,0.0129120002190272
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,16,8,128,0,1,float16,float16,31,0.016074666132529575
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,16,8,128,0,1,float16,float16,2047,0.08372799555460612
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,16,8,128,0,1,float16,fp8,511,0.01894933357834816
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,16,8,128,0,1,float16,float16,63,0.015722667177518208
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,16,8,128,0,1,float16,fp8,2047,0.036992001036802925
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,16,8,128,0,1,float16,float16,127,0.0162773331006368
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,16,8,128,0,1,float16,float16,4095,0.11891733606656392
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,16,8,128,0,1,float16,float16,8191,0.1901706655820211
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,16,8,128,0,1,float16,float16,255,0.03129599988460541
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,16,8,128,0,1,float16,fp8,8191,0.09629866480827332
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,16,8,128,0,1,float16,float16,16383,0.3311093250910441
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,16,1,128,0,1,float16,float16,1,0.009039999917149544
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,16,8,128,0,1,float16,fp8,16383,0.17697600523630777
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,16,1,128,0,1,float16,fp8,1,0.00726400005320708
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,16,1,128,0,1,float16,float16,3,0.008496000121037165
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,16,1,128,0,1,float16,fp8,3,0.007749333356817563
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,16,8,128,0,1,float16,fp8,32767,0.33618664741516113
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,16,1,128,0,1,float16,float16,7,0.009008000294367472
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,16,1,128,0,1,float16,fp8,7,0.007280000175038974
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,16,8,128,0,1,float16,float16,32767,0.6122346719106039
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,16,1,128,0,1,float16,float16,15,0.009935999910036722
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,16,1,128,0,1,float16,fp8,15,0.007589333380262057
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,16,1,128,0,1,float16,float16,31,0.009279999881982803
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,16,1,128,0,1,float16,fp8,31,0.00842666688064734
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,16,1,128,0,1,float16,float16,63,0.011317333827416102
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,16,1,128,0,1,float16,fp8,63,0.007311999797821045
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,16,1,128,0,1,float16,float16,127,0.011557333171367645
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,16,1,128,0,1,float16,fp8,127,0.008090666805704435
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,16,1,128,0,1,float16,float16,255,0.010970667004585266
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,16,1,128,0,1,float16,float16,1023,0.016842667013406754
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,16,1,128,0,1,float16,fp8,255,0.0100853331387043
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,16,1,128,0,1,float16,float16,2047,0.01643199970324834
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,16,1,128,0,1,float16,fp8,2047,0.012117333710193634
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,16,1,128,0,1,float16,float16,511,0.015802666544914246
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,16,1,128,0,1,float16,fp8,511,0.009488000224033991
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,16,1,128,0,1,float16,fp8,1023,0.010879999647537867
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,16,1,128,0,1,float16,float16,4095,0.02828266719977061
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,16,1,128,0,1,float16,fp8,4095,0.013327999661366144
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,16,1,128,0,1,float16,float16,8191,0.018405333161354065
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,16,1,128,0,1,float16,fp8,8191,0.01637866720557213
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,16,1,128,0,1,float16,float16,16383,0.01940800001223882
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,16,1,128,0,1,float16,fp8,16383,0.019109333554903667
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,16,1,128,0,1,float16,float16,32767,0.02455466737349828
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,16,1,128,0,1,float16,fp8,32767,0.023711999257405598
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,16,2,128,0,1,float16,float16,1,0.009290666629870733
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,16,2,128,0,1,float16,fp8,1,0.005882666756709416
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,16,2,128,0,1,float16,float16,3,0.009125333279371262
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,16,1,128,0,1,float16,float16,65535,0.029391999046007793
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,16,2,128,0,1,float16,fp8,3,0.005872000008821487
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,16,1,128,0,1,float16,fp8,65535,0.030554667115211487
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,16,2,128,0,1,float16,float16,7,0.00891733355820179
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,16,2,128,0,1,float16,fp8,7,0.0058186668902635574
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,16,1,128,0,1,float16,float16,131071,0.03889599939187368
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,16,2,128,0,1,float16,fp8,15,0.00842666688064734
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,16,2,128,0,1,float16,float16,15,0.009312000125646591
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,16,2,128,0,1,float16,float16,31,0.009370666618148485
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,16,2,128,0,1,float16,fp8,31,0.005770666524767876
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,16,2,128,0,1,float16,fp8,63,0.007082666580875714
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,16,2,128,0,1,float16,float16,127,0.01110400011142095
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,16,2,128,0,1,float16,float16,63,0.011514666179815928
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,16,2,128,0,1,float16,float16,255,0.011642667154471079
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,16,1,128,0,1,float16,fp8,131071,0.040336000422636666
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,16,2,128,0,1,float16,fp8,127,0.006565333033601443
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,16,2,128,0,1,float16,fp8,255,0.007807999849319458
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,16,2,128,0,1,float16,float16,511,0.015978666643301647
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,16,2,128,0,1,float16,fp8,511,0.008709333216150602
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,16,2,128,0,1,float16,float16,1023,0.016810666769742966
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,16,2,128,0,1,float16,fp8,1023,0.008639999975760778
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,16,2,128,0,1,float16,float16,2047,0.016544000556071598
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,16,2,128,0,1,float16,fp8,2047,0.010037333394090334
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,16,2,128,0,1,float16,float16,8191,0.01848000039656957
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,16,2,128,0,1,float16,fp8,8191,0.012608000387748083
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,16,8,128,0,1,float16,fp8,1023,0.02495466669400533
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,16,2,128,0,1,float16,float16,4095,0.015578666081031164
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,16,2,128,0,1,float16,fp8,4095,0.01108266661564509
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,16,2,128,0,1,float16,float16,16383,0.020202666521072388
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,16,2,128,0,1,float16,fp8,16383,0.014954666296641031
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,16,8,128,0,1,float16,fp8,4095,0.05612266560395559
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,16,2,128,0,1,float16,float16,32767,0.024234667420387268
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,16,2,128,0,1,float16,fp8,32767,0.01937599976857503
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,16,4,128,0,1,float16,float16,1,0.008805333326260248
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,16,4,128,0,1,float16,fp8,1,0.005557333429654439
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,16,2,128,0,1,float16,float16,65535,0.030229332546393078
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,16,4,128,0,1,float16,float16,3,0.008805333326260248
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,16,4,128,0,1,float16,fp8,3,0.007237333183487256
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,16,2,128,0,1,float16,fp8,65535,0.024373332659403484
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,16,4,128,0,1,float16,float16,7,0.008538666491707167
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,16,4,128,0,1,float16,fp8,7,0.006906666482488315
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,16,4,128,0,1,float16,float16,15,0.009050666665037474
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,16,4,128,0,1,float16,fp8,15,0.00721066693464915
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,16,4,128,0,1,float16,float16,31,0.009765333185593287
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,16,4,128,0,1,float16,fp8,31,0.00595199999709924
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,16,4,128,0,1,float16,float16,63,0.01146666705608368
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,16,2,128,0,1,float16,fp8,131071,0.03566399961709976
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,16,4,128,0,1,float16,fp8,63,0.00697066696981589
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,16,4,128,0,1,float16,float16,127,0.011359999577204386
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,16,2,128,0,1,float16,float16,131071,0.05756799876689911
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,16,4,128,0,1,float16,fp8,127,0.007269333427151044
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,16,4,128,0,1,float16,float16,255,0.011242666592200598
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,16,4,128,0,1,float16,fp8,255,0.007760000104705493
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,16,4,128,0,1,float16,float16,511,0.015957333147525787
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,16,4,128,0,1,float16,fp8,511,0.007701333612203598
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,16,4,128,0,1,float16,float16,1023,0.01661866654952367
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,16,4,128,0,1,float16,fp8,1023,0.008592000231146812
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,16,4,128,0,1,float16,float16,2047,0.015205333630243937
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,16,4,128,0,1,float16,fp8,2047,0.009461333354314169
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,16,4,128,0,1,float16,float16,4095,0.015098666151364645
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,16,4,128,0,1,float16,fp8,4095,0.010784000158309937
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,16,4,128,0,1,float16,float16,8191,0.018965333700180054
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,16,4,128,0,1,float16,fp8,8191,0.012527999778588613
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,16,4,128,0,1,float16,float16,16383,0.020992000897725422
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,16,4,128,0,1,float16,fp8,16383,0.015402667224407196
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,16,4,128,0,1,float16,float16,32767,0.03465066601832708
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,16,4,128,0,1,float16,fp8,32767,0.021018666525681812
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,16,8,128,0,1,float16,float16,1,0.008842666943868002
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,16,4,128,0,1,float16,float16,65535,0.045312002301216125
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,16,8,128,0,1,float16,fp8,1,0.007125333572427432
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,16,8,128,0,1,float16,float16,3,0.009103999783595404
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,16,4,128,0,1,float16,fp8,65535,0.026426665484905243
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,16,8,128,0,1,float16,fp8,3,0.008837333569924036
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,16,8,128,0,1,float16,float16,7,0.008842666943868002
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,16,8,128,0,1,float16,fp8,7,0.005488000189264615
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,16,8,128,0,1,float16,float16,15,0.009296000003814697
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,16,8,128,0,1,float16,fp8,15,0.008421333506703377
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,16,8,128,0,1,float16,float16,31,0.0099093330403169
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,16,8,128,0,1,float16,fp8,31,0.007247999931375186
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,16,8,128,0,1,float16,float16,63,0.011477333803971609
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,16,4,128,0,1,float16,fp8,131071,0.042037333051363625
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,16,8,128,0,1,float16,fp8,63,0.005626666670044263
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,16,8,128,0,1,float16,float16,127,0.011605333536863327
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,16,4,128,0,1,float16,float16,131071,0.07582933207352956
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,16,8,128,0,1,float16,fp8,127,0.00843733362853527
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,16,8,128,0,1,float16,float16,255,0.011648000528415045
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,16,8,128,0,1,float16,float16,2047,0.01543466622630755
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,16,8,128,0,1,float16,fp8,255,0.007642666498819987
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,16,8,128,0,1,float16,float16,511,0.015872000406185787
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,16,8,128,0,1,float16,fp8,511,0.0075519997626543045
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,16,8,128,0,1,float16,float16,1023,0.013264000415802002
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,16,8,128,0,1,float16,fp8,1023,0.0086666668454806
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,16,8,128,0,1,float16,fp8,2047,0.00979200005531311
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,16,8,128,0,1,float16,float16,4095,0.01646399994691213
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,16,8,128,0,1,float16,fp8,4095,0.01099733387430509
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,16,8,128,0,1,float16,float16,8191,0.025557334224383037
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,16,8,128,0,1,float16,fp8,8191,0.014432000617186228
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,16,8,128,0,1,float16,float16,16383,0.029909332593282063
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,16,8,128,0,1,float16,fp8,16383,0.01684800038735072
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,16,8,128,0,1,float16,float16,32767,0.049029335379600525
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,16,8,128,0,1,float16,fp8,32767,0.024608001112937927
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,16,1,128,0,1,float16,float16,1,0.008565333361426989
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,16,1,128,0,1,float16,fp8,1,0.00766933336853981
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,16,8,128,0,1,float16,float16,65535,0.07311999797821045
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,16,1,128,0,1,float16,float16,3,0.009119999905427298
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,16,8,128,0,1,float16,fp8,65535,0.046165332198143005
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,16,1,128,0,1,float16,fp8,3,0.0069226666043202085
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,16,1,128,0,1,float16,float16,7,0.008629333227872849
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,16,1,128,0,1,float16,fp8,7,0.007386666412154834
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,16,1,128,0,1,float16,float16,15,0.011178666104873022
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,16,1,128,0,1,float16,fp8,15,0.007407999907930692
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,16,1,128,0,1,float16,float16,63,0.011648000528415045
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,16,1,128,0,1,float16,float16,31,0.00949866697192192
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,16,1,128,0,1,float16,fp8,31,0.006693333387374878
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,16,1,128,0,1,float16,fp8,63,0.0075626665105422335
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,16,1,128,0,1,float16,float16,127,0.011616000284751257
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,16,8,128,0,1,float16,float16,131071,0.12376532951990764
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,16,1,128,0,1,float16,fp8,127,0.0084906667470932
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,16,8,128,0,1,float16,fp8,131071,0.06567466755708058
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,16,1,128,0,1,float16,float16,255,0.01118933285276095
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,16,1,128,0,1,float16,fp8,255,0.009312000125646591
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,16,1,128,0,1,float16,float16,511,0.015861333658297855
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,16,1,128,0,1,float16,fp8,511,0.010234666367371878
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,16,1,128,0,1,float16,float16,1023,0.01704000060757001
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,16,1,128,0,1,float16,fp8,1023,0.01097600037852923
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,16,1,128,0,1,float16,float16,2047,0.02812800059715907
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,16,1,128,0,1,float16,fp8,2047,0.01146666705608368
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,16,1,128,0,1,float16,float16,4095,0.015439999600251516
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,16,1,128,0,1,float16,fp8,4095,0.013594667116800943
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,16,1,128,0,1,float16,float16,8191,0.01982933282852173
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,16,1,128,0,1,float16,fp8,8191,0.01600533351302147
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,16,1,128,0,1,float16,float16,16383,0.021317332983016968
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,16,1,128,0,1,float16,fp8,16383,0.019285333653291065
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,16,1,128,0,1,float16,float16,32767,0.02497066557407379
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,16,1,128,0,1,float16,fp8,32767,0.023978665471076965
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,16,2,128,0,1,float16,float16,1,0.009119999905427298
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,16,2,128,0,1,float16,fp8,1,0.005765333150823911
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,16,2,128,0,1,float16,float16,3,0.008656000097592672
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,16,2,128,0,1,float16,fp8,3,0.007029333462317784
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,16,1,128,0,1,float16,float16,65535,0.031023999055226643
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,16,2,128,0,1,float16,float16,7,0.00855466661353906
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,16,1,128,0,1,float16,fp8,65535,0.030986666679382324
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,16,2,128,0,1,float16,fp8,7,0.006058666855096817
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,16,2,128,0,1,float16,float16,15,0.010581333190202713
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,16,2,128,0,1,float16,fp8,15,0.007125333572427432
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,16,2,128,0,1,float16,float16,31,0.00943999985853831
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,16,2,128,0,1,float16,fp8,31,0.007466666400432587
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,16,2,128,0,1,float16,float16,63,0.011168000598748526
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,16,2,128,0,1,float16,fp8,63,0.00690133310854435
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,16,2,128,0,1,float16,float16,255,0.01179733375708262
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,16,2,128,0,1,float16,float16,127,0.011557333171367645
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,16,1,128,0,1,float16,float16,131071,0.05962666869163513
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,16,2,128,0,1,float16,fp8,127,0.006335999816656113
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,16,1,128,0,1,float16,fp8,131071,0.04619200030962626
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,16,2,128,0,1,float16,fp8,255,0.007738666608929634
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,16,2,128,0,1,float16,float16,511,0.015850666910409927
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,16,2,128,0,1,float16,fp8,511,0.008517333616813024
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,16,2,128,0,1,float16,float16,1023,0.01695999999841054
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,16,2,128,0,1,float16,fp8,1023,0.009269333134094873
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,16,2,128,0,1,float16,float16,8191,0.01947733387351036
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,16,2,128,0,1,float16,float16,2047,0.015477333217859268
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,16,2,128,0,1,float16,fp8,2047,0.009519999846816063
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,16,2,128,0,1,float16,float16,4095,0.016373333831628162
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,16,2,128,0,1,float16,fp8,16383,0.01579733317097028
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,16,2,128,0,1,float16,fp8,4095,0.010847999403874079
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,16,2,128,0,1,float16,fp8,8191,0.013386666774749756
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,16,2,128,0,1,float16,float16,16383,0.02163200080394745
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,16,2,128,0,1,float16,float16,32767,0.03569599986076355
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,16,2,128,0,1,float16,fp8,32767,0.021898667017618816
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,16,4,128,0,1,float16,float16,1,0.009109333157539368
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,16,2,128,0,1,float16,float16,65535,0.04678399860858917
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,16,2,128,0,1,float16,fp8,65535,0.02743999908367793
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,16,4,128,0,1,float16,fp8,3,0.007050666958093643
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,16,4,128,0,1,float16,fp8,1,0.007109333450595538
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,16,4,128,0,1,float16,float16,3,0.00897066667675972
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,16,4,128,0,1,float16,float16,7,0.009183999771873156
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,16,4,128,0,1,float16,fp8,7,0.00578666664659977
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,16,4,128,0,1,float16,float16,15,0.009349333122372627
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,16,4,128,0,1,float16,fp8,15,0.00726400005320708
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,16,4,128,0,1,float16,float16,31,0.009919999788204828
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,16,2,128,0,1,float16,fp8,131071,0.043391997615496315
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,16,4,128,0,1,float16,fp8,31,0.007184000064929326
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,16,4,128,0,1,float16,float16,63,0.011578666667143503
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,16,4,128,0,1,float16,fp8,63,0.005888000130653381
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,16,4,128,0,1,float16,float16,127,0.011695999652147293
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,16,2,128,0,1,float16,float16,131071,0.07844799757003784
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,16,4,128,0,1,float16,float16,1023,0.013530666629473368
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,16,4,128,0,1,float16,fp8,127,0.007418666655818622
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,16,4,128,0,1,float16,float16,255,0.011600000162919363
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,16,4,128,0,1,float16,fp8,255,0.0075519997626543045
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,16,4,128,0,1,float16,float16,511,0.01589866727590561
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,16,4,128,0,1,float16,fp8,511,0.008223999912540117
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,16,4,128,0,1,float16,float16,8191,0.025461333493391674
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,16,4,128,0,1,float16,fp8,1023,0.008805333326260248
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,16,4,128,0,1,float16,fp8,2047,0.009290666629870733
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,16,4,128,0,1,float16,float16,2047,0.01611199975013733
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,16,4,128,0,1,float16,float16,4095,0.01695466662446658
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,16,4,128,0,1,float16,fp8,4095,0.010954666882753372
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,16,4,128,0,1,float16,fp8,8191,0.01431999976436297
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,16,4,128,0,1,float16,float16,16383,0.031130666534105938
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,16,4,128,0,1,float16,fp8,16383,0.016554666062196095
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,16,4,128,0,1,float16,fp8,32767,0.024277334411938984
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,16,4,128,0,1,float16,float16,32767,0.05026133358478546
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,16,8,128,0,1,float16,float16,1,0.00919999989370505
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,16,4,128,0,1,float16,float16,65535,0.07383466760317485
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,16,8,128,0,1,float16,fp8,1,0.005712000032265981
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,16,8,128,0,1,float16,float16,3,0.009194666519761086
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,16,4,128,0,1,float16,fp8,65535,0.04643199841181437
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,16,8,128,0,1,float16,fp8,3,0.00731733317176501
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,16,8,128,0,1,float16,float16,7,0.009296000003814697
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,16,8,128,0,1,float16,fp8,7,0.005797333394487699
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,16,8,128,0,1,float16,float16,15,0.009253333633144697
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,16,8,128,0,1,float16,fp8,15,0.00697066696981589
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,16,8,128,0,1,float16,float16,31,0.009914666414260864
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,16,8,128,0,1,float16,fp8,63,0.0069759997228781385
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,16,8,128,0,1,float16,fp8,31,0.008400000010927519
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,16,8,128,0,1,float16,float16,63,0.011183999478816986
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,16,4,128,0,1,float16,float16,131071,0.12460800011952718
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,16,8,128,0,1,float16,float16,127,0.011642667154471079
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,16,8,128,0,1,float16,fp8,127,0.0074506668994824094
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,16,4,128,0,1,float16,fp8,131071,0.06603200236956279
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,16,8,128,0,1,float16,float16,255,0.011760000139474869
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,16,8,128,0,1,float16,fp8,255,0.0085333331177632
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,16,8,128,0,1,float16,float16,511,0.013061333447694778
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,16,8,128,0,1,float16,fp8,511,0.008069333309928576
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,16,8,128,0,1,float16,float16,1023,0.01414399966597557
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,16,8,128,0,1,float16,fp8,1023,0.008698666468262672
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,16,8,128,0,1,float16,float16,2047,0.020661332954963047
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,16,8,128,0,1,float16,fp8,2047,0.010506667196750641
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,16,8,128,0,1,float16,float16,4095,0.024277334411938984
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,16,8,128,0,1,float16,fp8,4095,0.011893333246310553
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,16,8,128,0,1,float16,float16,8191,0.034671999514102936
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,16,8,128,0,1,float16,fp8,8191,0.016314666718244553
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,16,8,128,0,1,float16,float16,16383,0.050517335534095764
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,16,8,128,0,1,float16,fp8,16383,0.02756800005833308
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,16,8,128,0,1,float16,float16,32767,0.0788213312625885
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,16,8,128,0,1,float16,fp8,32767,0.03923733284076055
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,16,1,128,0,1,float16,float16,1,0.011071999867757162
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,16,1,128,0,1,float16,fp8,1,0.007567999884486198
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,16,8,128,0,1,float16,float16,65535,0.12398933370908101
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,16,1,128,0,1,float16,float16,3,0.011141333729028702
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,16,1,128,0,1,float16,fp8,3,0.007642666498819987
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,16,8,128,0,1,float16,fp8,65535,0.06460799773534139
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,16,1,128,0,1,float16,float16,7,0.010901333143313726
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,16,1,128,0,1,float16,fp8,7,0.007125333572427432
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,16,1,128,0,1,float16,float16,15,0.010512000570694605
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,16,1,128,0,1,float16,fp8,15,0.008506666868925095
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,16,1,128,0,1,float16,fp8,63,0.0069759997228781385
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,16,1,128,0,1,float16,float16,31,0.011152000476916632
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,16,1,128,0,1,float16,fp8,31,0.007541333635648091
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,16,1,128,0,1,float16,float16,63,0.010970667004585266
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,16,1,128,0,1,float16,float16,127,0.010405333091815313
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,16,1,128,0,1,float16,fp8,127,0.008602666358153025
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,16,1,128,0,1,float16,float16,255,0.01341333364446958
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,16,8,128,0,1,float16,float16,131071,0.20863999923070273
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,16,8,128,0,1,float16,fp8,131071,0.10950400431950887
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,16,1,128,0,1,float16,fp8,255,0.009898666913310686
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,16,1,128,0,1,float16,float16,511,0.02015999952952067
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,16,1,128,0,1,float16,fp8,511,0.011567999919255575
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,16,1,128,0,1,float16,float16,1023,0.028565332293510437
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,16,1,128,0,1,float16,fp8,1023,0.019551999866962433
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,16,1,128,0,1,float16,float16,2047,0.04345066845417023
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,16,1,128,0,1,float16,fp8,2047,0.02385066697994868
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,16,1,128,0,1,float16,float16,4095,0.06817600131034851
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,16,1,128,0,1,float16,fp8,8191,0.05171733101209005
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,16,1,128,0,1,float16,fp8,4095,0.035962666074434914
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,16,1,128,0,1,float16,float16,8191,0.08784000078837077
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,16,2,128,0,1,float16,float16,1,0.010687999427318573
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,16,1,128,0,1,float16,float16,16383,0.12322133779525757
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,16,2,128,0,1,float16,fp8,1,0.007130666946371396
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,16,2,128,0,1,float16,float16,3,0.011450666934251785
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,16,2,128,0,1,float16,fp8,3,0.007082666580875714
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,16,1,128,0,1,float16,fp8,16383,0.07419733206431071
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,16,2,128,0,1,float16,float16,7,0.011365332951148352
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,16,2,128,0,1,float16,fp8,7,0.007296000296870868
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,16,2,128,0,1,float16,float16,15,0.010698666175206503
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,16,2,128,0,1,float16,fp8,15,0.006229333579540253
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,16,2,128,0,1,float16,float16,31,0.011168000598748526
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,16,2,128,0,1,float16,fp8,31,0.007258666679263115
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,16,2,128,0,1,float16,float16,63,0.011381333072980246
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,16,2,128,0,1,float16,float16,511,0.028933333853880566
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,16,2,128,0,1,float16,fp8,63,0.008527999743819237
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,16,2,128,0,1,float16,float16,127,0.011493333925803503
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,16,2,128,0,1,float16,fp8,127,0.0075626665105422335
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,16,2,128,0,1,float16,float16,255,0.021194666624069214
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,16,2,128,0,1,float16,float16,2047,0.06623466809590657
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,16,2,128,0,1,float16,fp8,255,0.009029333169261614
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,16,2,128,0,1,float16,fp8,511,0.01579733317097028
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,16,2,128,0,1,float16,float16,1023,0.04669866462548574
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,16,2,128,0,1,float16,fp8,1023,0.02231466770172119
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,16,2,128,0,1,float16,fp8,2047,0.02757333219051361
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,16,2,128,0,1,float16,float16,4095,0.08515733480453491
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,16,2,128,0,1,float16,fp8,4095,0.04065066576004028
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,16,2,128,0,1,float16,fp8,8191,0.06022400160630544
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,16,2,128,0,1,float16,float16,8191,0.12136000394821167
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,16,4,128,0,1,float16,float16,1,0.0161013330022494
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,16,4,128,0,1,float16,fp8,1,0.007189333438873291
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,16,4,128,0,1,float16,float16,3,0.015978666643301647
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,16,2,128,0,1,float16,float16,16383,0.19262399276097616
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,16,4,128,0,1,float16,fp8,3,0.006506666541099548
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,16,4,128,0,1,float16,float16,7,0.015504000087579092
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,16,2,128,0,1,float16,fp8,16383,0.09994666775067647
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,16,4,128,0,1,float16,fp8,7,0.008357333640257517
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,16,4,128,0,1,float16,float16,15,0.016186666985352833
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,16,4,128,0,1,float16,fp8,15,0.007040000210205714
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,16,4,128,0,1,float16,float16,31,0.0161013330022494
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,16,4,128,0,1,float16,fp8,31,0.007407999907930692
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,16,4,128,0,1,float16,float16,63,0.015247999380032221
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,16,4,128,0,1,float16,fp8,63,0.007007999966541926
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,16,4,128,0,1,float16,float16,127,0.016095999628305435
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,16,4,128,0,1,float16,fp8,127,0.007743999982873599
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,16,4,128,0,1,float16,float16,255,0.030986666679382324
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,16,4,128,0,1,float16,fp8,255,0.013882666826248169
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,16,4,128,0,1,float16,float16,511,0.046906664967536926
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,16,4,128,0,1,float16,fp8,511,0.019445333629846573
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,16,4,128,0,1,float16,float16,1023,0.06283733248710632
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,16,4,128,0,1,float16,fp8,1023,0.025701334079106648
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,16,4,128,0,1,float16,float16,2047,0.08449066678682964
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,16,4,128,0,1,float16,fp8,2047,0.03822399924198786
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,16,4,128,0,1,float16,float16,8191,0.1917440096537272
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,16,4,128,0,1,float16,float16,4095,0.12050132950146993
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,16,4,128,0,1,float16,fp8,4095,0.05624000231424967
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,16,4,128,0,1,float16,fp8,8191,0.09755200147628784
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,16,8,128,0,1,float16,float16,1,0.025237334271272022
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,16,8,128,0,1,float16,fp8,1,0.00972800018886725
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,16,8,128,0,1,float16,float16,3,0.02566933383544286
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,16,4,128,0,1,float16,float16,16383,0.3351893424987793
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,16,8,128,0,1,float16,fp8,3,0.010197333370645842
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,16,4,128,0,1,float16,fp8,16383,0.17808000246683756
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,16,8,128,0,1,float16,float16,7,0.025759999950726826
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,16,8,128,0,1,float16,fp8,7,0.01003200002014637
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,16,8,128,0,1,float16,float16,15,0.025263999899228413
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,16,8,128,0,1,float16,fp8,15,0.01009599988659223
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,16,8,128,0,1,float16,float16,31,0.02606400102376938
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,16,8,128,0,1,float16,fp8,31,0.010064000263810158
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,16,8,128,0,1,float16,fp8,63,0.010069333637754122
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,16,8,128,0,1,float16,float16,127,0.025349333882331848
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,16,8,128,0,1,float16,fp8,127,0.011061333119869232
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,16,8,128,0,1,float16,float16,63,0.02552533398071925
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,16,8,128,0,1,float16,fp8,255,0.019050666441520054
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,16,8,128,0,1,float16,float16,255,0.050442665815353394
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,16,8,128,0,1,float16,float16,511,0.06557866434256236
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,16,8,128,0,1,float16,fp8,511,0.024405332903067272
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,16,8,128,0,1,float16,float16,1023,0.08218133449554443
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,16,8,128,0,1,float16,fp8,1023,0.036687999963760376
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,16,8,128,0,1,float16,float16,2047,0.12032000223795573
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,16,8,128,0,1,float16,fp8,2047,0.05628266433874766
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,16,8,128,0,1,float16,float16,4095,0.19144533077875772
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,16,8,128,0,1,float16,fp8,4095,0.09617066383361816
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,16,8,128,0,1,float16,fp8,8191,0.17482666174570718
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,16,8,128,0,1,float16,float16,8191,0.3338559865951538
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,16,1,128,0,1,float16,float16,1,0.008629333227872849
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,16,1,128,0,1,float16,fp8,1,0.007589333380262057
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,16,1,128,0,1,float16,float16,3,0.008672000219424566
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,16,1,128,0,1,float16,fp8,3,0.007301333049933116
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,16,1,128,0,1,float16,float16,7,0.008565333361426989
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,16,8,128,0,1,float16,float16,16383,0.6181653340657552
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,16,8,128,0,1,float16,fp8,16383,0.33713066577911377
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,16,1,128,0,1,float16,fp8,7,0.008869333192706108
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,16,1,128,0,1,float16,float16,15,0.008842666943868002
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,16,1,128,0,1,float16,fp8,15,0.008463999877373377
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,16,1,128,0,1,float16,float16,127,0.011589333415031433
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,16,1,128,0,1,float16,float16,31,0.00949866697192192
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,16,1,128,0,1,float16,fp8,31,0.007311999797821045
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,16,1,128,0,1,float16,float16,63,0.011168000598748526
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,16,1,128,0,1,float16,fp8,63,0.00744000015159448
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,16,1,128,0,1,float16,fp8,127,0.008645333349704742
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,16,1,128,0,1,float16,float16,255,0.011247999966144562
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,16,1,128,0,1,float16,fp8,255,0.009813333551088968
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,16,1,128,0,1,float16,float16,511,0.016165333489576977
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,16,1,128,0,1,float16,fp8,511,0.009919999788204828
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,16,1,128,0,1,float16,float16,1023,0.02794133375088374
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,16,1,128,0,1,float16,fp8,1023,0.010629333555698395
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,16,1,128,0,1,float16,float16,2047,0.016927999754746754
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,16,1,128,0,1,float16,fp8,2047,0.012202666451533636
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,16,1,128,0,1,float16,float16,4095,0.016927999754746754
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,16,1,128,0,1,float16,fp8,4095,0.0136266661187013
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,16,1,128,0,1,float16,float16,8191,0.020869334538777668
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,16,1,128,0,1,float16,fp8,8191,0.015919999529918034
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,16,1,128,0,1,float16,float16,16383,0.022778667509555817
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,16,1,128,0,1,float16,fp8,16383,0.01886933296918869
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,16,1,128,0,1,float16,float16,32767,0.037205333511034645
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,16,1,128,0,1,float16,fp8,32767,0.02740799884001414
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,16,2,128,0,1,float16,float16,1,0.009130666653315226
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,16,2,128,0,1,float16,fp8,1,0.005893333504597346
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,16,1,128,0,1,float16,fp8,65535,0.03477866699298223
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,16,1,128,0,1,float16,float16,65535,0.048842668533325195
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,16,2,128,0,1,float16,float16,3,0.008992000172535578
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,16,2,128,0,1,float16,fp8,3,0.008277333031098047
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,16,2,128,0,1,float16,float16,7,0.009162666896979014
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,16,2,128,0,1,float16,fp8,7,0.0058613332609335584
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,16,2,128,0,1,float16,float16,15,0.00926399976015091
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,16,2,128,0,1,float16,float16,63,0.011626667032639185
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,16,2,128,0,1,float16,fp8,15,0.005941333249211311
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,16,2,128,0,1,float16,float16,31,0.00979200005531311
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,16,2,128,0,1,float16,fp8,31,0.008565333361426989
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,16,1,128,0,1,float16,fp8,131071,0.05502399802207947
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,16,2,128,0,1,float16,fp8,63,0.0058666666348775225
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,16,2,128,0,1,float16,float16,127,0.011765333513418833
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,16,2,128,0,1,float16,fp8,127,0.007365333537260692
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,16,2,128,0,1,float16,float16,255,0.011445333560307821
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,16,2,128,0,1,float16,fp8,1023,0.009039999917149544
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,16,2,128,0,1,float16,fp8,255,0.008181333541870117
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,16,1,128,0,1,float16,float16,131071,0.08010133107503255
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,16,2,128,0,1,float16,float16,511,0.016597333053747814
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,16,2,128,0,1,float16,fp8,511,0.00810666692753633
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,16,2,128,0,1,float16,float16,1023,0.013157332936922709
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,16,2,128,0,1,float16,float16,2047,0.01664000004529953
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,16,2,128,0,1,float16,fp8,2047,0.01007466639081637
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,16,2,128,0,1,float16,float16,4095,0.018031999468803406
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,16,2,128,0,1,float16,fp8,4095,0.011365332951148352
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,16,2,128,0,1,float16,fp8,8191,0.01471466695268949
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,16,2,128,0,1,float16,float16,8191,0.027632000545660656
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,16,2,128,0,1,float16,float16,16383,0.03175999969244003
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,16,2,128,0,1,float16,fp8,16383,0.01825599993268649
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,16,2,128,0,1,float16,float16,32767,0.05136533578236898
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,16,2,128,0,1,float16,fp8,32767,0.026186667382717133
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,16,4,128,0,1,float16,float16,1,0.009119999905427298
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,16,4,128,0,1,float16,fp8,1,0.006874666859706243
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,16,4,128,0,1,float16,float16,3,0.009103999783595404
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,16,2,128,0,1,float16,fp8,65535,0.046869332591692604
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,16,4,128,0,1,float16,fp8,3,0.007130666946371396
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,16,4,128,0,1,float16,float16,7,0.009152000149091085
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,16,2,128,0,1,float16,float16,65535,0.07557866473992665
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,16,4,128,0,1,float16,fp8,7,0.007397333160042763
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,16,4,128,0,1,float16,float16,31,0.010053333515922228
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,16,4,128,0,1,float16,float16,15,0.008853333070874214
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,16,4,128,0,1,float16,fp8,15,0.007071999832987785
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,16,4,128,0,1,float16,fp8,63,0.007029333462317784
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,16,4,128,0,1,float16,fp8,31,0.0068853336075941724
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,16,4,128,0,1,float16,fp8,127,0.0075573331365982694
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,16,4,128,0,1,float16,float16,63,0.011589333415031433
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,16,4,128,0,1,float16,fp8,255,0.00808533343176047
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,16,2,128,0,1,float16,fp8,131071,0.06647466619809468
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,16,4,128,0,1,float16,fp8,511,0.008250666782259941
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,16,4,128,0,1,float16,float16,127,0.011194666226704916
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,16,2,128,0,1,float16,float16,131071,0.12688000003496805
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,16,4,128,0,1,float16,float16,255,0.01180800050497055
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,16,4,128,0,1,float16,float16,511,0.013295999417702356
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,16,4,128,0,1,float16,float16,1023,0.013744000345468521
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,16,4,128,0,1,float16,fp8,1023,0.008272000278035799
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,16,4,128,0,1,float16,float16,2047,0.021903999149799347
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,16,4,128,0,1,float16,fp8,2047,0.010069333637754122
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,16,4,128,0,1,float16,float16,4095,0.02480533222357432
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,16,4,128,0,1,float16,fp8,4095,0.012213333199421564
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,16,4,128,0,1,float16,float16,8191,0.03549866626660029
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,16,4,128,0,1,float16,fp8,8191,0.016362667083740234
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,16,4,128,0,1,float16,float16,16383,0.05329599976539612
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,16,4,128,0,1,float16,fp8,16383,0.02739199995994568
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,16,4,128,0,1,float16,fp8,32767,0.03867733230193456
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,16,8,128,0,1,float16,float16,1,0.00919999989370505
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,16,4,128,0,1,float16,float16,32767,0.08040000001589458
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,16,8,128,0,1,float16,fp8,1,0.005775999898711841
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,16,8,128,0,1,float16,float16,3,0.008613333106040955
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,16,8,128,0,1,float16,float16,7,0.009194666519761086
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,16,8,128,0,1,float16,fp8,3,0.007087999954819679
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,16,4,128,0,1,float16,float16,65535,0.12667199969291687
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,16,4,128,0,1,float16,fp8,65535,0.06492266555627187
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,16,8,128,0,1,float16,fp8,7,0.005914666379491488
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,16,8,128,0,1,float16,float16,15,0.008912000184257826
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,16,8,128,0,1,float16,fp8,15,0.007151999821265538
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,16,8,128,0,1,float16,float16,31,0.009888000165422758
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,16,8,128,0,1,float16,fp8,31,0.008389333263039589
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,16,8,128,0,1,float16,float16,63,0.011813333878914515
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,16,8,128,0,1,float16,fp8,63,0.006133333469430606
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,16,4,128,0,1,float16,float16,131071,0.2101866602897644
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,16,8,128,0,1,float16,float16,127,0.011247999966144562
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,16,8,128,0,1,float16,fp8,127,0.007333333293596904
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,16,8,128,0,1,float16,float16,1023,0.0195573332409064
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,16,8,128,0,1,float16,float16,255,0.012960000584522883
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,16,4,128,0,1,float16,fp8,131071,0.1111946702003479
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,16,8,128,0,1,float16,fp8,255,0.008522666369875273
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,16,8,128,0,1,float16,float16,511,0.013616000612576803
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,16,8,128,0,1,float16,fp8,511,0.008207999790708223
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,16,8,128,0,1,float16,fp8,1023,0.00903466654320558
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,16,8,128,0,1,float16,float16,2047,0.02865600089232127
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,16,8,128,0,1,float16,fp8,2047,0.011642667154471079
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,16,8,128,0,1,float16,float16,4095,0.04029866556326548
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,16,8,128,0,1,float16,fp8,4095,0.018506667266289394
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,16,8,128,0,1,float16,float16,8191,0.05900266766548157
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,16,8,128,0,1,float16,fp8,8191,0.024117333193620045
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,16,8,128,0,1,float16,float16,16383,0.08635733524958293
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,16,8,128,0,1,float16,fp8,16383,0.039546666045983635
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,16,8,128,0,1,float16,fp8,32767,0.06121066709359487
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,16,8,128,0,1,float16,float16,32767,0.12517333030700684
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,16,1,128,0,1,float16,fp8,1,0.007162666569153468
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,16,1,128,0,1,float16,float16,1,0.011237333218256632
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,16,1,128,0,1,float16,float16,3,0.01118933285276095
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,16,8,128,0,1,float16,float16,65535,0.19974400599797568
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,16,1,128,0,1,float16,float16,7,0.011002667248249054
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,16,1,128,0,1,float16,float16,15,0.01126933346192042
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,16,1,128,0,1,float16,fp8,3,0.007807999849319458
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,16,8,128,0,1,float16,fp8,65535,0.10652266939481099
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,16,1,128,0,1,float16,fp8,7,0.00731733317176501
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,16,1,128,0,1,float16,float16,31,0.01101333275437355
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,16,1,128,0,1,float16,fp8,15,0.0074879998962084455
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,16,1,128,0,1,float16,fp8,31,0.007354666789372762
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,16,1,128,0,1,float16,float16,63,0.011727999895811081
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,16,1,128,0,1,float16,fp8,63,0.007541333635648091
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,16,1,128,0,1,float16,float16,127,0.011247999966144562
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,16,1,128,0,1,float16,fp8,127,0.007887999837597212
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,16,1,128,0,1,float16,float16,255,0.02231466770172119
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,16,8,128,0,1,float16,fp8,131071,0.18010665973027548
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,16,1,128,0,1,float16,fp8,255,0.010656000425418219
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,16,8,128,0,1,float16,float16,131071,0.3582559823989868
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,16,1,128,0,1,float16,float16,511,0.029658667743206024
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,16,1,128,0,1,float16,fp8,511,0.01894933357834816
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,16,1,128,0,1,float16,float16,1023,0.047295997540156044
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,16,1,128,0,1,float16,fp8,1023,0.027509334186712902
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,16,1,128,0,1,float16,fp8,2047,0.03349333256483078
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,16,1,128,0,1,float16,float16,2047,0.06759466727574666
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,16,1,128,0,1,float16,float16,4095,0.08667733271916707
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,16,1,128,0,1,float16,float16,8191,0.12264532844225566
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,16,1,128,0,1,float16,fp8,4095,0.0513973335425059
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,16,2,128,0,1,float16,float16,1,0.016234666109085083
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,16,2,128,0,1,float16,fp8,1,0.007061333085099856
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,16,1,128,0,1,float16,fp8,8191,0.07452799876530965
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,16,2,128,0,1,float16,float16,3,0.0162773331006368
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,16,2,128,0,1,float16,float16,7,0.015594666202863058
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,16,2,128,0,1,float16,fp8,3,0.008565333361426989
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,16,2,128,0,1,float16,fp8,7,0.007205333560705185
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,16,2,128,0,1,float16,float16,15,0.016250666230916977
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,16,2,128,0,1,float16,fp8,15,0.007034666836261749
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,16,2,128,0,1,float16,float16,31,0.015589332828919092
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,16,2,128,0,1,float16,float16,63,0.015824000040690105
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,16,2,128,0,1,float16,fp8,127,0.008229333286484083
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,16,2,128,0,1,float16,fp8,31,0.007466666400432587
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,16,2,128,0,1,float16,fp8,63,0.008629333227872849
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,16,2,128,0,1,float16,float16,127,0.016575999557971954
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,16,2,128,0,1,float16,float16,255,0.03102933367093404
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,16,2,128,0,1,float16,fp8,255,0.015077333897352219
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,16,2,128,0,1,float16,float16,511,0.047322665651639305
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,16,2,128,0,1,float16,fp8,511,0.021925332645575207
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,16,2,128,0,1,float16,float16,1023,0.06282133360703786
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,16,2,128,0,1,float16,fp8,1023,0.02784000088771184
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,16,2,128,0,1,float16,fp8,2047,0.040405333042144775
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,16,2,128,0,1,float16,float16,2047,0.08508267005284627
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,16,2,128,0,1,float16,float16,4095,0.12090667088826497
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,16,2,128,0,1,float16,fp8,4095,0.058890665570894875
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,16,4,128,0,1,float16,float16,1,0.02585600068171819
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,16,4,128,0,1,float16,fp8,1,0.0103946669648091
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,16,2,128,0,1,float16,float16,8191,0.19413334131240845
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,16,4,128,0,1,float16,float16,3,0.025098666548728943
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,16,2,128,0,1,float16,fp8,8191,0.09930133819580078
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,16,4,128,0,1,float16,fp8,3,0.010341333225369453
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,16,4,128,0,1,float16,float16,7,0.025125332176685333
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,16,4,128,0,1,float16,fp8,7,0.00980266680320104
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,16,4,128,0,1,float16,float16,15,0.025775998830795288
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,16,4,128,0,1,float16,fp8,15,0.010341333225369453
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,16,4,128,0,1,float16,fp8,31,0.010053333515922228
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,16,4,128,0,1,float16,float16,31,0.02553066611289978
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,16,4,128,0,1,float16,float16,255,0.05089066425959269
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,16,4,128,0,1,float16,float16,63,0.025461333493391674
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,16,4,128,0,1,float16,fp8,63,0.010197333370645842
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,16,4,128,0,1,float16,float16,127,0.025263999899228413
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,16,4,128,0,1,float16,fp8,127,0.011525332927703857
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,16,4,128,0,1,float16,fp8,1023,0.04014399896065394
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,16,4,128,0,1,float16,fp8,255,0.020031999796628952
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,16,4,128,0,1,float16,float16,511,0.06461333235104878
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,16,4,128,0,1,float16,fp8,511,0.025429333249727886
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,16,4,128,0,1,float16,float16,1023,0.08197866876920064
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,16,4,128,0,1,float16,fp8,2047,0.05615466833114624
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,16,4,128,0,1,float16,float16,2047,0.1221386690934499
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,16,4,128,0,1,float16,float16,4095,0.19353600343068442
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,16,4,128,0,1,float16,fp8,4095,0.09683199723561604
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,16,8,128,0,1,float16,float16,1,0.044794668753941856
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,16,8,128,0,1,float16,fp8,3,0.014362666755914688
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,16,8,128,0,1,float16,fp8,1,0.014293332894643148
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,16,8,128,0,1,float16,fp8,7,0.014277332772811254
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,16,8,128,0,1,float16,float16,3,0.04399999976158142
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,16,4,128,0,1,float16,fp8,8191,0.17593600352605185
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,16,4,128,0,1,float16,float16,8191,0.33724268277486164
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,16,8,128,0,1,float16,float16,7,0.04413333535194397
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,16,8,128,0,1,float16,float16,15,0.04423466821511587
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,16,8,128,0,1,float16,fp8,15,0.01431999976436297
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,16,8,128,0,1,float16,float16,31,0.04451199869314829
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,16,8,128,0,1,float16,fp8,31,0.014256000518798828
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,16,8,128,0,1,float16,float16,63,0.043680002291997276
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,16,8,128,0,1,float16,fp8,63,0.013701333353916803
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,16,8,128,0,1,float16,fp8,127,0.01568000018596649
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,16,8,128,0,1,float16,float16,127,0.04544533292452494
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,16,8,128,0,1,float16,float16,255,0.053130666414896645
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,16,8,128,0,1,float16,fp8,255,0.02120000123977661
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,16,8,128,0,1,float16,float16,511,0.07127999762694041
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,16,8,128,0,1,float16,fp8,511,0.0329066663980484
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,16,8,128,0,1,float16,float16,1023,0.10342933734258015
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,16,8,128,0,1,float16,float16,4095,0.31194132566452026
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,16,8,128,0,1,float16,fp8,1023,0.051914667089780174
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,16,8,128,0,1,float16,float16,2047,0.17613333463668823
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,16,8,128,0,1,float16,fp8,2047,0.0930613378683726
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,16,8,128,0,1,float16,fp8,4095,0.17329599459966025
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,16,1,128,0,1,float16,float16,1,0.01639466608564059
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,16,1,128,0,1,float16,fp8,1,0.008325333396593729
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,16,1,128,0,1,float16,float16,3,0.016208000481128693
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,16,1,128,0,1,float16,fp8,3,0.007877333089709282
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,16,8,128,0,1,float16,fp8,8191,0.3317653338114421
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,16,8,128,0,1,float16,float16,8191,0.5871946811676025
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,16,1,128,0,1,float16,float16,7,0.017157333592573803
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,16,1,128,0,1,float16,float16,15,0.016314666718244553
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,16,1,128,0,1,float16,fp8,7,0.008383999889095625
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,16,1,128,0,1,float16,fp8,15,0.008314666648705801
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,16,1,128,0,1,float16,float16,31,0.016293333222468693
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,16,1,128,0,1,float16,fp8,31,0.007887999837597212
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,16,1,128,0,1,float16,float16,63,0.01642666632930438
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,16,1,128,0,1,float16,fp8,63,0.008602666358153025
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,16,1,128,0,1,float16,fp8,127,0.008853333070874214
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,16,1,128,0,1,float16,float16,127,0.016666666915019352
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,16,1,128,0,1,float16,float16,255,0.032613334556420646
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,16,1,128,0,1,float16,fp8,255,0.018496000518401463
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,16,1,128,0,1,float16,fp8,1023,0.03338133295377096
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,16,1,128,0,1,float16,float16,511,0.049072002371152244
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,16,1,128,0,1,float16,fp8,511,0.026485333840052288
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,16,1,128,0,1,float16,float16,1023,0.06450133522351582
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,16,1,128,0,1,float16,fp8,2047,0.051130667328834534
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,16,2,128,0,1,float16,float16,1,0.02658133457104365
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,16,1,128,0,1,float16,float16,2047,0.0860746701558431
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,16,2,128,0,1,float16,fp8,1,0.0106133334338665
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,16,2,128,0,1,float16,float16,3,0.025941332181294758
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,16,2,128,0,1,float16,fp8,3,0.011007999380429586
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,16,2,128,0,1,float16,float16,7,0.027056001126766205
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,16,2,128,0,1,float16,float16,15,0.02643200010061264
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,16,2,128,0,1,float16,fp8,7,0.011274666835864386
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,16,2,128,0,1,float16,fp8,15,0.011354666203260422
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,16,2,128,0,1,float16,float16,31,0.025797332326571148
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,16,2,128,0,1,float16,fp8,31,0.010725333044926325
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,16,2,128,0,1,float16,float16,63,0.02603733291228612
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,16,2,128,0,1,float16,fp8,63,0.011349332829316458
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,16,2,128,0,1,float16,float16,127,0.026613332331180573
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,16,2,128,0,1,float16,fp8,127,0.013386666774749756
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,16,2,128,0,1,float16,fp8,255,0.021344001094500225
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,16,2,128,0,1,float16,fp8,511,0.028170667588710785
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,16,2,128,0,1,float16,float16,255,0.05197333296140035
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,16,2,128,0,1,float16,float16,511,0.0655626654624939
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,16,2,128,0,1,float16,float16,1023,0.08255999783674876
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,16,4,128,0,1,float16,float16,1,0.04462933540344238
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,16,2,128,0,1,float16,fp8,1023,0.04138133426507314
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,16,2,128,0,1,float16,fp8,2047,0.058287998040517174
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,16,2,128,0,1,float16,float16,2047,0.12371200323104858
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,16,4,128,0,1,float16,fp8,1,0.01440000037352244
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,16,4,128,0,1,float16,fp8,7,0.014186666657527288
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,16,4,128,0,1,float16,float16,3,0.044309332966804504
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,16,4,128,0,1,float16,fp8,3,0.014271999398867289
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,16,4,128,0,1,float16,float16,7,0.04465066889921824
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,16,4,128,0,1,float16,fp8,15,0.013733333597580591
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,16,4,128,0,1,float16,float16,15,0.04438399771849314
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,16,4,128,0,1,float16,float16,63,0.04410133262475332
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,16,4,128,0,1,float16,float16,31,0.046069333950678505
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,16,4,128,0,1,float16,fp8,31,0.014335999886194864
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,16,4,128,0,1,float16,fp8,63,0.013642666240533194
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,16,4,128,0,1,float16,float16,127,0.045791998505592346
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,16,4,128,0,1,float16,fp8,127,0.016735999534527462
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,16,4,128,0,1,float16,float16,255,0.05318933228651682
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,16,4,128,0,1,float16,fp8,255,0.022298666338125866
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,16,4,128,0,1,float16,float16,511,0.07031466563542683
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,16,4,128,0,1,float16,fp8,511,0.033946665624777474
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,16,4,128,0,1,float16,fp8,1023,0.05301333467165629
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,16,4,128,0,1,float16,float16,1023,0.10569066802660625
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,16,8,128,0,1,float16,float16,1,0.08073066671689351
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,16,4,128,0,1,float16,float16,2047,0.17765865723292032
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,16,8,128,0,1,float16,fp8,1,0.023930666347344715
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,16,4,128,0,1,float16,fp8,2047,0.09355733791987102
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,16,8,128,0,1,float16,float16,3,0.08086933195590973
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,16,8,128,0,1,float16,fp8,3,0.024245334168275196
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,16,8,128,0,1,float16,float16,7,0.08087466657161713
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,16,8,128,0,1,float16,fp8,7,0.024319998919963837
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,16,8,128,0,1,float16,float16,15,0.08070399860541026
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,16,8,128,0,1,float16,float16,31,0.08105066418647766
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,16,8,128,0,1,float16,fp8,15,0.02422933280467987
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,16,8,128,0,1,float16,float16,63,0.07978133360544841
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,16,8,128,0,1,float16,fp8,31,0.02399466683467229
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,16,8,128,0,1,float16,fp8,63,0.024069334069887798
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,16,8,128,0,1,float16,float16,127,0.08225066463152568
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,16,8,128,0,1,float16,fp8,127,0.0281333327293396
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,16,8,128,0,1,float16,float16,255,0.09455999732017517
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,16,8,128,0,1,float16,fp8,255,0.039333333571751915
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,16,8,128,0,1,float16,fp8,511,0.058677335580190025
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,16,8,128,0,1,float16,float16,511,0.12787200013796488
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,16,8,128,0,1,float16,float16,1023,0.19721599419911703
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,16,8,128,0,1,float16,fp8,1023,0.09865066409111023
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,16,8,128,0,1,float16,float16,2047,0.3349440097808838
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,16,1,128,0,1,float16,float16,1,0.02696000039577484
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,16,8,128,0,1,float16,fp8,2047,0.17777599891026816
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,16,1,128,0,1,float16,fp8,1,0.012965332716703415
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,16,1,128,0,1,float16,float16,3,0.026693334182103474
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,16,1,128,0,1,float16,fp8,3,0.013445333888133367
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,16,1,128,0,1,float16,float16,7,0.027221334477265675
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,16,1,128,0,1,float16,float16,15,0.026906666656335194
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,16,1,128,0,1,float16,fp8,7,0.01332266628742218
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,16,1,128,0,1,float16,fp8,15,0.012608000387748083
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,16,1,128,0,1,float16,float16,31,0.027461332579453785
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,16,1,128,0,1,float16,fp8,31,0.013007999708255133
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,16,1,128,0,1,float16,float16,63,0.026421333352724712
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,16,1,128,0,1,float16,fp8,63,0.013365333278973898
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,16,1,128,0,1,float16,fp8,127,0.014922666052977243
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,16,1,128,0,1,float16,float16,127,0.02736533433198929
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,16,1,128,0,1,float16,fp8,255,0.02606933315594991
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,16,1,128,0,1,float16,float16,255,0.05400000015894572
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,16,1,128,0,1,float16,float16,511,0.06788800160090129
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,16,1,128,0,1,float16,fp8,511,0.036271999279658
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,16,2,128,0,1,float16,float16,1,0.04594666759173075
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,16,1,128,0,1,float16,float16,1023,0.08402132987976074
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,16,2,128,0,1,float16,float16,3,0.04580266773700714
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,16,1,128,0,1,float16,fp8,1023,0.05180799961090088
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,16,2,128,0,1,float16,fp8,1,0.015493333339691162
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,16,2,128,0,1,float16,fp8,3,0.015642666568358738
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,16,2,128,0,1,float16,fp8,7,0.015429332852363586
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,16,2,128,0,1,float16,float16,7,0.04644800225893656
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,16,2,128,0,1,float16,float16,15,0.045925334095954895
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,16,2,128,0,1,float16,fp8,15,0.01504533365368843
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,16,2,128,0,1,float16,float16,31,0.04691733419895172
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,16,2,128,0,1,float16,fp8,31,0.015552000453074774
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,16,2,128,0,1,float16,float16,63,0.046767999728520714
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,16,2,128,0,1,float16,fp8,63,0.01532799998919169
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,16,2,128,0,1,float16,fp8,127,0.018538666268189747
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,16,2,128,0,1,float16,float16,127,0.04666133224964142
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,16,2,128,0,1,float16,float16,255,0.05457599957784017
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,16,2,128,0,1,float16,fp8,255,0.02521066615978877
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,16,2,128,0,1,float16,float16,511,0.07226133346557617
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,16,2,128,0,1,float16,fp8,511,0.037258667250474296
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,16,2,128,0,1,float16,fp8,1023,0.05560533205668131
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,16,2,128,0,1,float16,float16,1023,0.10731200377146403
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,16,4,128,0,1,float16,fp8,1,0.023717333873112995
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,16,4,128,0,1,float16,float16,1,0.08227733274300893
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,16,4,128,0,1,float16,fp8,3,0.024175999065240223
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,16,4,128,0,1,float16,float16,3,0.08184533317883809
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,16,4,128,0,1,float16,fp8,7,0.023845332364241283
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,16,4,128,0,1,float16,float16,7,0.08228266735871632
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,16,4,128,0,1,float16,float16,15,0.08152533570925395
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,16,4,128,0,1,float16,fp8,15,0.023573334018389385
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,16,4,128,0,1,float16,float16,31,0.08224000036716461
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,16,4,128,0,1,float16,fp8,31,0.024234667420387268
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,16,4,128,0,1,float16,fp8,63,0.02367999901374181
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,16,4,128,0,1,float16,float16,63,0.08221866687138875
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,16,4,128,0,1,float16,fp8,127,0.02932800104220708
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,16,4,128,0,1,float16,float16,127,0.08424533406893413
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,16,4,128,0,1,float16,float16,255,0.09565333525339763
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,16,4,128,0,1,float16,fp8,255,0.041877334316571556
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,16,4,128,0,1,float16,fp8,511,0.06081066528956095
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,16,4,128,0,1,float16,float16,511,0.12852799892425537
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,16,4,128,0,1,float16,fp8,1023,0.09988266229629517
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,16,8,128,0,1,float16,fp8,1,0.04274666806062063
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,16,8,128,0,1,float16,fp8,3,0.04301866888999939
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,16,4,128,0,1,float16,float16,1023,0.20061333974202475
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,16,8,128,0,1,float16,float16,1,0.15230933825174967
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,16,8,128,0,1,float16,float16,3,0.15230933825174967
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,16,8,128,0,1,float16,fp8,7,0.043365334471066795
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,16,8,128,0,1,float16,float16,7,0.15132799744606018
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,16,8,128,0,1,float16,fp8,15,0.042810668547948204
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,16,8,128,0,1,float16,fp8,31,0.04372266431649526
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,16,8,128,0,1,float16,float16,63,0.15468266606330872
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,16,8,128,0,1,float16,float16,15,0.15076800187428793
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,16,8,128,0,1,float16,float16,31,0.15154666701952615
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,16,8,128,0,1,float16,fp8,63,0.04358933369318644
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,16,8,128,0,1,float16,float16,127,0.15343999862670898
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,16,8,128,0,1,float16,fp8,127,0.05386666456858317
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,16,8,128,0,1,float16,float16,255,0.17831466595331827
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,16,8,128,0,1,float16,fp8,255,0.07190399865309398
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,16,1,128,0,1,float16,fp8,1,0.007189333438873291
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,16,8,128,0,1,float16,fp8,511,0.10797866185506184
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,16,8,128,0,1,float16,float16,511,0.241866668065389
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,16,1,128,0,1,float16,float16,1,0.008992000172535578
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,16,1,128,0,1,float16,float16,3,0.008805333326260248
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,16,1,128,0,1,float16,fp8,3,0.00972800018886725
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,16,1,128,0,1,float16,float16,7,0.009296000003814697
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,16,1,128,0,1,float16,float16,31,0.009653333574533463
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,16,8,128,0,1,float16,float16,1023,0.37345067660013836
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,16,1,128,0,1,float16,fp8,7,0.006784000123540561
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,16,8,128,0,1,float16,fp8,1023,0.18627200524012247
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,16,1,128,0,1,float16,float16,15,0.009813333551088968
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,16,1,128,0,1,float16,fp8,15,0.00726400005320708
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,16,1,128,0,1,float16,fp8,31,0.009530666594703993
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,16,1,128,0,1,float16,float16,63,0.011781333635250727
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,16,1,128,0,1,float16,fp8,63,0.007007999966541926
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,16,1,128,0,1,float16,fp8,511,0.01027199998497963
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,16,1,128,0,1,float16,float16,127,0.011957333733638128
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,16,1,128,0,1,float16,fp8,127,0.007711999739209811
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,16,1,128,0,1,float16,float16,255,0.012421333541472753
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,16,1,128,0,1,float16,fp8,255,0.009808000177145004
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,16,1,128,0,1,float16,float16,4095,0.017488000293572743
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,16,1,128,0,1,float16,float16,511,0.026127999027570088
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,16,1,128,0,1,float16,float16,1023,0.013701333353916803
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,16,1,128,0,1,float16,fp8,1023,0.01027199998497963
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,16,1,128,0,1,float16,float16,2047,0.01749333366751671
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,16,1,128,0,1,float16,fp8,2047,0.012085333466529846
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,16,1,128,0,1,float16,fp8,4095,0.01421333352724711
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,16,1,128,0,1,float16,fp8,8191,0.01766933376590411
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,16,1,128,0,1,float16,float16,8191,0.027263998985290527
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,16,1,128,0,1,float16,float16,16383,0.0321066677570343
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,16,1,128,0,1,float16,fp8,16383,0.022426667312781017
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,16,1,128,0,1,float16,float16,32767,0.05118933320045471
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,16,1,128,0,1,float16,fp8,32767,0.03296533226966858
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,16,2,128,0,1,float16,float16,1,0.008933333059151968
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,16,2,128,0,1,float16,fp8,1,0.006874666859706243
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,16,2,128,0,1,float16,float16,3,0.008842666943868002
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,16,2,128,0,1,float16,fp8,3,0.008346666892369589
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,16,2,128,0,1,float16,float16,7,0.008992000172535578
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,16,1,128,0,1,float16,fp8,65535,0.05895466605822245
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,16,1,128,0,1,float16,float16,65535,0.07726933558781941
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,16,2,128,0,1,float16,fp8,7,0.0068693334857622785
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,16,2,128,0,1,float16,float16,15,0.009029333169261614
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,16,2,128,0,1,float16,fp8,15,0.008394666636983553
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,16,2,128,0,1,float16,float16,31,0.009882666791478792
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,16,2,128,0,1,float16,fp8,31,0.007040000210205714
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,16,2,128,0,1,float16,float16,63,0.01198400060335795
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,16,2,128,0,1,float16,fp8,63,0.00707733320693175
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,16,2,128,0,1,float16,float16,127,0.01157333329319954
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,16,1,128,0,1,float16,float16,131071,0.12755733728408813
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,16,2,128,0,1,float16,fp8,127,0.006586666529377301
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,16,2,128,0,1,float16,float16,255,0.012234666695197424
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,16,2,128,0,1,float16,fp8,255,0.008346666892369589
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,16,1,128,0,1,float16,fp8,131071,0.08575999736785889
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,16,2,128,0,1,float16,float16,511,0.012874666601419449
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,16,2,128,0,1,float16,fp8,511,0.008400000010927519
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,16,2,128,0,1,float16,float16,1023,0.013178666432698568
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,16,2,128,0,1,float16,fp8,1023,0.008885333314538002
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,16,2,128,0,1,float16,float16,2047,0.021888000269730885
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,16,2,128,0,1,float16,fp8,8191,0.0170666662355264
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,16,2,128,0,1,float16,fp8,2047,0.011194666226704916
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,16,2,128,0,1,float16,float16,4095,0.02495466669400533
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,16,2,128,0,1,float16,fp8,4095,0.013045333325862885
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,16,2,128,0,1,float16,float16,8191,0.03557866563399633
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,16,2,128,0,1,float16,float16,16383,0.052709331115086876
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,16,2,128,0,1,float16,fp8,16383,0.028954667349656422
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,16,2,128,0,1,float16,float16,32767,0.0805920014778773
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,16,2,128,0,1,float16,fp8,32767,0.04137066751718521
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,16,4,128,0,1,float16,float16,1,0.009477333476146063
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,16,4,128,0,1,float16,fp8,1,0.005664000287652016
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,16,4,128,0,1,float16,float16,3,0.009530666594703993
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,16,4,128,0,1,float16,fp8,3,0.007541333635648091
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,16,4,128,0,1,float16,float16,7,0.00890666681031386
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,16,2,128,0,1,float16,float16,65535,0.12681066989898682
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,16,4,128,0,1,float16,fp8,7,0.007573333258430163
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,16,4,128,0,1,float16,float16,15,0.009621333330869675
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,16,2,128,0,1,float16,fp8,65535,0.06877866884072621
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,16,4,128,0,1,float16,fp8,15,0.005914666379491488
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,16,4,128,0,1,float16,float16,31,0.010255999863147736
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,16,4,128,0,1,float16,fp8,31,0.00772266648709774
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,16,4,128,0,1,float16,float16,63,0.011968000481526056
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,16,4,128,0,1,float16,fp8,63,0.0069759997228781385
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,16,4,128,0,1,float16,float16,127,0.011690666278203329
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,16,4,128,0,1,float16,fp8,127,0.007338666667540868
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,16,4,128,0,1,float16,float16,255,0.01322666679819425
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,16,4,128,0,1,float16,fp8,255,0.008047999814152718
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,16,2,128,0,1,float16,float16,131071,0.21052267154057822
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,16,4,128,0,1,float16,float16,511,0.013647999614477158
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,16,4,128,0,1,float16,fp8,511,0.008074666683872541
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,16,4,128,0,1,float16,float16,1023,0.019541333119074505
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,16,2,128,0,1,float16,fp8,131071,0.11185600360234578
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,16,4,128,0,1,float16,fp8,1023,0.011007999380429586
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,16,4,128,0,1,float16,float16,2047,0.02863999952872594
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,16,4,128,0,1,float16,fp8,2047,0.011637333780527115
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,16,4,128,0,1,float16,float16,4095,0.04052799940109253
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,16,4,128,0,1,float16,fp8,4095,0.018474667022625606
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,16,4,128,0,1,float16,float16,8191,0.058730666836102806
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,16,4,128,0,1,float16,fp8,8191,0.02439466615517934
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,16,4,128,0,1,float16,float16,16383,0.08683199683825175
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,16,4,128,0,1,float16,fp8,16383,0.040522667268911995
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,16,4,128,0,1,float16,float16,32767,0.1239413321018219
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,16,4,128,0,1,float16,fp8,32767,0.062047998110453285
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,16,8,128,0,1,float16,float16,1,0.010911999891201654
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,16,8,128,0,1,float16,fp8,1,0.007002666592597961
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,16,8,128,0,1,float16,fp8,3,0.006805333619316419
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,16,8,128,0,1,float16,float16,3,0.010090666512648264
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,16,8,128,0,1,float16,float16,7,0.010693332801262537
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,16,4,128,0,1,float16,fp8,65535,0.10666666428248088
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,16,8,128,0,1,float16,float16,15,0.010768000036478043
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,16,4,128,0,1,float16,float16,65535,0.19971734285354614
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,16,8,128,0,1,float16,fp8,7,0.0069759997228781385
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,16,8,128,0,1,float16,fp8,15,0.005775999898711841
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,16,8,128,0,1,float16,float16,31,0.010608000059922537
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,16,8,128,0,1,float16,fp8,31,0.008282666405042013
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,16,8,128,0,1,float16,float16,63,0.010773333410422007
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,16,8,128,0,1,float16,fp8,63,0.00589866687854131
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,16,8,128,0,1,float16,float16,127,0.01071999967098236
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,16,8,128,0,1,float16,fp8,127,0.005989333614706993
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,16,8,128,0,1,float16,float16,255,0.013653332988421122
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,16,8,128,0,1,float16,fp8,255,0.008538666491707167
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,16,8,128,0,1,float16,float16,511,0.019909333437681198
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,16,4,128,0,1,float16,float16,131071,0.3596320152282715
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,16,4,128,0,1,float16,fp8,131071,0.18004266421000162
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,16,8,128,0,1,float16,fp8,511,0.008517333616813024
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,16,8,128,0,1,float16,float16,1023,0.028688001135985058
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,16,8,128,0,1,float16,fp8,1023,0.013733333597580591
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,16,8,128,0,1,float16,float16,2047,0.04285866518815359
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,16,8,128,0,1,float16,fp8,2047,0.017680000513792038
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,16,8,128,0,1,float16,float16,4095,0.06758399804433186
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,16,8,128,0,1,float16,fp8,4095,0.02622933437426885
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,16,8,128,0,1,float16,float16,8191,0.08454400300979614
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,16,8,128,0,1,float16,fp8,8191,0.03737066686153412
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,16,8,128,0,1,float16,float16,16383,0.12043199936548869
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,16,8,128,0,1,float16,fp8,16383,0.05691733459631602
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,16,8,128,0,1,float16,float16,32767,0.19046932458877563
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,16,8,128,0,1,float16,fp8,32767,0.09778133034706116
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,16,1,128,0,1,float16,fp8,1,0.01826133330663045
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,16,8,128,0,1,float16,float16,65535,0.33241067330042523
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,16,8,128,0,1,float16,fp8,65535,0.17839467525482178
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,16,1,128,0,1,float16,float16,1,0.047775998711586
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,16,1,128,0,1,float16,float16,3,0.04749333361784617
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,16,1,128,0,1,float16,fp8,3,0.019007999449968338
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,16,1,128,0,1,float16,float16,7,0.047007997830708824
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,16,8,128,0,1,float16,float16,131071,0.6156533161799113
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,16,1,128,0,1,float16,fp8,7,0.018842666099468868
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,16,1,128,0,1,float16,float16,15,0.048826664686203
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,16,1,128,0,1,float16,float16,63,0.046522667010625206
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,16,1,128,0,1,float16,fp8,15,0.01883200059334437
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,16,1,128,0,1,float16,float16,31,0.04752533137798309
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,16,1,128,0,1,float16,fp8,31,0.019039999693632126
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,16,8,128,0,1,float16,fp8,131071,0.33820799986521405
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,16,1,128,0,1,float16,fp8,63,0.018229333062966663
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,16,1,128,0,1,float16,float16,127,0.04955733319123586
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,16,1,128,0,1,float16,fp8,127,0.0223786657055219
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,16,1,128,0,1,float16,float16,255,0.057914664347966514
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,16,2,128,0,1,float16,float16,1,0.08329600095748901
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,16,1,128,0,1,float16,fp8,255,0.033002667129039764
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,16,1,128,0,1,float16,float16,511,0.07541333138942719
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,16,2,128,0,1,float16,float16,3,0.08371200164159139
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,16,1,128,0,1,float16,fp8,511,0.046821330984433494
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,16,2,128,0,1,float16,fp8,1,0.02683199942111969
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,16,2,128,0,1,float16,float16,7,0.08328533172607422
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,16,2,128,0,1,float16,fp8,3,0.02678400029738744
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,16,2,128,0,1,float16,fp8,15,0.02606933315594991
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,16,2,128,0,1,float16,fp8,7,0.026373334228992462
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,16,2,128,0,1,float16,float16,15,0.08442667126655579
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,16,2,128,0,1,float16,float16,63,0.08366400003433228
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,16,2,128,0,1,float16,float16,31,0.08432533343633015
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,16,2,128,0,1,float16,fp8,31,0.02693866689999898
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,16,2,128,0,1,float16,float16,127,0.08616532882054646
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,16,2,128,0,1,float16,fp8,63,0.026560001075267792
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,16,2,128,0,1,float16,fp8,255,0.04669866462548574
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,16,2,128,0,1,float16,fp8,127,0.03489066660404205
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,16,2,128,0,1,float16,float16,255,0.10019733508427937
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,16,4,128,0,1,float16,float16,1,0.1527733306090037
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,16,2,128,0,1,float16,float16,511,0.13281066219011942
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,16,4,128,0,1,float16,float16,3,0.15212266643842062
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,16,4,128,0,1,float16,fp8,1,0.04628799855709076
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,16,4,128,0,1,float16,fp8,7,0.04689066608746847
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,16,2,128,0,1,float16,fp8,511,0.06649599969387054
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,16,4,128,0,1,float16,fp8,3,0.046485334634780884
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,16,4,128,0,1,float16,float16,7,0.15371732910474142
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,16,4,128,0,1,float16,fp8,15,0.046223998069763184
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,16,4,128,0,1,float16,fp8,31,0.04669866462548574
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,16,4,128,0,1,float16,float16,15,0.15307199954986572
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,16,4,128,0,1,float16,float16,31,0.15569067001342773
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,16,4,128,0,1,float16,fp8,127,0.055919999877611794
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,16,4,128,0,1,float16,float16,63,0.15648000439008078
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,16,4,128,0,1,float16,fp8,63,0.04610133171081543
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,16,4,128,0,1,float16,float16,127,0.15661333004633585
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,16,4,128,0,1,float16,float16,255,0.1816533406575521
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,16,4,128,0,1,float16,fp8,511,0.11177600423494975
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,16,4,128,0,1,float16,fp8,255,0.07502399881680806
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,16,4,128,0,1,float16,float16,511,0.24501333634058634
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,16,8,128,0,1,float16,fp8,3,0.08832533160845439
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,16,8,128,0,1,float16,float16,1,0.29632000128428143
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,16,8,128,0,1,float16,fp8,1,0.08852799733479817
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,16,8,128,0,1,float16,float16,3,0.2951786716779073
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,16,8,128,0,1,float16,float16,7,0.2921866575876872
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,16,8,128,0,1,float16,fp8,7,0.08905067046483357
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,16,8,128,0,1,float16,float16,31,0.29983999331792194
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,16,8,128,0,1,float16,fp8,15,0.08825066685676575
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,16,8,128,0,1,float16,float16,15,0.29709867636362713
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,16,8,128,0,1,float16,fp8,31,0.08903466661771138
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,16,8,128,0,1,float16,float16,127,0.29658667246500653
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,16,8,128,0,1,float16,float16,63,0.2976800004641215
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,16,8,128,0,1,float16,fp8,63,0.08906666437784831
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,16,8,128,0,1,float16,float16,255,0.3465493520100911
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,16,8,128,0,1,float16,fp8,127,0.10044800241788228
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,16,8,128,0,1,float16,fp8,255,0.13475199540456137
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,16,1,128,0,1,float16,float16,1,0.08661866188049316
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,16,8,128,0,1,float16,float16,511,0.4699999888737996
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,16,8,128,0,1,float16,fp8,511,0.20958399772644043
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,16,1,128,0,1,float16,fp8,3,0.03385599950949351
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,16,1,128,0,1,float16,fp8,1,0.034314667185147606
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,16,1,128,0,1,float16,float16,3,0.08739733695983887
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,16,1,128,0,1,float16,float16,15,0.08665066957473755
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,16,1,128,0,1,float16,float16,7,0.08749866485595703
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,16,1,128,0,1,float16,fp8,31,0.03369066615899404
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,16,1,128,0,1,float16,fp8,7,0.03427733232577642
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,16,1,128,0,1,float16,fp8,15,0.035061334570248924
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,16,1,128,0,1,float16,float16,31,0.08845333258310954
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,16,1,128,0,1,float16,float16,127,0.08965866764386494
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,16,1,128,0,1,float16,float16,255,0.10407466689745586
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,16,1,128,0,1,float16,float16,63,0.08830400307973225
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,16,1,128,0,1,float16,fp8,63,0.033600000043710075
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,16,1,128,0,1,float16,fp8,127,0.050848002235094704
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,16,2,128,0,1,float16,float16,3,0.15686933199564615
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,16,1,128,0,1,float16,fp8,255,0.0620000014702479
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,16,2,128,0,1,float16,float16,1,0.15813866257667542
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,16,2,128,0,1,float16,fp8,3,0.055439998706181846
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,16,2,128,0,1,float16,fp8,1,0.05622399846712748
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,16,2,128,0,1,float16,fp8,15,0.05589866638183594
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,16,2,128,0,1,float16,float16,7,0.1576479971408844
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,16,2,128,0,1,float16,fp8,7,0.05643733342488607
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,16,2,128,0,1,float16,float16,63,0.16107733050982156
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,16,2,128,0,1,float16,float16,15,0.15862933794657388
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,16,2,128,0,1,float16,float16,31,0.15893333156903586
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,16,2,128,0,1,float16,fp8,31,0.05550933380921682
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,16,2,128,0,1,float16,float16,127,0.16173866391181946
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,16,2,128,0,1,float16,fp8,63,0.05541866521040598
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,16,2,128,0,1,float16,fp8,255,0.08374399940172832
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,16,2,128,0,1,float16,fp8,127,0.06656000018119812
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,16,4,128,0,1,float16,fp8,3,0.09016533692677815
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,16,2,128,0,1,float16,float16,255,0.1882879932721456
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,16,4,128,0,1,float16,float16,1,0.2975253264109294
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,16,4,128,0,1,float16,fp8,1,0.09047999978065491
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,16,4,128,0,1,float16,fp8,7,0.09005866448084514
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,16,4,128,0,1,float16,float16,3,0.2977813283602397
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,16,4,128,0,1,float16,float16,7,0.30021866162618
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,16,4,128,0,1,float16,float16,15,0.29980266094207764
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,16,4,128,0,1,float16,float16,63,0.3019946614901225
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,16,4,128,0,1,float16,fp8,15,0.09106133381525676
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,16,4,128,0,1,float16,fp8,31,0.0902400016784668
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,16,4,128,0,1,float16,float16,31,0.3036746581395467
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,16,4,128,0,1,float16,fp8,63,0.09146133065223694
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,16,4,128,0,1,float16,float16,127,0.3038506706555684
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,16,4,128,0,1,float16,float16,255,0.3517119884490967
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,16,4,128,0,1,float16,fp8,127,0.10542399684588115
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,16,4,128,0,1,float16,fp8,255,0.14169599612553915
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,16,8,128,0,1,float16,fp8,1,0.17069866259892783
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,16,8,128,0,1,float16,float16,1,0.5762453476587931
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,16,8,128,0,1,float16,float16,7,0.5843040148417155
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,16,8,128,0,1,float16,float16,3,0.5815573136011759
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,16,8,128,0,1,float16,fp8,3,0.17063466707865396
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,16,8,128,0,1,float16,fp8,7,0.169978658358256
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,16,8,128,0,1,float16,fp8,31,0.17065600554148355
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,16,8,128,0,1,float16,fp8,15,0.1707680026690165
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,16,8,128,0,1,float16,float16,15,0.59006400903066
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,16,8,128,0,1,float16,float16,127,0.5788266658782959
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,16,8,128,0,1,float16,float16,31,0.588149348894755
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,16,8,128,0,1,float16,float16,63,0.5874240001042684
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,16,1,128,0,1,float16,float16,1,0.009045333291093508
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,16,1,128,0,1,float16,fp8,1,0.006874666859706243
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,16,8,128,0,1,float16,fp8,63,0.17096533377965292
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,16,1,128,0,1,float16,float16,3,0.009098666409651438
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,16,1,128,0,1,float16,fp8,3,0.007402666533986728
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,16,1,128,0,1,float16,float16,7,0.009354666496316591
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,16,8,128,0,1,float16,float16,255,0.6801226933797201
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,16,1,128,0,1,float16,fp8,7,0.006773333375652631
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,16,1,128,0,1,float16,float16,15,0.009701333319147428
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,16,1,128,0,1,float16,fp8,15,0.00749333327015241
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,16,8,128,0,1,float16,fp8,127,0.19286400079727173
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,16,1,128,0,1,float16,float16,31,0.011488000551859537
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,16,1,128,0,1,float16,fp8,31,0.0100853331387043
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,16,1,128,0,1,float16,float16,63,0.011029332876205444
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,16,1,128,0,1,float16,fp8,63,0.00679466687142849
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,16,1,128,0,1,float16,float16,511,0.013829333086808523
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,16,8,128,0,1,float16,fp8,255,0.26364799340566
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,16,1,128,0,1,float16,float16,127,0.011551999797423681
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,16,1,128,0,1,float16,fp8,127,0.008245333408315977
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,16,1,128,0,1,float16,float16,255,0.01350933313369751
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,16,1,128,0,1,float16,float16,2047,0.02242133269707362
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,16,1,128,0,1,float16,fp8,255,0.009829333052039146
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,16,1,128,0,1,float16,fp8,511,0.00966933307548364
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,16,1,128,0,1,float16,float16,1023,0.013274667163689932
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,16,1,128,0,1,float16,fp8,1023,0.011343999455372492
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,16,1,128,0,1,float16,fp8,8191,0.022202665607134502
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,16,1,128,0,1,float16,fp8,2047,0.013642666240533194
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,16,1,128,0,1,float16,float16,4095,0.02515200028816859
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,16,1,128,0,1,float16,fp8,4095,0.015781333049138386
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,16,1,128,0,1,float16,float16,8191,0.036650667587916054
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,16,1,128,0,1,float16,float16,16383,0.05330666899681091
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,16,1,128,0,1,float16,fp8,16383,0.03605866680542628
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,16,2,128,0,1,float16,float16,1,0.008885333314538002
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,16,2,128,0,1,float16,fp8,1,0.007381333038210869
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,16,1,128,0,1,float16,float16,32767,0.08290133376916249
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,16,1,128,0,1,float16,fp8,32767,0.05186133086681366
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,16,2,128,0,1,float16,float16,3,0.00867733359336853
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,16,2,128,0,1,float16,fp8,3,0.006010666489601135
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,16,2,128,0,1,float16,float16,7,0.009077333534757296
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,16,2,128,0,1,float16,fp8,7,0.00754666638871034
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,16,2,128,0,1,float16,float16,15,0.00997866690158844
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,16,2,128,0,1,float16,fp8,15,0.007151999821265538
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,16,1,128,0,1,float16,float16,65535,0.1304639975229899
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,16,1,128,0,1,float16,fp8,65535,0.08665600419044495
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,16,2,128,0,1,float16,float16,31,0.011066666493813196
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,16,2,128,0,1,float16,fp8,31,0.005552000055710475
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,16,2,128,0,1,float16,float16,63,0.011600000162919363
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,16,2,128,0,1,float16,fp8,63,0.007647999872763951
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,16,2,128,0,1,float16,float16,127,0.011776000261306763
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,16,2,128,0,1,float16,float16,1023,0.019754666835069656
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,16,2,128,0,1,float16,fp8,127,0.008016000191370646
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,16,2,128,0,1,float16,float16,255,0.012917333592971167
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,16,2,128,0,1,float16,fp8,255,0.008352000266313553
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,16,2,128,0,1,float16,float16,511,0.01333333303531011
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,16,2,128,0,1,float16,fp8,511,0.008325333396593729
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,16,2,128,0,1,float16,fp8,1023,0.010469333579142889
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,16,2,128,0,1,float16,float16,2047,0.028991999725500744
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,16,2,128,0,1,float16,fp8,2047,0.012890666723251343
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,16,2,128,0,1,float16,float16,4095,0.040394666294256844
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,16,2,128,0,1,float16,fp8,4095,0.020954666038354237
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,16,2,128,0,1,float16,float16,8191,0.05923733115196228
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,16,2,128,0,1,float16,fp8,8191,0.025600001215934753
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,16,2,128,0,1,float16,float16,16383,0.08678399523099263
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,16,4,128,0,1,float16,float16,1,0.010879999647537867
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,16,2,128,0,1,float16,float16,32767,0.12485866745313008
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,16,2,128,0,1,float16,fp8,16383,0.04348800083001455
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,16,4,128,0,1,float16,fp8,1,0.00589866687854131
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,16,2,128,0,1,float16,fp8,32767,0.064751997590065
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,16,4,128,0,1,float16,float16,3,0.010133333504199982
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,16,4,128,0,1,float16,fp8,3,0.007589333380262057
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,16,4,128,0,1,float16,float16,7,0.01108266661564509
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,16,4,128,0,1,float16,fp8,7,0.007813333223263422
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,16,4,128,0,1,float16,float16,15,0.010255999863147736
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,16,2,128,0,1,float16,float16,65535,0.2013439933458964
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,16,2,128,0,1,float16,fp8,65535,0.10912000139554341
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,16,4,128,0,1,float16,fp8,15,0.00808533343176047
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,16,4,128,0,1,float16,float16,31,0.01020800011853377
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,16,4,128,0,1,float16,fp8,31,0.007733333234985669
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,16,4,128,0,1,float16,float16,63,0.01101333275437355
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,16,4,128,0,1,float16,fp8,255,0.007850666840871176
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,16,4,128,0,1,float16,fp8,63,0.005888000130653381
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,16,4,128,0,1,float16,float16,127,0.0106133334338665
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,16,4,128,0,1,float16,float16,255,0.014032000054915747
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,16,4,128,0,1,float16,fp8,127,0.007333333293596904
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,16,4,128,0,1,float16,float16,511,0.019802667200565338
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,16,4,128,0,1,float16,float16,1023,0.027973333994547527
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,16,4,128,0,1,float16,fp8,511,0.010666667173306147
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,16,4,128,0,1,float16,fp8,1023,0.014032000054915747
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,16,4,128,0,1,float16,float16,2047,0.04342400034268697
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,16,4,128,0,1,float16,float16,8191,0.08449600140253703
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,16,4,128,0,1,float16,fp8,2047,0.017968000223239262
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,16,4,128,0,1,float16,float16,4095,0.0677706648906072
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,16,4,128,0,1,float16,fp8,4095,0.026330667237440746
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,16,4,128,0,1,float16,fp8,8191,0.0388373335202535
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,16,4,128,0,1,float16,float16,16383,0.12019200126330058
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,16,4,128,0,1,float16,fp8,16383,0.058133333921432495
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,16,8,128,0,1,float16,float16,1,0.011488000551859537
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,16,4,128,0,1,float16,float16,32767,0.19131733973821005
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,16,8,128,0,1,float16,fp8,1,0.005978666866819064
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,16,4,128,0,1,float16,fp8,32767,0.09768533706665039
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,16,8,128,0,1,float16,float16,3,0.010911999891201654
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,16,8,128,0,1,float16,fp8,3,0.009285333255926767
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,16,8,128,0,1,float16,float16,7,0.010725333044926325
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,16,8,128,0,1,float16,fp8,7,0.006021333237489064
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,16,8,128,0,1,float16,float16,15,0.011322667201360067
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,16,8,128,0,1,float16,fp8,15,0.007301333049933116
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,16,8,128,0,1,float16,float16,31,0.011461333682139715
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,16,4,128,0,1,float16,float16,65535,0.3363306522369385
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,16,8,128,0,1,float16,fp8,31,0.00595199999709924
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,16,4,128,0,1,float16,fp8,65535,0.1776640017827352
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,16,8,128,0,1,float16,float16,63,0.010784000158309937
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,16,8,128,0,1,float16,fp8,63,0.008869333192706108
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,16,8,128,0,1,float16,float16,127,0.011312000453472137
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,16,8,128,0,1,float16,fp8,127,0.010122666756312052
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,16,8,128,0,1,float16,float16,255,0.02126399924357732
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,16,8,128,0,1,float16,fp8,255,0.00890666681031386
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,16,8,128,0,1,float16,float16,511,0.028581333657105763
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,16,8,128,0,1,float16,fp8,511,0.013647999614477158
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,16,8,128,0,1,float16,float16,1023,0.04614933331807455
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,16,8,128,0,1,float16,fp8,1023,0.019850666324297588
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,16,8,128,0,1,float16,float16,2047,0.06633600095907848
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,16,8,128,0,1,float16,fp8,2047,0.024879999458789825
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,16,8,128,0,1,float16,float16,4095,0.08411733309427898
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,16,8,128,0,1,float16,fp8,4095,0.037658666570981346
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,16,8,128,0,1,float16,float16,8191,0.11920000116030376
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,16,8,128,0,1,float16,fp8,8191,0.05672533313433329
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,16,8,128,0,1,float16,float16,16383,0.19049066305160522
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,16,8,128,0,1,float16,fp8,16383,0.09711466232935588
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,8,1,64,128,1,float16,float16,1,0.009008000294367472
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,8,1,64,0,1,float16,float16,1,0.009082666908701261
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,16,8,128,0,1,float16,float16,32767,0.32867733637491864
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,8,1,64,128,1,float16,fp8,1,0.007162666569153468
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,16,8,128,0,1,float16,fp8,32767,0.17715734243392944
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,8,1,64,0,1,float16,fp8,1,0.009077333534757296
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,8,1,64,128,1,float16,float16,3,0.008997333546479544
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,8,1,64,0,1,float16,float16,3,0.009029333169261614
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,8,1,64,128,1,float16,fp8,3,0.005754666402935982
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,8,1,64,0,1,float16,fp8,3,0.007386666412154834
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,16,8,128,0,1,float16,fp8,65535,0.33694398403167725
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,8,1,64,128,1,float16,float16,7,0.009242666885256767
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,8,1,64,0,1,float16,float16,7,0.009402666861812273
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,16,8,128,0,1,float16,float16,65535,0.6176373163859049
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,8,1,64,128,1,float16,fp8,7,0.0075519997626543045
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,8,1,64,0,1,float16,fp8,7,0.008250666782259941
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,8,1,64,128,1,float16,float16,15,0.008693333094318708
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,8,1,64,0,1,float16,float16,15,0.00927466650803884
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,8,1,64,128,1,float16,fp8,15,0.005775999898711841
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,8,1,64,0,1,float16,fp8,15,0.008240000034372011
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,8,1,64,128,1,float16,float16,31,0.010159999753038088
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,8,1,64,0,1,float16,float16,31,0.00984533317387104
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,8,1,64,128,1,float16,fp8,31,0.007391999786098798
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,8,1,64,0,1,float16,fp8,31,0.008080000057816505
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,8,1,64,128,1,float16,float16,63,0.010970667004585266
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,8,1,64,0,1,float16,float16,63,0.011658667276302973
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,8,1,64,128,1,float16,fp8,63,0.00590933362642924
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,8,1,64,0,1,float16,fp8,63,0.007936000203092894
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,8,1,64,0,1,float16,float16,255,0.011999999483426413
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,8,1,64,128,1,float16,float16,127,0.011920000116030375
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,8,1,64,0,1,float16,float16,127,0.011711999773979187
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,8,1,64,128,1,float16,fp8,127,0.007349333415428798
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,8,1,64,0,1,float16,fp8,127,0.00816000004609426
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,8,1,64,128,1,float16,float16,255,0.011343999455372492
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,8,1,64,128,1,float16,fp8,255,0.007082666580875714
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,8,1,64,0,1,float16,fp8,255,0.008261333530147871
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,8,1,64,128,1,float16,float16,511,0.01210133358836174
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,8,1,64,0,1,float16,float16,511,0.012426666915416718
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,8,1,64,128,1,float16,fp8,511,0.007631999750932057
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,8,1,64,0,1,float16,fp8,511,0.008410666758815447
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,8,1,64,128,1,float16,float16,1023,0.016575999557971954
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,8,1,64,0,1,float16,float16,1023,0.01684800038735072
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,8,1,64,128,1,float16,fp8,1023,0.007418666655818622
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,8,1,64,0,1,float16,fp8,1023,0.009583999713261923
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,8,1,64,128,1,float16,float16,2047,0.020117333779732387
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,8,1,64,0,1,float16,float16,2047,0.02517866591612498
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,8,1,64,128,1,float16,fp8,2047,0.007941333577036858
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,8,1,64,0,1,float16,fp8,2047,0.011616000284751257
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,8,1,64,128,1,float16,float16,4095,0.02439466615517934
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,8,1,64,0,1,float16,float16,4095,0.03271466741959254
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,8,1,64,128,1,float16,fp8,4095,0.008757333581646284
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,8,1,64,0,1,float16,fp8,4095,0.014826666563749313
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,8,1,64,128,1,float16,float16,8191,0.02874133239189784
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,8,1,64,0,1,float16,float16,8191,0.045253331462542214
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,8,1,64,128,1,float16,fp8,8191,0.009018666421373686
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,8,1,64,0,1,float16,fp8,8191,0.023946667710940044
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,8,1,64,128,1,float16,float16,16383,0.034330666065216064
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,8,1,64,128,1,float16,fp8,16383,0.008789333204428354
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,8,1,64,0,1,float16,float16,16383,0.06955199937025706
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,8,1,64,0,1,float16,fp8,16383,0.03211733450492223
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,8,1,64,128,1,float16,float16,32767,0.03536533315976461
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,8,1,64,0,1,float16,fp8,32767,0.05295466880003611
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,8,1,64,0,1,float16,float16,32767,0.09352533022562663
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,8,1,64,128,1,float16,fp8,32767,0.008672000219424566
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,8,2,64,128,1,float16,float16,1,0.010058666889866194
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,8,2,64,0,1,float16,float16,1,0.010661333799362183
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,8,2,64,128,1,float16,fp8,1,0.005888000130653381
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,8,2,64,0,1,float16,fp8,1,0.008320000022649765
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,8,2,64,128,1,float16,float16,3,0.010709332923094431
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,8,2,64,0,1,float16,float16,3,0.010048000141978264
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,8,2,64,128,1,float16,fp8,3,0.008506666868925095
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,8,2,64,0,1,float16,fp8,3,0.007642666498819987
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,8,1,64,128,1,float16,float16,65535,0.03515733281771342
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,8,2,64,128,1,float16,float16,7,0.009941333283980688
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,8,2,64,0,1,float16,float16,7,0.010586666564146677
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,8,1,64,0,1,float16,float16,65535,0.14053866267204285
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,8,2,64,128,1,float16,fp8,7,0.00761600024998188
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,8,1,64,128,1,float16,fp8,65535,0.009754666437705358
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,8,2,64,0,1,float16,fp8,7,0.007749333356817563
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,8,1,64,0,1,float16,fp8,65535,0.08656000097592671
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,8,2,64,128,1,float16,fp8,15,0.008698666468262672
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,8,2,64,128,1,float16,float16,15,0.010245333115259806
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,8,2,64,0,1,float16,float16,15,0.010549332946538925
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,8,2,64,0,1,float16,fp8,15,0.007471999774376552
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,8,2,64,128,1,float16,float16,31,0.009813333551088968
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,8,2,64,0,1,float16,float16,31,0.010389333590865135
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,8,2,64,128,1,float16,fp8,31,0.00730666642387708
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,8,2,64,0,1,float16,fp8,31,0.007151999821265538
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,8,2,64,128,1,float16,float16,127,0.010362666721145311
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,8,2,64,128,1,float16,float16,63,0.010069333637754122
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,8,2,64,0,1,float16,float16,63,0.010570666442314783
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,8,2,64,128,1,float16,fp8,63,0.007637333124876022
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,8,2,64,0,1,float16,fp8,63,0.005701333284378052
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,8,2,64,0,1,float16,float16,127,0.010469333579142889
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,8,2,64,128,1,float16,fp8,127,0.007290666922926903
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,8,2,64,0,1,float16,fp8,127,0.008992000172535578
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,8,2,64,128,1,float16,float16,511,0.01887999971707662
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,8,2,64,128,1,float16,float16,255,0.012341332932313284
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,8,2,64,0,1,float16,float16,255,0.012560000022252401
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,8,2,64,128,1,float16,float16,1023,0.021717332303524017
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,8,2,64,128,1,float16,fp8,255,0.007818666597207388
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,8,2,64,0,1,float16,fp8,255,0.007520000139872233
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,8,2,64,0,1,float16,float16,511,0.017562666287024815
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,8,2,64,128,1,float16,fp8,511,0.009178666397929192
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,8,2,64,0,1,float16,fp8,511,0.008752000207702318
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,8,2,64,0,1,float16,float16,1023,0.024533333877722423
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,8,2,64,128,1,float16,fp8,1023,0.008442666381597519
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,8,2,64,0,1,float16,fp8,1023,0.01081066702802976
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,8,2,64,128,1,float16,float16,4095,0.03473600000143051
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,8,2,64,128,1,float16,float16,2047,0.027776000400384266
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,8,2,64,0,1,float16,float16,2047,0.03615466753641764
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,8,2,64,128,1,float16,fp8,2047,0.008394666636983553
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,8,2,64,0,1,float16,fp8,2047,0.01684800038735072
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,8,2,64,0,1,float16,float16,4095,0.05303466816743215
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,8,2,64,128,1,float16,fp8,4095,0.008645333349704742
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,8,2,64,0,1,float16,fp8,4095,0.02144533395767212
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,8,2,64,128,1,float16,float16,8191,0.03450666616360346
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,8,2,64,0,1,float16,float16,8191,0.0677706648906072
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,8,2,64,128,1,float16,fp8,8191,0.00867733359336853
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,8,2,64,0,1,float16,fp8,8191,0.02926933268706004
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,8,2,64,128,1,float16,float16,16383,0.03490666548411051
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,8,2,64,0,1,float16,float16,16383,0.08948799967765808
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,8,2,64,0,1,float16,fp8,16383,0.04637333254019419
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,8,2,64,128,1,float16,fp8,16383,0.008826666822036108
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,8,2,64,128,1,float16,float16,32767,0.034501334031422935
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,8,2,64,0,1,float16,float16,32767,0.13335466384887695
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,8,2,64,0,1,float16,fp8,32767,0.07919466495513916
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,8,2,64,128,1,float16,fp8,32767,0.008736000085870424
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,8,4,64,128,1,float16,float16,1,0.010464000205198923
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,8,4,64,0,1,float16,float16,1,0.010570666442314783
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,8,4,64,128,1,float16,fp8,1,0.007216000308593114
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,8,4,64,0,1,float16,fp8,1,0.00795199970404307
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,8,4,64,128,1,float16,float16,3,0.010565333068370819
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,8,4,64,0,1,float16,float16,3,0.010442666709423065
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,8,4,64,0,1,float16,fp8,3,0.007647999872763951
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,8,4,64,128,1,float16,fp8,3,0.00720000018676122
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,8,2,64,128,1,float16,fp8,65535,0.008309333274761835
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,8,4,64,128,1,float16,float16,7,0.010570666442314783
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,8,4,64,0,1,float16,float16,7,0.010464000205198923
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,8,2,64,128,1,float16,float16,65535,0.035018667578697205
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,8,4,64,128,1,float16,fp8,7,0.0074879998962084455
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,8,4,64,0,1,float16,fp8,7,0.007424000029762586
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,8,2,64,0,1,float16,float16,65535,0.22127467393875122
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,8,2,64,0,1,float16,fp8,65535,0.14346133669217428
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,8,4,64,0,1,float16,float16,15,0.009962666779756546
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,8,4,64,128,1,float16,float16,15,0.010464000205198923
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,8,4,64,128,1,float16,fp8,15,0.009775999933481216
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,8,4,64,0,1,float16,fp8,15,0.00772266648709774
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,8,4,64,128,1,float16,float16,31,0.010431999961535135
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,8,4,64,0,1,float16,float16,31,0.011002667248249054
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,8,4,64,128,1,float16,fp8,63,0.007626666376988093
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,8,4,64,128,1,float16,fp8,31,0.011781333635250727
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,8,4,64,0,1,float16,fp8,31,0.00613866684337457
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,8,4,64,128,1,float16,float16,63,0.010650667051474253
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,8,4,64,0,1,float16,float16,63,0.010591999938090643
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,8,4,64,0,1,float16,fp8,63,0.007226666435599327
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,8,4,64,128,1,float16,float16,127,0.010869332899649939
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,8,4,64,0,1,float16,float16,127,0.009930666536092758
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,8,4,64,128,1,float16,fp8,127,0.00721066693464915
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,8,4,64,0,1,float16,fp8,127,0.006144000217318535
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,8,4,64,128,1,float16,float16,255,0.018602666755517323
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,8,4,64,0,1,float16,float16,255,0.018672000616788864
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,8,4,64,128,1,float16,fp8,255,0.007157333195209503
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,8,4,64,0,1,float16,fp8,255,0.009328000247478485
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,8,4,64,128,1,float16,float16,511,0.022639999787012737
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,8,4,64,0,1,float16,float16,511,0.025626666843891144
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,8,4,64,128,1,float16,fp8,511,0.008000000069538752
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,8,4,64,0,1,float16,fp8,511,0.009957333405812582
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,8,4,64,128,1,float16,float16,1023,0.033413333197434746
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,8,4,64,0,1,float16,float16,1023,0.03945599993069967
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,8,4,64,128,1,float16,fp8,1023,0.009098666409651438
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,8,4,64,0,1,float16,fp8,1023,0.01599466676513354
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,8,4,64,128,1,float16,float16,2047,0.035818666219711304
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,8,4,64,0,1,float16,float16,2047,0.050981332858403526
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,8,4,64,128,1,float16,fp8,2047,0.009119999905427298
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,8,4,64,0,1,float16,fp8,2047,0.020367999871571858
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,8,4,64,128,1,float16,float16,4095,0.035029334326585136
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,8,4,64,0,1,float16,float16,4095,0.0669706662495931
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,8,4,64,128,1,float16,fp8,4095,0.008447999755541483
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,8,4,64,0,1,float16,fp8,4095,0.028351999819278717
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,8,4,64,128,1,float16,float16,8191,0.03521066655715307
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,8,4,64,0,1,float16,float16,8191,0.08860799670219421
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,8,4,64,0,1,float16,fp8,8191,0.044981335600217186
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,8,4,64,128,1,float16,fp8,8191,0.009077333534757296
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,8,4,64,128,1,float16,float16,16383,0.03611200054486593
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,8,4,64,0,1,float16,float16,16383,0.13149333000183105
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,8,4,64,128,1,float16,fp8,16383,0.010826667149861654
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,8,4,64,0,1,float16,fp8,16383,0.07780266801516215
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,8,4,64,0,1,float16,float16,32767,0.2178879976272583
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,8,4,64,128,1,float16,float16,32767,0.035887998839219414
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,8,4,64,128,1,float16,fp8,32767,0.00914666677514712
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,8,1,64,128,1,float16,float16,1,0.0075519997626543045
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,8,4,64,0,1,float16,fp8,32767,0.14255999525388083
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,8,1,64,0,1,float16,float16,1,0.007941333577036858
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,8,1,64,128,1,float16,fp8,1,0.005578666925430298
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,8,1,64,0,1,float16,fp8,1,0.007082666580875714
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,8,1,64,128,1,float16,float16,3,0.007936000203092894
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,8,1,64,128,1,float16,fp8,3,0.00554666668176651
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,8,1,64,0,1,float16,float16,3,0.008047999814152718
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,8,1,64,0,1,float16,fp8,3,0.007311999797821045
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,8,4,64,0,1,float16,float16,65535,0.39134931564331055
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,8,1,64,128,1,float16,float16,7,0.007989333321650824
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,8,1,64,0,1,float16,float16,7,0.007466666400432587
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,8,4,64,0,1,float16,fp8,65535,0.27238933245340985
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,8,1,64,128,1,float16,fp8,7,0.007397333160042763
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,8,4,64,128,1,float16,float16,65535,0.036320000886917114
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,8,1,64,0,1,float16,fp8,7,0.007135999699433644
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,8,4,64,128,1,float16,fp8,65535,0.008997333546479544
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,8,1,64,0,1,float16,float16,15,0.007477333148320516
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,8,1,64,0,1,float16,float16,31,0.007754666730761528
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,8,1,64,128,1,float16,float16,15,0.0075093333919843035
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,8,1,64,128,1,float16,fp8,31,0.008229333286484083
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,8,1,64,0,1,float16,fp8,31,0.005578666925430298
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,8,1,64,128,1,float16,fp8,15,0.005999999741713206
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,8,1,64,128,1,float16,float16,31,0.007840000092983246
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,8,1,64,0,1,float16,fp8,15,0.007802666475375493
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,8,1,64,128,1,float16,float16,63,0.00842666688064734
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,8,1,64,0,1,float16,float16,63,0.009039999917149544
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,8,1,64,128,1,float16,fp8,63,0.005690666536490123
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,8,1,64,0,1,float16,fp8,63,0.008303999900817871
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,8,1,64,128,1,float16,float16,127,0.008602666358153025
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,8,1,64,0,1,float16,float16,127,0.008592000231146812
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,8,1,64,128,1,float16,fp8,127,0.006159999718268712
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,8,1,64,0,1,float16,fp8,127,0.007429333403706551
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,8,1,64,128,1,float16,float16,255,0.009039999917149544
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,8,1,64,0,1,float16,float16,255,0.009125333279371262
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,8,1,64,0,1,float16,fp8,511,0.007936000203092894
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,8,1,64,128,1,float16,fp8,255,0.006815999746322632
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,8,1,64,0,1,float16,fp8,255,0.0086666668454806
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,8,1,64,128,1,float16,float16,511,0.008687999720374743
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,8,1,64,0,1,float16,float16,511,0.010933333386977514
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,8,1,64,128,1,float16,fp8,511,0.006965333595871925
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,8,1,64,128,1,float16,float16,1023,0.013418667018413544
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,8,1,64,0,1,float16,float16,1023,0.01613866661985715
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,8,1,64,128,1,float16,fp8,1023,0.007711999739209811
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,8,1,64,0,1,float16,fp8,1023,0.007711999739209811
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,8,1,64,128,1,float16,float16,2047,0.013461332768201828
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,8,1,64,0,1,float16,float16,2047,0.016623999923467636
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,8,1,64,0,1,float16,fp8,4095,0.010255999863147736
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,8,1,64,128,1,float16,fp8,2047,0.006837333242098491
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,8,1,64,0,1,float16,fp8,2047,0.009130666653315226
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,8,1,64,128,1,float16,float16,4095,0.012533333152532578
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,8,1,64,0,1,float16,float16,4095,0.01710933322707812
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,8,1,64,128,1,float16,fp8,4095,0.007743999982873599
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,8,1,64,128,1,float16,float16,16383,0.01413333291808764
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,8,1,64,128,1,float16,float16,8191,0.013573333621025085
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,8,1,64,0,1,float16,float16,8191,0.014922666052977243
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,8,1,64,128,1,float16,fp8,8191,0.006591999903321266
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,8,1,64,0,1,float16,fp8,16383,0.014165333161751429
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,8,1,64,0,1,float16,fp8,8191,0.0120319997270902
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,8,1,64,128,1,float16,fp8,16383,0.007386666412154834
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,8,1,64,0,1,float16,float16,16383,0.015381333728631338
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,8,1,64,0,1,float16,float16,32767,0.01865600049495697
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,8,1,64,128,1,float16,float16,32767,0.015706667055686314
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,8,1,64,0,1,float16,fp8,32767,0.01786133274435997
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,8,1,64,128,1,float16,fp8,32767,0.006650666395823161
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,8,1,64,128,1,float16,float16,65535,0.01658133293191592
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,8,1,64,0,1,float16,float16,65535,0.02218666672706604
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,8,1,64,128,1,float16,fp8,65535,0.006773333375652631
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,8,1,64,0,1,float16,fp8,65535,0.022431999444961548
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,8,2,64,128,1,float16,float16,1,0.007871999715765318
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,8,2,64,0,1,float16,float16,1,0.007354666789372762
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,8,2,64,128,1,float16,fp8,1,0.00554666668176651
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,8,2,64,0,1,float16,fp8,1,0.00697066696981589
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,8,2,64,128,1,float16,float16,3,0.007424000029762586
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,8,2,64,0,1,float16,float16,3,0.0075040000180403394
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,8,2,64,128,1,float16,fp8,3,0.00697066696981589
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,8,2,64,0,1,float16,fp8,3,0.005525333185990651
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,8,2,64,128,1,float16,float16,7,0.007381333038210869
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,8,1,64,128,1,float16,fp8,131071,0.006677333265542984
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,8,1,64,0,1,float16,float16,131071,0.029445332785447437
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,8,2,64,0,1,float16,float16,7,0.007930666829148928
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,8,2,64,128,1,float16,fp8,7,0.00549333356320858
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,8,1,64,128,1,float16,float16,131071,0.021850667893886566
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,8,2,64,0,1,float16,fp8,7,0.005589333052436511
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,8,1,64,0,1,float16,fp8,131071,0.029333333174387615
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,8,2,64,128,1,float16,float16,15,0.007370666911204656
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,8,2,64,0,1,float16,float16,15,0.007658666620651881
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,8,2,64,128,1,float16,fp8,15,0.006906666482488315
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,8,2,64,0,1,float16,fp8,15,0.0058186668902635574
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,8,2,64,128,1,float16,float16,31,0.008522666369875273
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,8,2,64,0,1,float16,float16,31,0.007610666876037915
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,8,2,64,128,1,float16,fp8,31,0.005712000032265981
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,8,2,64,0,1,float16,fp8,31,0.005418666948874791
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,8,2,64,128,1,float16,float16,63,0.008559999987483025
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,8,2,64,0,1,float16,float16,63,0.008842666943868002
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,8,2,64,128,1,float16,fp8,63,0.005717333406209946
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,8,2,64,0,1,float16,fp8,63,0.00821333316465219
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,8,2,64,128,1,float16,float16,127,0.008400000010927519
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,8,2,64,0,1,float16,float16,127,0.008602666358153025
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,8,2,64,128,1,float16,fp8,127,0.005701333284378052
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,8,2,64,0,1,float16,fp8,127,0.006079999729990959
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,8,2,64,128,1,float16,float16,255,0.009136000027259191
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,8,2,64,0,1,float16,float16,255,0.008576000109314919
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,8,2,64,128,1,float16,fp8,255,0.0075573331365982694
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,8,2,64,0,1,float16,fp8,255,0.009018666421373686
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,8,2,64,128,1,float16,float16,511,0.008581333483258883
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,8,2,64,0,1,float16,float16,511,0.009775999933481216
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,8,2,64,128,1,float16,fp8,511,0.006469333544373512
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,8,2,64,0,1,float16,fp8,511,0.007477333148320516
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,8,2,64,128,1,float16,float16,1023,0.014127999544143677
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,8,2,64,128,1,float16,fp8,2047,0.006629333520929019
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,8,2,64,0,1,float16,float16,1023,0.015279999623696009
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,8,2,64,128,1,float16,fp8,1023,0.00766933336853981
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,8,2,64,0,1,float16,fp8,1023,0.008863999818762144
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,8,2,64,128,1,float16,float16,2047,0.012975999464591345
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,8,2,64,0,1,float16,float16,2047,0.016549333930015564
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,8,2,64,0,1,float16,fp8,2047,0.008826666822036108
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,8,2,64,128,1,float16,float16,4095,0.012815999488035837
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,8,2,64,0,1,float16,float16,4095,0.012661332885424295
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,8,2,64,128,1,float16,fp8,4095,0.006613333399097125
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,8,2,64,0,1,float16,fp8,4095,0.01027199998497963
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,8,2,64,128,1,float16,float16,8191,0.013728000223636627
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,8,2,64,0,1,float16,float16,8191,0.014805333067973455
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,8,2,64,128,1,float16,fp8,8191,0.006437333300709724
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,8,2,64,0,1,float16,fp8,8191,0.011850666254758835
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,8,2,64,128,1,float16,float16,16383,0.014149333039919535
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,8,2,64,0,1,float16,float16,16383,0.015781333049138386
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,8,2,64,128,1,float16,fp8,16383,0.006533333410819371
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,8,2,64,0,1,float16,fp8,16383,0.01431999976436297
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,8,2,64,128,1,float16,float16,32767,0.015957333147525787
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,8,2,64,0,1,float16,float16,32767,0.01889066646496455
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,8,2,64,128,1,float16,fp8,32767,0.006586666529377301
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,8,2,64,0,1,float16,fp8,32767,0.017802666872739792
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,8,2,64,0,1,float16,float16,65535,0.021920000513394673
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,8,2,64,128,1,float16,float16,65535,0.017498667041460674
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,8,2,64,128,1,float16,fp8,65535,0.006671999891599019
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,8,4,64,128,1,float16,float16,1,0.007418666655818622
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,8,4,64,0,1,float16,float16,1,0.007946666950980822
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,8,4,64,128,1,float16,fp8,1,0.005541333307822545
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,8,2,64,0,1,float16,fp8,65535,0.022815999885400135
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,8,4,64,0,1,float16,fp8,1,0.005525333185990651
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,8,4,64,128,1,float16,float16,3,0.00790933333337307
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,8,4,64,0,1,float16,float16,3,0.007536000261704127
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,8,4,64,128,1,float16,fp8,3,0.007802666475375493
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,8,4,64,0,1,float16,fp8,3,0.005514666438102722
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,8,4,64,128,1,float16,float16,7,0.007967999825874964
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,8,4,64,0,1,float16,float16,7,0.007600000128149986
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,8,2,64,0,1,float16,float16,131071,0.03919466584920883
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,8,4,64,128,1,float16,float16,15,0.007978666573762894
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,8,4,64,128,1,float16,fp8,7,0.007861333588759104
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,8,2,64,128,1,float16,float16,131071,0.021749332547187805
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,8,4,64,0,1,float16,fp8,7,0.005557333429654439
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,8,4,64,128,1,float16,float16,31,0.00820266641676426
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,8,2,64,0,1,float16,fp8,131071,0.03166399896144867
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,8,4,64,0,1,float16,float16,15,0.007589333380262057
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,8,2,64,128,1,float16,fp8,131071,0.007231999809543292
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,8,4,64,128,1,float16,fp8,15,0.005552000055710475
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,8,4,64,0,1,float16,fp8,15,0.007104000076651573
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,8,4,64,0,1,float16,float16,31,0.00873066671192646
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,8,4,64,128,1,float16,fp8,31,0.005584000299374263
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,8,4,64,0,1,float16,fp8,31,0.0057386669019858045
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,8,4,64,128,1,float16,float16,63,0.008522666369875273
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,8,4,64,0,1,float16,float16,63,0.008826666822036108
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,8,4,64,128,1,float16,fp8,63,0.00554666668176651
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,8,4,64,0,1,float16,fp8,63,0.007184000064929326
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,8,4,64,128,1,float16,float16,127,0.009189333145817121
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,8,4,64,0,1,float16,float16,127,0.008549333239595095
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,8,4,64,128,1,float16,fp8,127,0.0060159998635451
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,8,4,64,0,1,float16,fp8,127,0.006037333359320958
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,8,4,64,128,1,float16,float16,255,0.00903466654320558
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,8,4,64,0,1,float16,float16,255,0.008597333605090777
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,8,4,64,128,1,float16,fp8,255,0.007429333403706551
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,8,4,64,0,1,float16,fp8,255,0.00850133349498113
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,8,4,64,128,1,float16,float16,511,0.008602666358153025
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,8,4,64,0,1,float16,float16,511,0.009679999823371569
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,8,4,64,128,1,float16,fp8,511,0.0068693334857622785
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,8,4,64,0,1,float16,fp8,511,0.007813333223263422
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,8,4,64,128,1,float16,float16,1023,0.01341333364446958
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,8,4,64,0,1,float16,float16,1023,0.01598400001724561
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,8,4,64,128,1,float16,fp8,2047,0.007205333560705185
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,8,4,64,128,1,float16,fp8,1023,0.008592000231146812
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,8,4,64,0,1,float16,fp8,1023,0.00790933333337307
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,8,4,64,128,1,float16,float16,2047,0.011813333878914515
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,8,4,64,128,1,float16,fp8,4095,0.008346666892369589
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,8,4,64,0,1,float16,float16,2047,0.0122079998254776
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,8,4,64,0,1,float16,fp8,2047,0.008943999807039896
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,8,4,64,128,1,float16,float16,4095,0.012719999998807907
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,8,4,64,0,1,float16,float16,4095,0.013194666554530462
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,8,4,64,0,1,float16,fp8,4095,0.009941333283980688
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,8,4,64,128,1,float16,float16,8191,0.013002666334311167
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,8,4,64,0,1,float16,float16,8191,0.01605333387851715
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,8,4,64,128,1,float16,fp8,8191,0.006853333363930385
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,8,4,64,0,1,float16,fp8,8191,0.012042666474978128
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,8,4,64,0,1,float16,float16,16383,0.017077332983414333
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,8,4,64,128,1,float16,float16,16383,0.014186666657527288
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,8,4,64,0,1,float16,fp8,16383,0.014245333770910898
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,8,4,64,0,1,float16,fp8,32767,0.020010666300853092
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,8,4,64,128,1,float16,fp8,16383,0.007247999931375186
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,8,4,64,128,1,float16,float16,32767,0.01736533393462499
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,8,4,64,128,1,float16,fp8,32767,0.00696000022192796
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,8,4,64,0,1,float16,float16,32767,0.026746665438016255
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,8,4,64,128,1,float16,float16,65535,0.019871999820073444
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,8,4,64,0,1,float16,float16,65535,0.032431999842325844
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,8,4,64,128,1,float16,fp8,65535,0.0069440001000960665
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,8,1,64,128,1,float16,float16,1,0.008010666817426682
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,8,4,64,0,1,float16,fp8,65535,0.024495999018351238
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,8,1,64,0,1,float16,float16,1,0.007994666695594788
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,8,1,64,128,1,float16,fp8,1,0.005472000067432721
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,8,1,64,0,1,float16,fp8,1,0.007749333356817563
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,8,1,64,0,1,float16,float16,3,0.007962666451931
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,8,1,64,128,1,float16,float16,3,0.007600000128149986
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,8,1,64,128,1,float16,fp8,3,0.005445333197712898
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,8,1,64,0,1,float16,fp8,3,0.0074560002734263735
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,8,1,64,128,1,float16,float16,7,0.00808533343176047
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,8,4,64,0,1,float16,float16,131071,0.05403733253479004
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,8,1,64,0,1,float16,float16,7,0.007530666887760162
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,8,4,64,0,1,float16,fp8,131071,0.03775999943415324
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,8,4,64,128,1,float16,fp8,131071,0.007040000210205714
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,8,1,64,128,1,float16,fp8,7,0.005482666815320651
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,8,4,64,128,1,float16,float16,131071,0.025920001169045765
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,8,1,64,0,1,float16,fp8,7,0.007424000029762586
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,8,1,64,128,1,float16,float16,15,0.008047999814152718
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,8,1,64,0,1,float16,float16,15,0.007520000139872233
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,8,1,64,128,1,float16,fp8,15,0.005557333429654439
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,8,1,64,0,1,float16,fp8,15,0.008330666770537695
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,8,1,64,128,1,float16,float16,31,0.007818666597207388
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,8,1,64,0,1,float16,float16,31,0.007754666730761528
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,8,1,64,128,1,float16,fp8,31,0.0069333333522081375
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,8,1,64,0,1,float16,fp8,31,0.007578666632374127
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,8,1,64,128,1,float16,float16,63,0.008992000172535578
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,8,1,64,0,1,float16,float16,63,0.008463999877373377
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,8,1,64,128,1,float16,fp8,63,0.005477333441376686
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,8,1,64,0,1,float16,fp8,63,0.008416000132759413
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,8,1,64,128,1,float16,float16,127,0.008703999842206636
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,8,1,64,0,1,float16,float16,127,0.008506666868925095
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,8,1,64,128,1,float16,fp8,127,0.00697066696981589
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,8,1,64,0,1,float16,fp8,127,0.007567999884486198
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,8,1,64,128,1,float16,float16,255,0.009237333511312803
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,8,1,64,0,1,float16,float16,255,0.008682666967312494
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,8,1,64,128,1,float16,fp8,255,0.006698666761318843
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,8,1,64,0,1,float16,fp8,255,0.008517333616813024
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,8,1,64,128,1,float16,float16,511,0.008736000085870424
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,8,1,64,0,1,float16,float16,511,0.009663999701539675
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,8,1,64,128,1,float16,fp8,511,0.007322666545708974
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,8,1,64,0,1,float16,fp8,511,0.008101333553592363
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,8,1,64,128,1,float16,float16,1023,0.014122666170199713
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,8,1,64,0,1,float16,float16,1023,0.015647999942302704
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,8,1,64,128,1,float16,fp8,1023,0.006789333497484525
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,8,1,64,0,1,float16,fp8,1023,0.009413333609700203
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,8,1,64,128,1,float16,float16,2047,0.013045333325862885
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,8,1,64,0,1,float16,float16,2047,0.016528000434239704
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,8,1,64,128,1,float16,fp8,2047,0.007407999907930692
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,8,1,64,0,1,float16,fp8,2047,0.009370666618148485
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,8,1,64,128,1,float16,float16,4095,0.013397333522637686
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,8,1,64,0,1,float16,float16,4095,0.013381333400805792
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,8,1,64,128,1,float16,fp8,4095,0.00690133310854435
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,8,1,64,0,1,float16,fp8,4095,0.010832000523805618
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,8,1,64,128,1,float16,float16,8191,0.013493333011865616
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,8,1,64,0,1,float16,float16,8191,0.015471999843915304
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,8,1,64,128,1,float16,fp8,8191,0.007376000285148621
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,8,1,64,128,1,float16,fp8,16383,0.008362666393319765
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,8,1,64,0,1,float16,fp8,8191,0.012634667257467905
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,8,1,64,128,1,float16,float16,16383,0.014698666830857595
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,8,1,64,0,1,float16,float16,16383,0.01642666632930438
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,8,1,64,0,1,float16,fp8,16383,0.01404800017674764
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,8,1,64,128,1,float16,float16,32767,0.015583999454975128
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,8,1,64,0,1,float16,fp8,32767,0.01851733277241389
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,8,1,64,0,1,float16,float16,32767,0.02022933339079221
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,8,1,64,128,1,float16,fp8,32767,0.00854399986565113
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,8,1,64,128,1,float16,float16,65535,0.017616000026464462
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,8,1,64,0,1,float16,float16,65535,0.022650666534900665
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,8,1,64,128,1,float16,fp8,65535,0.006858666737874349
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,8,2,64,128,1,float16,float16,1,0.007461333026488622
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,8,1,64,0,1,float16,fp8,65535,0.022698665658632915
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,8,2,64,0,1,float16,float16,1,0.007466666400432587
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,8,2,64,128,1,float16,fp8,1,0.005610666548212369
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,8,2,64,0,1,float16,fp8,1,0.005525333185990651
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,8,2,64,128,1,float16,float16,3,0.0074986666440963745
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,8,2,64,0,1,float16,float16,3,0.007600000128149986
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,8,2,64,128,1,float16,fp8,3,0.005589333052436511
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,8,2,64,0,1,float16,fp8,3,0.005450666571656863
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,8,2,64,128,1,float16,float16,7,0.007578666632374127
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,8,2,64,0,1,float16,float16,7,0.00761600024998188
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,8,2,64,128,1,float16,fp8,7,0.005568000177542369
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,8,1,64,128,1,float16,float16,131071,0.022863999009132385
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,8,2,64,0,1,float16,fp8,7,0.00559999980032444
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,8,2,64,128,1,float16,float16,15,0.007600000128149986
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,8,1,64,0,1,float16,fp8,131071,0.033029332756996155
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,8,2,64,0,1,float16,float16,15,0.0074986666440963745
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,8,1,64,0,1,float16,float16,131071,0.04045866678158442
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,8,2,64,128,1,float16,fp8,15,0.00697066696981589
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,8,2,64,0,1,float16,fp8,15,0.005701333284378052
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,8,1,64,128,1,float16,fp8,131071,0.008394666636983553
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,8,2,64,128,1,float16,float16,31,0.007840000092983246
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,8,2,64,0,1,float16,float16,31,0.007791999727487564
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,8,2,64,128,1,float16,fp8,31,0.005541333307822545
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,8,2,64,0,1,float16,fp8,31,0.006826666494210561
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,8,2,64,128,1,float16,float16,63,0.009008000294367472
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,8,2,64,0,1,float16,float16,63,0.008538666491707167
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,8,2,64,128,1,float16,fp8,63,0.00855466661353906
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,8,2,64,0,1,float16,fp8,63,0.006597333277265231
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,8,2,64,128,1,float16,float16,127,0.008746666833758354
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,8,2,64,0,1,float16,float16,127,0.00867733359336853
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,8,2,64,128,1,float16,fp8,127,0.007567999884486198
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,8,2,64,0,1,float16,fp8,127,0.00690133310854435
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,8,2,64,128,1,float16,float16,255,0.009232000137368837
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,8,2,64,0,1,float16,float16,255,0.00898133342464765
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,8,2,64,128,1,float16,fp8,255,0.00842666688064734
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,8,2,64,0,1,float16,fp8,255,0.007162666569153468
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,8,2,64,128,1,float16,float16,511,0.008752000207702318
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,8,2,64,0,1,float16,float16,511,0.009466666728258133
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,8,2,64,128,1,float16,fp8,511,0.007658666620651881
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,8,2,64,0,1,float16,fp8,511,0.007754666730761528
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,8,2,64,128,1,float16,float16,1023,0.014208000153303146
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,8,2,64,0,1,float16,float16,1023,0.015658666690190632
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,8,2,64,128,1,float16,fp8,1023,0.008565333361426989
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,8,2,64,0,1,float16,fp8,1023,0.007930666829148928
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,8,2,64,128,1,float16,float16,2047,0.012698666503032049
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,8,2,64,0,1,float16,float16,2047,0.012554666648308435
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,8,2,64,128,1,float16,fp8,2047,0.007631999750932057
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,8,2,64,0,1,float16,fp8,2047,0.009103999783595404
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,8,2,64,128,1,float16,float16,4095,0.013450667262077332
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,8,2,64,0,1,float16,float16,4095,0.01350933313369751
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,8,2,64,128,1,float16,fp8,4095,0.008629333227872849
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,8,2,64,0,1,float16,fp8,4095,0.009914666414260864
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,8,2,64,128,1,float16,float16,8191,0.014271999398867289
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,8,2,64,0,1,float16,float16,8191,0.015578666081031164
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,8,2,64,128,1,float16,fp8,8191,0.007711999739209811
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,8,2,64,0,1,float16,fp8,8191,0.01228800043463707
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,8,2,64,128,1,float16,float16,16383,0.014533333480358124
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,8,2,64,0,1,float16,float16,16383,0.01736533393462499
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,8,2,64,128,1,float16,fp8,16383,0.008357333640257517
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,8,2,64,0,1,float16,fp8,16383,0.014106666048367819
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,8,2,64,0,1,float16,float16,32767,0.0266239990790685
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,8,2,64,128,1,float16,float16,32767,0.01829333355029424
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,8,2,64,128,1,float16,fp8,32767,0.007834666719039282
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,8,2,64,0,1,float16,fp8,32767,0.020901332298914593
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,8,2,64,0,1,float16,float16,65535,0.03224000086386999
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,8,2,64,128,1,float16,float16,65535,0.020175999651352566
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,8,4,64,128,1,float16,float16,1,0.007674666742483775
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,8,2,64,0,1,float16,fp8,65535,0.024192000428835552
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,8,2,64,128,1,float16,fp8,65535,0.008512000242869059
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,8,4,64,0,1,float16,float16,1,0.008021333565314611
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,8,4,64,128,1,float16,fp8,1,0.005695999910434087
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,8,4,64,0,1,float16,fp8,1,0.005552000055710475
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,8,4,64,128,1,float16,float16,3,0.008602666358153025
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,8,4,64,0,1,float16,float16,3,0.008042666440208754
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,8,4,64,128,1,float16,fp8,3,0.005770666524767876
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,8,4,64,0,1,float16,fp8,3,0.005519999812046687
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,8,2,64,128,1,float16,float16,131071,0.026362667481104534
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,8,4,64,128,1,float16,float16,7,0.008687999720374743
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,8,4,64,0,1,float16,fp8,7,0.00554666668176651
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,8,4,64,0,1,float16,float16,7,0.007989333321650824
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,8,4,64,128,1,float16,fp8,7,0.005695999910434087
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,8,4,64,128,1,float16,float16,15,0.007610666876037915
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,8,4,64,0,1,float16,float16,15,0.00854399986565113
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,8,4,64,128,1,float16,fp8,15,0.005365333209435145
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,8,2,64,0,1,float16,float16,131071,0.05575466652711233
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,8,2,64,128,1,float16,fp8,131071,0.007711999739209811
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,8,2,64,0,1,float16,fp8,131071,0.03723733375469843
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,8,4,64,0,1,float16,fp8,15,0.005706666658322017
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,8,4,64,128,1,float16,float16,31,0.007818666597207388
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,8,4,64,0,1,float16,float16,31,0.008517333616813024
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,8,4,64,128,1,float16,fp8,31,0.005653333539764087
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,8,4,64,0,1,float16,fp8,31,0.007114666824539502
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,8,4,64,128,1,float16,float16,63,0.008938666433095932
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,8,4,64,0,1,float16,float16,63,0.00884799969693025
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,8,4,64,128,1,float16,fp8,63,0.005626666670044263
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,8,4,64,0,1,float16,fp8,63,0.005530666559934616
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,8,4,64,128,1,float16,float16,127,0.009413333609700203
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,8,4,64,0,1,float16,float16,127,0.008618666479984919
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,8,4,64,128,1,float16,fp8,127,0.006895999734600385
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,8,4,64,0,1,float16,fp8,127,0.007941333577036858
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,8,4,64,128,1,float16,float16,255,0.009045333291093508
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,8,4,64,0,1,float16,float16,255,0.008656000097592672
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,8,4,64,128,1,float16,fp8,255,0.007536000261704127
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,8,4,64,0,1,float16,fp8,255,0.008650666723648706
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,8,4,64,128,1,float16,float16,511,0.008858666444818178
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,8,4,64,0,1,float16,float16,511,0.010133333504199982
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,8,4,64,128,1,float16,fp8,511,0.007733333234985669
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,8,4,64,0,1,float16,fp8,511,0.007850666840871176
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,8,4,64,128,1,float16,float16,2047,0.012986666212479273
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,8,4,64,128,1,float16,float16,1023,0.011519999553759893
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,8,4,64,0,1,float16,float16,1023,0.012256000190973282
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,8,4,64,128,1,float16,fp8,1023,0.008618666479984919
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,8,4,64,128,1,float16,float16,4095,0.012773333738247553
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,8,4,64,0,1,float16,fp8,1023,0.007834666719039282
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,8,4,64,0,1,float16,float16,2047,0.013637332866589228
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,8,4,64,128,1,float16,fp8,2047,0.007365333537260692
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,8,4,64,0,1,float16,fp8,2047,0.009290666629870733
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,8,4,64,0,1,float16,float16,4095,0.014432000617186228
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,8,4,64,0,1,float16,fp8,4095,0.010149333626031876
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,8,4,64,128,1,float16,fp8,4095,0.008517333616813024
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,8,4,64,128,1,float16,float16,8191,0.015583999454975128
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,8,4,64,0,1,float16,float16,8191,0.02096533278624217
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,8,4,64,128,1,float16,fp8,8191,0.0074506668994824094
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,8,4,64,0,1,float16,fp8,8191,0.013653332988421122
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,8,4,64,128,1,float16,float16,16383,0.017642666896184284
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,8,4,64,0,1,float16,float16,16383,0.023760000864664715
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,8,4,64,0,1,float16,fp8,16383,0.015743999431530636
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,8,4,64,128,1,float16,float16,32767,0.021194666624069214
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,8,4,64,128,1,float16,fp8,16383,0.008645333349704742
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,8,4,64,0,1,float16,float16,32767,0.035536001125971474
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,8,4,64,128,1,float16,fp8,32767,0.007365333537260692
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,8,4,64,0,1,float16,fp8,32767,0.022778667509555817
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,8,4,64,128,1,float16,float16,65535,0.024149333437283833
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,8,4,64,0,1,float16,float16,65535,0.05299733579158783
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,8,1,64,128,1,float16,float16,1,0.009919999788204828
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,8,4,64,128,1,float16,fp8,65535,0.00786666696270307
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,8,4,64,0,1,float16,fp8,65535,0.03366400053103765
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,8,1,64,0,1,float16,float16,1,0.010202666744589806
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,8,1,64,128,1,float16,fp8,1,0.005888000130653381
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,8,1,64,0,1,float16,fp8,1,0.007637333124876022
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,8,1,64,128,1,float16,float16,3,0.010405333091815313
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,8,1,64,0,1,float16,float16,3,0.010213333492477735
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,8,1,64,128,1,float16,fp8,3,0.00578666664659977
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,8,1,64,0,1,float16,fp8,3,0.00766933336853981
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,8,1,64,128,1,float16,float16,7,0.010373333469033241
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,8,1,64,128,1,float16,fp8,7,0.005770666524767876
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,8,1,64,0,1,float16,float16,7,0.010122666756312052
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,8,4,64,128,1,float16,float16,131071,0.03194133440653483
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,8,4,64,0,1,float16,fp8,131071,0.06088533500830332
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,8,1,64,0,1,float16,fp8,7,0.007589333380262057
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,8,1,64,128,1,float16,float16,15,0.010357333347201347
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,8,4,64,0,1,float16,float16,131071,0.08869333068529765
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,8,1,64,0,1,float16,float16,15,0.009984000275532404
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,8,4,64,128,1,float16,fp8,131071,0.007797333101431529
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,8,1,64,128,1,float16,fp8,15,0.00707733320693175
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,8,1,64,128,1,float16,float16,63,0.009946666657924652
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,8,1,64,0,1,float16,fp8,15,0.00684799998998642
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,8,1,64,128,1,float16,float16,31,0.009925333162148794
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,8,1,64,0,1,float16,float16,31,0.010543999572594961
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,8,1,64,128,1,float16,fp8,31,0.008325333396593729
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,8,1,64,0,1,float16,fp8,31,0.0058133335163195925
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,8,1,64,0,1,float16,float16,63,0.010239999741315842
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,8,1,64,0,1,float16,fp8,127,0.008400000010927519
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,8,1,64,128,1,float16,fp8,63,0.006917333230376244
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,8,1,64,0,1,float16,fp8,63,0.005754666402935982
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,8,1,64,128,1,float16,float16,127,0.010453333457310995
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,8,1,64,0,1,float16,float16,127,0.009962666779756546
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,8,1,64,128,1,float16,fp8,127,0.0075093333919843035
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,8,1,64,128,1,float16,float16,255,0.011877333124478659
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,8,1,64,0,1,float16,float16,255,0.012213333199421564
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,8,1,64,128,1,float16,fp8,255,0.0084906667470932
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,8,1,64,0,1,float16,fp8,255,0.008389333263039589
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,8,1,64,128,1,float16,float16,511,0.01651200031240781
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,8,1,64,0,1,float16,float16,511,0.017488000293572743
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,8,1,64,128,1,float16,fp8,511,0.007418666655818622
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,8,1,64,0,1,float16,fp8,511,0.008863999818762144
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,8,1,64,128,1,float16,float16,1023,0.021541332205136616
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,8,1,64,0,1,float16,float16,1023,0.024901332954565685
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,8,1,64,128,1,float16,fp8,1023,0.008885333314538002
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,8,1,64,0,1,float16,fp8,1023,0.012170666207869848
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,8,1,64,128,1,float16,float16,2047,0.027434666951497395
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,8,1,64,0,1,float16,float16,2047,0.03667200108369192
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,8,1,64,128,1,float16,fp8,2047,0.00821333316465219
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,8,1,64,0,1,float16,fp8,2047,0.01727466657757759
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,8,1,64,128,1,float16,float16,4095,0.03440000116825104
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,8,1,64,0,1,float16,float16,4095,0.05342933535575867
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,8,1,64,128,1,float16,fp8,4095,0.008863999818762144
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,8,1,64,0,1,float16,fp8,4095,0.021589333812395733
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,8,1,64,128,1,float16,float16,8191,0.03483733286460241
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,8,1,64,0,1,float16,float16,8191,0.06794133285681407
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,8,1,64,128,1,float16,fp8,8191,0.008762666955590248
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,8,1,64,0,1,float16,fp8,8191,0.029530666768550873
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,8,1,64,128,1,float16,fp8,16383,0.00820266641676426
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,8,1,64,128,1,float16,float16,16383,0.034688000877698265
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,8,1,64,0,1,float16,float16,16383,0.09054399530092876
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,8,1,64,0,1,float16,fp8,16383,0.04753066599369049
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,8,2,64,128,1,float16,float16,1,0.010650667051474253
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,8,2,64,0,1,float16,float16,1,0.011007999380429586
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,8,1,64,128,1,float16,float16,32767,0.03421866645415624
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,8,2,64,128,1,float16,fp8,1,0.00595199999709924
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,8,2,64,0,1,float16,fp8,1,0.008938666433095932
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,8,1,64,128,1,float16,fp8,32767,0.00843733362853527
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,8,2,64,128,1,float16,float16,3,0.01099733387430509
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,8,2,64,0,1,float16,fp8,3,0.007274666801095009
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,8,1,64,0,1,float16,fp8,32767,0.07935466865698497
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,8,2,64,0,1,float16,float16,3,0.0099093330403169
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,8,2,64,128,1,float16,float16,7,0.010837333897749582
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,8,2,64,128,1,float16,fp8,3,0.00595199999709924
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,8,1,64,0,1,float16,float16,32767,0.13294933239618936
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,8,2,64,0,1,float16,float16,7,0.01022933361430963
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,8,2,64,128,1,float16,fp8,7,0.007258666679263115
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,8,2,64,0,1,float16,fp8,7,0.0058453331391016645
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,8,2,64,128,1,float16,float16,15,0.009850666547815004
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,8,2,64,0,1,float16,float16,15,0.0107893335322539
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,8,2,64,128,1,float16,fp8,15,0.00600533311565717
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,8,2,64,0,1,float16,fp8,15,0.005994666367769241
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,8,2,64,0,1,float16,float16,31,0.010677333921194077
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,8,2,64,128,1,float16,float16,31,0.010485333700974783
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,8,2,64,128,1,float16,fp8,31,0.005797333394487699
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,8,2,64,0,1,float16,fp8,31,0.005882666756709416
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,8,2,64,128,1,float16,float16,63,0.010751999914646149
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,8,2,64,0,1,float16,float16,63,0.010165333126982054
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,8,2,64,128,1,float16,fp8,63,0.005935999875267346
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,8,2,64,0,1,float16,fp8,63,0.007167999943097432
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,8,2,64,128,1,float16,float16,127,0.010437333335479101
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,8,2,64,0,1,float16,float16,127,0.009818666925032934
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,8,2,64,128,1,float16,fp8,127,0.0064853330453236895
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,8,2,64,0,1,float16,fp8,127,0.008474666625261307
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,8,2,64,128,1,float16,float16,255,0.01815466706951459
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,8,2,64,128,1,float16,fp8,255,0.007178666690985362
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,8,2,64,0,1,float16,float16,255,0.018464000274737675
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,8,2,64,0,1,float16,fp8,255,0.0081386665503184
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,8,2,64,128,1,float16,float16,511,0.022885332504908245
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,8,2,64,0,1,float16,float16,511,0.024559999505678814
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,8,2,64,128,1,float16,fp8,511,0.007978666573762894
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,8,2,64,0,1,float16,fp8,511,0.010533332824707031
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,8,2,64,128,1,float16,float16,2047,0.035386666655540466
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,8,2,64,128,1,float16,float16,1023,0.0325546662012736
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,8,2,64,0,1,float16,float16,1023,0.03982933362325033
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,8,2,64,128,1,float16,fp8,1023,0.009077333534757296
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,8,2,64,0,1,float16,fp8,1023,0.016447999825080235
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,8,2,64,0,1,float16,float16,2047,0.050474668542544045
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,8,2,64,128,1,float16,fp8,2047,0.008485333373149237
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,8,2,64,0,1,float16,fp8,2047,0.02059200033545494
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,8,2,64,128,1,float16,float16,4095,0.03535466641187668
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,8,2,64,0,1,float16,float16,4095,0.06615466872851054
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,8,2,64,128,1,float16,fp8,4095,0.008576000109314919
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,8,2,64,0,1,float16,fp8,4095,0.028389332195123036
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,8,2,64,128,1,float16,float16,8191,0.0354720006386439
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,8,2,64,0,1,float16,float16,8191,0.08826133608818054
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,8,2,64,128,1,float16,fp8,8191,0.008400000010927519
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,8,2,64,128,1,float16,fp8,16383,0.008570666735370954
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,8,2,64,0,1,float16,fp8,8191,0.04611733555793762
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,8,2,64,0,1,float16,float16,16383,0.132042666276296
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,8,2,64,128,1,float16,float16,16383,0.03549333413441976
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,8,2,64,0,1,float16,fp8,16383,0.07880533238252004
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,8,4,64,128,1,float16,float16,1,0.015146666516860327
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,8,4,64,0,1,float16,float16,1,0.014074667046467463
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,8,4,64,128,1,float16,fp8,1,0.00754666638871034
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,8,4,64,0,1,float16,fp8,1,0.008586666857202848
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,8,2,64,128,1,float16,fp8,32767,0.008522666369875273
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,8,4,64,128,1,float16,float16,3,0.014522666732470194
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,8,4,64,0,1,float16,float16,3,0.014186666657527288
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,8,2,64,0,1,float16,float16,32767,0.21888534228006998
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,8,4,64,128,1,float16,float16,7,0.014677333335081736
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,8,2,64,128,1,float16,float16,32767,0.03552533437808355
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,8,4,64,128,1,float16,fp8,3,0.007370666911204656
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,8,2,64,0,1,float16,fp8,32767,0.14258133371671042
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,8,4,64,0,1,float16,fp8,3,0.008373333141207695
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,8,4,64,0,1,float16,float16,7,0.014335999886194864
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,8,4,64,128,1,float16,fp8,7,0.0063680000603199005
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,8,4,64,0,1,float16,fp8,7,0.007040000210205714
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,8,4,64,128,1,float16,float16,15,0.013978666315476099
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,8,4,64,0,1,float16,float16,15,0.014858666807413101
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,8,4,64,128,1,float16,fp8,15,0.007269333427151044
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,8,4,64,128,1,float16,float16,31,0.014287999520699183
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,8,4,64,0,1,float16,fp8,15,0.008661333471536636
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,8,4,64,0,1,float16,float16,31,0.014848000059525171
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,8,4,64,128,1,float16,fp8,31,0.006309333567818006
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,8,4,64,128,1,float16,float16,127,0.015098666151364645
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,8,4,64,0,1,float16,fp8,31,0.007109333450595538
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,8,4,64,128,1,float16,float16,63,0.0141546664138635
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,8,4,64,0,1,float16,float16,63,0.014677333335081736
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,8,4,64,128,1,float16,fp8,63,0.007402666533986728
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,8,4,64,0,1,float16,fp8,63,0.006074666976928711
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,8,4,64,0,1,float16,float16,127,0.014720000326633453
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,8,4,64,128,1,float16,fp8,127,0.0068693334857622785
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,8,4,64,0,1,float16,fp8,127,0.007221333061655362
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,8,4,64,128,1,float16,float16,255,0.026522666215896606
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,8,4,64,0,1,float16,float16,255,0.026837334036827087
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,8,4,64,128,1,float16,fp8,511,0.010842667271693548
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,8,4,64,128,1,float16,fp8,255,0.0086666668454806
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,8,4,64,0,1,float16,fp8,255,0.009029333169261614
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,8,4,64,128,1,float16,float16,511,0.03649600098530451
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,8,4,64,0,1,float16,float16,511,0.040522667268911995
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,8,4,64,0,1,float16,fp8,511,0.015205333630243937
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,8,4,64,128,1,float16,float16,1023,0.036559998989105225
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,8,4,64,0,1,float16,float16,1023,0.049626668294270836
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,8,4,64,128,1,float16,fp8,1023,0.010245333115259806
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,8,4,64,0,1,float16,fp8,1023,0.019296000401178997
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,8,4,64,128,1,float16,float16,2047,0.03872533390919367
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,8,4,64,0,1,float16,float16,2047,0.06718933085600536
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,8,4,64,0,1,float16,float16,4095,0.08935999870300293
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,8,4,64,128,1,float16,fp8,2047,0.009786666681369146
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,8,4,64,0,1,float16,fp8,2047,0.027914665639400482
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,8,4,64,128,1,float16,float16,4095,0.03921066721280416
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,8,4,64,128,1,float16,fp8,4095,0.009861333295702934
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,8,4,64,0,1,float16,fp8,4095,0.0450186679760615
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,8,4,64,128,1,float16,float16,8191,0.03826133410135905
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,8,4,64,0,1,float16,float16,8191,0.13293866316477457
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,8,4,64,128,1,float16,fp8,8191,0.009872000043590864
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,8,4,64,0,1,float16,fp8,8191,0.07754133145014445
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,8,4,64,128,1,float16,float16,16383,0.03904533386230469
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,8,4,64,0,1,float16,float16,16383,0.21916800737380981
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,8,4,64,0,1,float16,fp8,16383,0.14195199807484946
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,8,4,64,128,1,float16,fp8,16383,0.010170666500926018
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,8,1,64,128,1,float16,float16,1,0.007770666852593422
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,8,1,64,0,1,float16,float16,1,0.007653333246707916
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,8,1,64,128,1,float16,fp8,1,0.009242666885256767
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,8,4,64,0,1,float16,float16,32767,0.3901653289794922
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,8,1,64,0,1,float16,fp8,1,0.005749333028992017
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,8,1,64,128,1,float16,float16,3,0.00808533343176047
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,8,1,64,0,1,float16,float16,3,0.007658666620651881
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,8,4,64,128,1,float16,float16,32767,0.03890133400758108
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,8,1,64,128,1,float16,fp8,3,0.008298666526873907
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,8,1,64,128,1,float16,float16,7,0.007711999739209811
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,8,1,64,0,1,float16,fp8,3,0.0059199997534354525
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,8,4,64,128,1,float16,fp8,32767,0.010437333335479101
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,8,1,64,0,1,float16,float16,7,0.008090666805704435
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,8,1,64,128,1,float16,fp8,7,0.005653333539764087
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,8,4,64,0,1,float16,fp8,32767,0.2725226680437724
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,8,1,64,128,1,float16,float16,31,0.008447999755541483
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,8,1,64,0,1,float16,fp8,7,0.00850133349498113
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,8,1,64,128,1,float16,float16,15,0.008197333042820295
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,8,1,64,0,1,float16,float16,15,0.007701333612203598
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,8,1,64,128,1,float16,fp8,15,0.0057920000205437345
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,8,1,64,0,1,float16,fp8,15,0.00731733317176501
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,8,1,64,0,1,float16,float16,31,0.00785600021481514
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,8,1,64,128,1,float16,fp8,31,0.007536000261704127
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,8,1,64,0,1,float16,fp8,31,0.0069973332186539965
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,8,1,64,128,1,float16,float16,63,0.009328000247478485
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,8,1,64,0,1,float16,float16,63,0.00914666677514712
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,8,1,64,128,1,float16,fp8,63,0.005712000032265981
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,8,1,64,0,1,float16,fp8,63,0.005877333382765452
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,8,1,64,128,1,float16,float16,127,0.009109333157539368
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,8,1,64,0,1,float16,float16,127,0.008687999720374743
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,8,1,64,128,1,float16,fp8,127,0.007754666730761528
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,8,1,64,0,1,float16,fp8,127,0.007205333560705185
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,8,1,64,128,1,float16,float16,255,0.00972800018886725
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,8,1,64,0,1,float16,float16,255,0.009402666861812273
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,8,1,64,128,1,float16,fp8,255,0.006784000123540561
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,8,1,64,0,1,float16,fp8,255,0.007471999774376552
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,8,1,64,128,1,float16,float16,511,0.009183999771873156
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,8,1,64,0,1,float16,float16,511,0.00980266680320104
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,8,1,64,128,1,float16,fp8,511,0.007754666730761528
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,8,1,64,0,1,float16,fp8,511,0.008346666892369589
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,8,1,64,0,1,float16,float16,2047,0.014064000298579534
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,8,1,64,128,1,float16,float16,1023,0.014474666366974512
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,8,1,64,0,1,float16,fp8,2047,0.009813333551088968
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,8,1,64,0,1,float16,float16,1023,0.0161920003592968
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,8,1,64,128,1,float16,fp8,1023,0.006874666859706243
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,8,1,64,0,1,float16,fp8,1023,0.008223999912540117
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,8,1,64,128,1,float16,float16,2047,0.01392000044385592
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,8,1,64,128,1,float16,fp8,2047,0.006831999868154526
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,8,1,64,128,1,float16,float16,4095,0.013647999614477158
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,8,1,64,0,1,float16,float16,4095,0.015360000232855478
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,8,1,64,128,1,float16,fp8,4095,0.007578666632374127
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,8,1,64,0,1,float16,fp8,4095,0.010677333921194077
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,8,1,64,128,1,float16,float16,8191,0.014671999961137772
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,8,1,64,0,1,float16,float16,8191,0.01695466662446658
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,8,1,64,128,1,float16,fp8,8191,0.0075573331365982694
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,8,1,64,0,1,float16,fp8,8191,0.012069333344697952
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,8,1,64,128,1,float16,float16,16383,0.015087999403476715
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,8,1,64,0,1,float16,float16,16383,0.018901333212852478
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,8,1,64,0,1,float16,float16,32767,0.027679999669392902
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,8,1,64,128,1,float16,fp8,16383,0.007711999739209811
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,8,1,64,0,1,float16,fp8,16383,0.015077333897352219
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,8,1,64,128,1,float16,float16,32767,0.018309333672126133
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,8,1,64,128,1,float16,fp8,32767,0.007413333281874657
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,8,1,64,0,1,float16,fp8,32767,0.019226666539907455
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,8,1,64,128,1,float16,float16,65535,0.020725333442290623
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,8,1,64,128,1,float16,fp8,65535,0.0069759997228781385
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,8,1,64,0,1,float16,float16,65535,0.03429333368937174
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,8,2,64,128,1,float16,float16,1,0.007647999872763951
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,8,1,64,0,1,float16,fp8,65535,0.025381334125995636
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,8,2,64,0,1,float16,float16,1,0.008965333302815756
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,8,2,64,128,1,float16,fp8,1,0.005621333296100299
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,8,2,64,0,1,float16,fp8,1,0.007007999966541926
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,8,2,64,128,1,float16,float16,3,0.00892800030608972
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,8,2,64,0,1,float16,float16,3,0.008218666538596153
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,8,2,64,128,1,float16,fp8,3,0.0068693334857622785
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,8,2,64,0,1,float16,fp8,3,0.008192000289758047
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,8,2,64,128,1,float16,float16,7,0.007594666754206021
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,8,2,64,0,1,float16,float16,7,0.00820266641676426
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,8,1,64,0,1,float16,float16,131071,0.05705066521962484
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,8,2,64,128,1,float16,fp8,7,0.008703999842206636
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,8,2,64,0,1,float16,fp8,7,0.0057920000205437345
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,8,1,64,128,1,float16,float16,131071,0.02789866675933202
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,8,2,64,128,1,float16,float16,15,0.008186666915814081
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,8,1,64,0,1,float16,fp8,131071,0.0382080003619194
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,8,2,64,0,1,float16,float16,15,0.00754666638871034
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,8,1,64,128,1,float16,fp8,131071,0.008165333420038223
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,8,2,64,128,1,float16,fp8,15,0.008447999755541483
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,8,2,64,0,1,float16,fp8,15,0.0058186668902635574
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,8,2,64,128,1,float16,float16,31,0.008378666515151659
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,8,2,64,0,1,float16,float16,31,0.008400000010927519
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,8,2,64,128,1,float16,fp8,31,0.005690666536490123
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,8,2,64,0,1,float16,fp8,31,0.005637333417932193
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,8,2,64,128,1,float16,float16,63,0.008992000172535578
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,8,2,64,0,1,float16,float16,63,0.009237333511312803
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,8,2,64,128,1,float16,fp8,63,0.0058186668902635574
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,8,2,64,0,1,float16,fp8,63,0.007151999821265538
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,8,2,64,128,1,float16,float16,127,0.009296000003814697
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,8,2,64,0,1,float16,float16,127,0.00892800030608972
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,8,2,64,128,1,float16,fp8,127,0.006858666737874349
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,8,2,64,0,1,float16,fp8,127,0.007477333148320516
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,8,2,64,128,1,float16,float16,255,0.009482666850090027
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,8,2,64,0,1,float16,float16,255,0.008714666590094566
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,8,2,64,128,1,float16,fp8,255,0.007520000139872233
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,8,2,64,0,1,float16,fp8,255,0.00772266648709774
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,8,2,64,128,1,float16,float16,511,0.008863999818762144
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,8,2,64,0,1,float16,float16,511,0.0102613332370917
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,8,2,64,128,1,float16,fp8,511,0.0074346667776505155
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,8,2,64,0,1,float16,fp8,511,0.008757333581646284
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,8,2,64,128,1,float16,float16,1023,0.011685332904259363
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,8,2,64,0,1,float16,float16,1023,0.011584000041087469
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,8,2,64,128,1,float16,fp8,1023,0.007360000163316727
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,8,2,64,0,1,float16,fp8,1023,0.008538666491707167
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,8,2,64,128,1,float16,float16,2047,0.013904000322024027
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,8,2,64,0,1,float16,float16,2047,0.01479999969402949
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,8,2,64,128,1,float16,fp8,2047,0.0074346667776505155
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,8,2,64,0,1,float16,fp8,2047,0.009594666461149851
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,8,2,64,128,1,float16,float16,4095,0.013850666582584381
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,8,2,64,0,1,float16,float16,4095,0.014671999961137772
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,8,2,64,128,1,float16,fp8,4095,0.007402666533986728
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,8,2,64,0,1,float16,fp8,4095,0.010608000059922537
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,8,2,64,128,1,float16,float16,8191,0.016789333273967106
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,8,2,64,0,1,float16,float16,8191,0.0220320001244545
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,8,2,64,128,1,float16,fp8,8191,0.0074506668994824094
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,8,2,64,0,1,float16,fp8,8191,0.013770667215188345
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,8,2,64,128,1,float16,float16,16383,0.018021332720915478
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,8,2,64,0,1,float16,float16,16383,0.02474133421977361
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,8,2,64,128,1,float16,fp8,16383,0.007658666620651881
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,8,2,64,0,1,float16,fp8,16383,0.016063999384641647
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,8,2,64,128,1,float16,float16,32767,0.02149333308140437
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,8,2,64,0,1,float16,float16,32767,0.0365226666132609
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,8,2,64,128,1,float16,fp8,32767,0.00749333327015241
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,8,2,64,0,1,float16,fp8,32767,0.023247999449570973
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,8,2,64,128,1,float16,float16,65535,0.02533866713444392
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,8,2,64,0,1,float16,float16,65535,0.05474133292833964
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,8,4,64,128,1,float16,float16,1,0.007770666852593422
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,8,4,64,0,1,float16,float16,1,0.007765333478649457
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,8,2,64,128,1,float16,fp8,65535,0.007663999994595845
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,8,2,64,0,1,float16,fp8,65535,0.03340800106525421
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,8,4,64,128,1,float16,fp8,1,0.005775999898711841
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,8,4,64,0,1,float16,fp8,1,0.005642666791876157
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,8,4,64,0,1,float16,float16,3,0.008223999912540117
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,8,4,64,128,1,float16,float16,3,0.008101333553592363
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,8,4,64,128,1,float16,fp8,3,0.005797333394487699
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,8,4,64,0,1,float16,fp8,3,0.005706666658322017
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,8,2,64,128,1,float16,float16,131071,0.03276800115903219
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,8,4,64,128,1,float16,float16,7,0.008176000167926153
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,8,4,64,0,1,float16,float16,7,0.0080960001796484
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,8,4,64,128,1,float16,float16,15,0.007658666620651881
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,8,4,64,128,1,float16,fp8,7,0.005935999875267346
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,8,4,64,0,1,float16,fp8,7,0.00573333352804184
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,8,2,64,0,1,float16,float16,131071,0.09099200367927551
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,8,4,64,128,1,float16,float16,31,0.00790933333337307
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,8,4,64,0,1,float16,float16,15,0.008117333054542542
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,8,2,64,128,1,float16,fp8,131071,0.00784533346692721
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,8,4,64,128,1,float16,fp8,15,0.007157333195209503
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,8,2,64,0,1,float16,fp8,131071,0.06132799883683523
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,8,4,64,0,1,float16,fp8,15,0.005722666780153911
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,8,4,64,0,1,float16,float16,31,0.007840000092983246
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,8,4,64,128,1,float16,fp8,31,0.00706666645904382
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,8,4,64,0,1,float16,fp8,31,0.007029333462317784
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,8,4,64,128,1,float16,float16,63,0.00892800030608972
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,8,4,64,0,1,float16,float16,63,0.009743999689817429
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,8,4,64,128,1,float16,fp8,63,0.007247999931375186
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,8,4,64,0,1,float16,fp8,63,0.008512000242869059
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,8,4,64,128,1,float16,float16,127,0.00879466657837232
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,8,4,64,0,1,float16,float16,127,0.008837333569924036
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,8,4,64,128,1,float16,fp8,127,0.006949333474040031
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,8,4,64,0,1,float16,fp8,127,0.007205333560705185
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,8,4,64,128,1,float16,float16,255,0.009232000137368837
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,8,4,64,0,1,float16,float16,255,0.009349333122372627
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,8,4,64,128,1,float16,fp8,255,0.00766933336853981
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,8,4,64,0,1,float16,fp8,255,0.007242666557431221
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,8,4,64,128,1,float16,float16,1023,0.0120319997270902
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,8,4,64,128,1,float16,float16,511,0.011296000331640244
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,8,4,64,0,1,float16,float16,511,0.011733333269755045
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,8,4,64,128,1,float16,fp8,511,0.007269333427151044
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,8,4,64,0,1,float16,fp8,511,0.008234666660428047
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,8,4,64,0,1,float16,float16,1023,0.012058666596810022
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,8,4,64,128,1,float16,fp8,1023,0.006821333120266597
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,8,4,64,0,1,float16,fp8,1023,0.008639999975760778
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,8,4,64,128,1,float16,float16,2047,0.01586666703224182
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,8,4,64,0,1,float16,float16,2047,0.018810667097568512
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,8,4,64,128,1,float16,fp8,2047,0.0075519997626543045
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,8,4,64,0,1,float16,fp8,2047,0.01009599988659223
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,8,4,64,128,1,float16,float16,4095,0.01701333373785019
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,8,4,64,0,1,float16,float16,4095,0.02004266654451688
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,8,4,64,128,1,float16,fp8,4095,0.007525333513816197
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,8,4,64,0,1,float16,fp8,4095,0.010911999891201654
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,8,4,64,128,1,float16,float16,8191,0.020026666422684986
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,8,4,64,0,1,float16,float16,8191,0.029146666328112285
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,8,4,64,128,1,float16,fp8,8191,0.008698666468262672
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,8,4,64,0,1,float16,fp8,8191,0.014981333166360855
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,8,4,64,128,1,float16,float16,16383,0.02271466702222824
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,8,4,64,128,1,float16,fp8,16383,0.007141333073377609
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,8,4,64,0,1,float16,float16,16383,0.038586666186650596
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,8,4,64,0,1,float16,fp8,16383,0.020319999506076176
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,8,4,64,128,1,float16,float16,32767,0.0290133332212766
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,8,4,64,0,1,float16,float16,32767,0.061706667145093284
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,8,4,64,128,1,float16,fp8,32767,0.00772266648709774
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,8,4,64,0,1,float16,fp8,32767,0.03523733218510946
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,8,4,64,128,1,float16,float16,65535,0.03492266684770584
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,8,4,64,0,1,float16,float16,65535,0.09217066566149394
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,8,4,64,128,1,float16,fp8,65535,0.008037333066264788
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,8,4,64,0,1,float16,fp8,65535,0.05256533126036326
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,8,1,64,128,1,float16,float16,1,0.010586666564146677
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,8,1,64,0,1,float16,float16,1,0.010565333068370819
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,8,1,64,128,1,float16,fp8,1,0.007061333085099856
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,8,1,64,0,1,float16,fp8,1,0.007301333049933116
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,8,1,64,128,1,float16,float16,3,0.010650667051474253
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,8,1,64,0,1,float16,float16,3,0.010698666175206503
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,8,1,64,128,1,float16,fp8,3,0.008240000034372011
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,8,1,64,0,1,float16,fp8,3,0.007029333462317784
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,8,4,64,128,1,float16,float16,131071,0.03782399992148081
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,8,1,64,128,1,float16,float16,7,0.010687999427318573
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,8,4,64,128,1,float16,fp8,131071,0.007600000128149986
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,8,1,64,0,1,float16,float16,7,0.010773333410422007
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,8,4,64,0,1,float16,fp8,131071,0.09239466985066731
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,8,1,64,128,1,float16,fp8,7,0.008639999975760778
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,8,1,64,0,1,float16,fp8,7,0.00725333330531915
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,8,4,64,0,1,float16,float16,131071,0.14578133821487427
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,8,1,64,128,1,float16,float16,15,0.010890666395425797
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,8,1,64,0,1,float16,float16,31,0.011194666226704916
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,8,1,64,0,1,float16,float16,15,0.010890666395425797
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,8,1,64,128,1,float16,fp8,15,0.005877333382765452
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,8,1,64,0,1,float16,fp8,15,0.006864000111818314
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,8,1,64,128,1,float16,float16,31,0.010128000130256018
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,8,1,64,128,1,float16,fp8,31,0.007322666545708974
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,8,1,64,0,1,float16,fp8,31,0.007130666946371396
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,8,1,64,128,1,float16,float16,63,0.010869332899649939
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,8,1,64,0,1,float16,float16,63,0.010944000134865442
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,8,1,64,128,1,float16,fp8,63,0.006981333096822103
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,8,1,64,0,1,float16,fp8,63,0.005882666756709416
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,8,1,64,128,1,float16,float16,127,0.0106133334338665
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,8,1,64,0,1,float16,float16,127,0.010101333260536194
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,8,1,64,128,1,float16,fp8,127,0.00726400005320708
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,8,1,64,0,1,float16,fp8,127,0.007157333195209503
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,8,1,64,128,1,float16,float16,255,0.01869333287080129
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,8,1,64,0,1,float16,float16,255,0.018901333212852478
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,8,1,64,128,1,float16,fp8,255,0.007829333345095316
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,8,1,64,0,1,float16,fp8,255,0.008181333541870117
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,8,1,64,128,1,float16,float16,511,0.023141334454218548
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,8,1,64,0,1,float16,float16,511,0.024735999604066212
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,8,1,64,128,1,float16,fp8,511,0.008586666857202848
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,8,1,64,0,1,float16,fp8,511,0.010133333504199982
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,8,1,64,128,1,float16,float16,1023,0.03367999941110611
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,8,1,64,0,1,float16,float16,1023,0.04066666712363561
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,8,1,64,128,1,float16,fp8,1023,0.00921066664159298
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,8,1,64,0,1,float16,fp8,1023,0.01658133293191592
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,8,1,64,128,1,float16,float16,2047,0.03573866685231527
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,8,1,64,0,1,float16,float16,2047,0.051226665576299034
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,8,1,64,128,1,float16,fp8,2047,0.00901333304742972
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,8,1,64,0,1,float16,float16,4095,0.06790933509667714
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,8,1,64,0,1,float16,fp8,2047,0.02089066555102666
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,8,1,64,128,1,float16,float16,4095,0.03563733398914337
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,8,1,64,0,1,float16,fp8,4095,0.02848000079393387
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,8,1,64,128,1,float16,fp8,4095,0.009008000294367472
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,8,1,64,0,1,float16,float16,8191,0.08921066919962566
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,8,1,64,128,1,float16,float16,8191,0.03573333223660787
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,8,1,64,128,1,float16,fp8,8191,0.009029333169261614
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,8,1,64,0,1,float16,fp8,8191,0.04686399797598521
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,8,2,64,128,1,float16,float16,1,0.01440000037352244
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,8,2,64,0,1,float16,float16,1,0.015077333897352219
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,8,2,64,128,1,float16,float16,3,0.014469332993030548
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,8,2,64,128,1,float16,fp8,1,0.007247999931375186
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,8,2,64,0,1,float16,fp8,1,0.008277333031098047
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,8,1,64,128,1,float16,fp8,16383,0.008592000231146812
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,8,2,64,0,1,float16,float16,3,0.015216000378131866
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,8,2,64,128,1,float16,fp8,3,0.00620266670982043
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,8,1,64,128,1,float16,float16,16383,0.03604800005753835
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,8,1,64,0,1,float16,float16,16383,0.13371200362841287
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,8,1,64,0,1,float16,fp8,16383,0.07893866797288258
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,8,2,64,0,1,float16,fp8,3,0.009039999917149544
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,8,2,64,0,1,float16,float16,7,0.014186666657527288
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,8,2,64,128,1,float16,float16,7,0.01504533365368843
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,8,2,64,128,1,float16,fp8,7,0.006234666953484218
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,8,2,64,0,1,float16,fp8,7,0.007376000285148621
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,8,2,64,128,1,float16,float16,15,0.014677333335081736
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,8,2,64,0,1,float16,float16,15,0.014970666418472925
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,8,2,64,0,1,float16,fp8,15,0.006197333335876465
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,8,2,64,128,1,float16,fp8,15,0.007007999966541926
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,8,2,64,128,1,float16,float16,31,0.014997333288192749
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,8,2,64,0,1,float16,float16,31,0.014384000251690546
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,8,2,64,128,1,float16,fp8,31,0.007402666533986728
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,8,2,64,0,1,float16,fp8,31,0.007007999966541926
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,8,2,64,128,1,float16,float16,63,0.014762666076421738
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,8,2,64,0,1,float16,float16,63,0.014378666877746582
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,8,2,64,128,1,float16,fp8,63,0.006927999978264173
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,8,2,64,0,1,float16,fp8,63,0.006927999978264173
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,8,2,64,128,1,float16,float16,127,0.014378666877746582
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,8,2,64,0,1,float16,float16,127,0.014848000059525171
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,8,2,64,128,1,float16,fp8,127,0.007280000175038974
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,8,2,64,0,1,float16,fp8,127,0.0084906667470932
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,8,2,64,128,1,float16,float16,255,0.02657066782315572
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,8,2,64,0,1,float16,float16,255,0.026730666557947796
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,8,2,64,128,1,float16,fp8,255,0.008570666735370954
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,8,2,64,0,1,float16,fp8,255,0.009152000149091085
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,8,2,64,128,1,float16,float16,511,0.03623999903599421
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,8,2,64,0,1,float16,float16,511,0.04046933352947235
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,8,2,64,128,1,float16,fp8,511,0.010570666442314783
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,8,2,64,0,1,float16,fp8,511,0.01544533297419548
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,8,2,64,128,1,float16,float16,1023,0.03619733452796936
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,8,2,64,0,1,float16,float16,1023,0.049882665276527405
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,8,2,64,128,1,float16,fp8,1023,0.00979200005531311
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,8,2,64,0,1,float16,fp8,1023,0.019920000185569126
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,8,2,64,128,1,float16,float16,2047,0.03858133405447006
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,8,2,64,0,1,float16,float16,2047,0.06804266571998596
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,8,2,64,128,1,float16,fp8,2047,0.009850666547815004
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,8,2,64,0,1,float16,fp8,2047,0.02775466690460841
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,8,2,64,128,1,float16,float16,4095,0.03882133215665817
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,8,2,64,0,1,float16,float16,4095,0.08880000313123067
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,8,2,64,128,1,float16,fp8,4095,0.010362666721145311
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,8,2,64,0,1,float16,fp8,4095,0.04550399879614512
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,8,2,64,0,1,float16,float16,8191,0.13319999972979227
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,8,2,64,128,1,float16,float16,8191,0.038831998904546104
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,8,2,64,128,1,float16,fp8,8191,0.010453333457310995
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,8,2,64,0,1,float16,fp8,8191,0.07865066826343536
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,8,4,64,128,1,float16,float16,1,0.02359466751416524
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,8,4,64,0,1,float16,float16,1,0.022858666876951855
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,8,2,64,128,1,float16,float16,16383,0.03807466725508372
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,8,4,64,128,1,float16,fp8,1,0.007381333038210869
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,8,4,64,0,1,float16,fp8,1,0.008538666491707167
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,8,4,64,128,1,float16,float16,3,0.02370133250951767
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,8,2,64,0,1,float16,fp8,16383,0.14195733269055685
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,8,4,64,0,1,float16,float16,3,0.0229066660006841
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,8,4,64,128,1,float16,fp8,3,0.007461333026488622
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,8,2,64,128,1,float16,fp8,16383,0.010597333312034607
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,8,4,64,0,1,float16,fp8,3,0.006858666737874349
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,8,2,64,0,1,float16,float16,16383,0.22064000368118286
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,8,4,64,128,1,float16,float16,7,0.02332799881696701
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,8,4,64,0,1,float16,float16,15,0.022448000808556873
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,8,4,64,0,1,float16,float16,7,0.022757334013779957
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,8,4,64,0,1,float16,fp8,7,0.0069333333522081375
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,8,4,64,128,1,float16,fp8,7,0.006858666737874349
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,8,4,64,128,1,float16,float16,15,0.02332799881696701
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,8,4,64,128,1,float16,fp8,15,0.007514666765928268
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,8,4,64,0,1,float16,fp8,15,0.008362666393319765
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,8,4,64,0,1,float16,float16,31,0.022384000321229298
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,8,4,64,128,1,float16,float16,31,0.022869333624839783
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,8,4,64,128,1,float16,fp8,31,0.007301333049933116
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,8,4,64,0,1,float16,fp8,31,0.006917333230376244
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,8,4,64,128,1,float16,float16,63,0.022175999979178112
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,8,4,64,0,1,float16,float16,63,0.023247999449570973
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,8,4,64,128,1,float16,fp8,63,0.0075040000180403394
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,8,4,64,0,1,float16,fp8,63,0.008597333605090777
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,8,4,64,128,1,float16,float16,127,0.022272000710169475
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,8,4,64,0,1,float16,float16,127,0.023018665611743927
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,8,4,64,128,1,float16,fp8,127,0.008282666405042013
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,8,4,64,0,1,float16,fp8,127,0.007967999825874964
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,8,4,64,128,1,float16,float16,255,0.04318933188915253
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,8,4,64,0,1,float16,float16,255,0.04393066465854645
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,8,4,64,128,1,float16,fp8,255,0.014042666802803675
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,8,4,64,0,1,float16,fp8,255,0.01584533353646596
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,8,4,64,128,1,float16,float16,511,0.042917331059773765
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,8,4,64,0,1,float16,float16,511,0.05253333350022634
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,8,4,64,128,1,float16,fp8,511,0.013594667116800943
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,8,4,64,0,1,float16,fp8,511,0.01924266666173935
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,8,4,64,128,1,float16,float16,1023,0.0432533323764801
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,8,4,64,0,1,float16,float16,1023,0.06614399949709575
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,8,4,64,128,1,float16,fp8,1023,0.013914667069911957
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,8,4,64,0,1,float16,fp8,1023,0.02868266652027766
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,8,4,64,128,1,float16,float16,2047,0.04535999894142151
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,8,4,64,0,1,float16,float16,2047,0.09165866176287334
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,8,4,64,128,1,float16,fp8,2047,0.013669333110253016
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,8,4,64,0,1,float16,fp8,2047,0.04492799937725067
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,8,4,64,128,1,float16,float16,4095,0.0459146648645401
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,8,4,64,0,1,float16,float16,4095,0.13499733805656433
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,8,4,64,128,1,float16,fp8,4095,0.013850666582584381
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,8,4,64,0,1,float16,fp8,4095,0.07814933359622955
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,8,4,64,128,1,float16,float16,8191,0.04587199787298838
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,8,4,64,128,1,float16,fp8,8191,0.013674666484196981
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,8,4,64,0,1,float16,float16,8191,0.2212000091870626
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,8,4,64,0,1,float16,fp8,8191,0.14120533068974814
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,8,1,64,128,1,float16,float16,1,0.014837333311637243
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,8,1,64,128,1,float16,fp8,1,0.006128000095486641
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,8,1,64,0,1,float16,float16,1,0.015114666273196539
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,8,1,64,0,1,float16,fp8,1,0.006048000107208888
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,8,4,64,128,1,float16,float16,16383,0.04688533147176107
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,8,4,64,0,1,float16,float16,16383,0.3940800031026204
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,8,1,64,128,1,float16,float16,3,0.014560000350077948
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,8,4,64,0,1,float16,fp8,16383,0.26953067382176715
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,8,1,64,128,1,float16,fp8,3,0.005968000118931134
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,8,1,64,0,1,float16,float16,3,0.015135999768972397
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,8,4,64,128,1,float16,fp8,16383,0.014122666170199713
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,8,1,64,128,1,float16,float16,7,0.014549333602190018
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,8,1,64,0,1,float16,fp8,3,0.007216000308593114
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,8,1,64,0,1,float16,float16,7,0.014981333166360855
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,8,1,64,128,1,float16,fp8,7,0.006031999985376994
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,8,1,64,0,1,float16,fp8,7,0.006250666454434395
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,8,1,64,128,1,float16,float16,15,0.015135999768972397
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,8,1,64,0,1,float16,float16,15,0.01440000037352244
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,8,1,64,128,1,float16,fp8,15,0.008277333031098047
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,8,1,64,0,1,float16,fp8,15,0.007157333195209503
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,8,1,64,128,1,float16,float16,31,0.01492799942692121
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,8,1,64,0,1,float16,float16,31,0.01470400020480156
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,8,1,64,128,1,float16,fp8,31,0.006224000205596288
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,8,1,64,0,1,float16,fp8,31,0.007461333026488622
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,8,1,64,128,1,float16,float16,63,0.014309333016475042
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,8,1,64,0,1,float16,float16,63,0.015087999403476715
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,8,1,64,128,1,float16,fp8,63,0.007216000308593114
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,8,1,64,0,1,float16,fp8,63,0.007018666714429855
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,8,1,64,128,1,float16,float16,127,0.01451733335852623
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,8,1,64,0,1,float16,float16,127,0.014602666099866232
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,8,1,64,0,1,float16,fp8,127,0.006634666894872983
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,8,1,64,128,1,float16,fp8,127,0.007333333293596904
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,8,1,64,0,1,float16,float16,255,0.026752000053723652
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,8,1,64,128,1,float16,float16,255,0.027322667340437572
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,8,1,64,128,1,float16,fp8,255,0.008703999842206636
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,8,1,64,0,1,float16,fp8,255,0.009850666547815004
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,8,1,64,128,1,float16,float16,511,0.03617066641648611
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,8,1,64,0,1,float16,float16,511,0.04046933352947235
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,8,1,64,128,1,float16,fp8,511,0.010122666756312052
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,8,1,64,0,1,float16,fp8,511,0.016458666572968166
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,8,1,64,128,1,float16,float16,1023,0.03631466627120972
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,8,1,64,0,1,float16,float16,1023,0.05007466673851013
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,8,1,64,128,1,float16,fp8,1023,0.010634666929642359
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,8,1,64,0,1,float16,fp8,1023,0.020661332954963047
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,8,1,64,128,1,float16,float16,2047,0.03852800031503042
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,8,1,64,0,1,float16,float16,2047,0.06770133475462596
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,8,1,64,128,1,float16,fp8,2047,0.01020800011853377
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,8,1,64,128,1,float16,float16,4095,0.03863999992609024
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,8,1,64,0,1,float16,fp8,2047,0.02868266652027766
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,8,2,64,128,1,float16,float16,1,0.022650666534900665
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,8,1,64,0,1,float16,float16,4095,0.09112000465393066
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,8,2,64,0,1,float16,float16,1,0.02271466702222824
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,8,2,64,0,1,float16,float16,3,0.022618666291236877
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,8,1,64,128,1,float16,fp8,4095,0.010543999572594961
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,8,2,64,128,1,float16,fp8,1,0.006895999734600385
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,8,1,64,0,1,float16,fp8,4095,0.046394666035970054
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,8,2,64,0,1,float16,fp8,1,0.0069866664707660675
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,8,2,64,128,1,float16,float16,3,0.02314666658639908
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,8,2,64,0,1,float16,fp8,3,0.006853333363930385
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,8,2,64,128,1,float16,fp8,3,0.008336000144481659
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,8,2,64,128,1,float16,float16,7,0.02266666789849599
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,8,2,64,128,1,float16,fp8,7,0.0069919998447100324
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,8,2,64,0,1,float16,float16,7,0.02346666653951009
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,8,2,64,128,1,float16,float16,31,0.02257599929968516
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,8,2,64,128,1,float16,float16,15,0.022991999983787537
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,8,2,64,0,1,float16,fp8,7,0.007471999774376552
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,8,2,64,0,1,float16,float16,15,0.023103999594847362
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,8,2,64,128,1,float16,fp8,15,0.006821333120266597
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,8,2,64,0,1,float16,fp8,15,0.008474666625261307
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,8,2,64,0,1,float16,float16,31,0.02294933299223582
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,8,2,64,0,1,float16,fp8,31,0.0069333333522081375
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,8,2,64,128,1,float16,fp8,31,0.00744000015159448
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,8,2,64,128,1,float16,float16,63,0.022810667753219604
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,8,2,64,0,1,float16,float16,63,0.02314666658639908
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,8,2,64,128,1,float16,fp8,127,0.007920000081261
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,8,2,64,128,1,float16,fp8,63,0.007258666679263115
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,8,2,64,0,1,float16,fp8,63,0.006815999746322632
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,8,2,64,128,1,float16,float16,127,0.023605334262053173
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,8,2,64,0,1,float16,float16,127,0.022874665757020313
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,8,2,64,0,1,float16,fp8,127,0.008512000242869059
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,8,2,64,128,1,float16,float16,255,0.04321066538492838
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,8,2,64,128,1,float16,fp8,255,0.013317332913478216
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,8,2,64,0,1,float16,float16,255,0.043418665726979576
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,8,2,64,0,1,float16,fp8,255,0.015605332950750986
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,8,2,64,128,1,float16,float16,511,0.04298133154710134
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,8,2,64,0,1,float16,float16,511,0.053269331653912864
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,8,2,64,128,1,float16,fp8,511,0.01328533391157786
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,8,2,64,0,1,float16,fp8,511,0.019466667125622433
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,8,2,64,128,1,float16,float16,1023,0.043237333496411644
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,8,2,64,0,1,float16,float16,1023,0.06529066463311513
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,8,2,64,128,1,float16,fp8,1023,0.013898666948080063
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,8,2,64,0,1,float16,float16,2047,0.09233066439628601
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,8,2,64,0,1,float16,fp8,1023,0.02811199923356374
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,8,2,64,128,1,float16,float16,2047,0.045754666129748024
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,8,2,64,128,1,float16,fp8,2047,0.013221333424250284
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,8,2,64,0,1,float16,fp8,2047,0.04518933097521464
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,8,2,64,128,1,float16,float16,4095,0.045968001087506614
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,8,2,64,0,1,float16,float16,4095,0.1344319979349772
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,8,4,64,128,1,float16,float16,1,0.03931200007597605
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,8,4,64,0,1,float16,float16,1,0.03929600119590759
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,8,2,64,128,1,float16,fp8,4095,0.013823999712864557
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,8,4,64,128,1,float16,float16,3,0.039093332986036934
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,8,4,64,128,1,float16,fp8,1,0.010847999403874079
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,8,4,64,0,1,float16,fp8,1,0.010805333654085795
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,8,2,64,0,1,float16,fp8,4095,0.07784000039100647
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,8,4,64,0,1,float16,float16,3,0.039306665460268654
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,8,4,64,128,1,float16,fp8,3,0.010869332899649939
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,8,4,64,0,1,float16,fp8,3,0.010938666760921478
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,8,4,64,0,1,float16,float16,7,0.0395413339138031
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,8,4,64,128,1,float16,float16,7,0.03993066648642222
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,8,4,64,128,1,float16,fp8,7,0.010768000036478043
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,8,4,64,128,1,float16,float16,15,0.039306665460268654
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,8,4,64,0,1,float16,fp8,7,0.011349332829316458
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,8,4,64,0,1,float16,float16,15,0.0391893337170283
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,8,4,64,128,1,float16,fp8,15,0.010960000256697336
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,8,4,64,0,1,float16,fp8,15,0.010960000256697336
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,8,4,64,128,1,float16,float16,31,0.03982933362325033
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,8,4,64,0,1,float16,float16,31,0.039077334105968475
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,8,4,64,128,1,float16,fp8,31,0.010858666151762009
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,8,4,64,0,1,float16,fp8,31,0.011413333316644033
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,8,4,64,128,1,float16,float16,63,0.03872533390919367
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,8,4,64,0,1,float16,float16,63,0.03885866701602936
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,8,4,64,128,1,float16,fp8,63,0.010826667149861654
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,8,4,64,0,1,float16,fp8,63,0.01138666644692421
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,8,4,64,128,1,float16,float16,127,0.03872533390919367
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,8,4,64,0,1,float16,float16,127,0.03898133337497711
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,8,4,64,0,1,float16,fp8,127,0.012554666648308435
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,8,4,64,128,1,float16,fp8,127,0.013232000172138214
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,8,4,64,128,1,float16,fp8,255,0.01333333303531011
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,8,4,64,128,1,float16,float16,255,0.04014399896065394
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,8,4,64,0,1,float16,float16,511,0.05299200117588043
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,8,4,64,0,1,float16,float16,255,0.04049599915742874
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,8,4,64,0,1,float16,fp8,255,0.01664000004529953
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,8,4,64,128,1,float16,float16,511,0.04075733323891958
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,8,4,64,128,1,float16,fp8,511,0.013210666676362356
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,8,4,64,0,1,float16,fp8,511,0.025653332471847534
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,8,4,64,128,1,float16,float16,1023,0.04049066702524821
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,8,4,64,0,1,float16,float16,1023,0.07533333202203114
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,8,4,64,128,1,float16,fp8,1023,0.012885333349307379
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,8,4,64,0,1,float16,fp8,1023,0.042394667863845825
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,8,4,64,128,1,float16,float16,2047,0.044266665975252785
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,8,4,64,0,1,float16,float16,2047,0.12316266695658366
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,8,4,64,0,1,float16,fp8,2047,0.0743146687746048
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,8,4,64,128,1,float16,fp8,2047,0.013434667140245438
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,8,4,64,128,1,float16,float16,4095,0.04465066889921824
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,8,4,64,0,1,float16,fp8,4095,0.13943466544151306
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,8,1,64,128,1,float16,float16,1,0.023071999351183575
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,8,4,64,0,1,float16,float16,4095,0.2090346614519755
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,8,1,64,0,1,float16,float16,3,0.023168000082174938
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,8,1,64,128,1,float16,fp8,3,0.0068800002336502075
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,8,4,64,128,1,float16,fp8,4095,0.013584000368913015
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,8,1,64,0,1,float16,fp8,3,0.007018666714429855
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,8,1,64,0,1,float16,float16,1,0.023797333240509033
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,8,1,64,128,1,float16,fp8,1,0.006864000111818314
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,8,1,64,0,1,float16,fp8,1,0.007541333635648091
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,8,1,64,128,1,float16,float16,3,0.023183998962243397
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,8,1,64,128,1,float16,float16,7,0.023130667706330616
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,8,1,64,0,1,float16,float16,7,0.02311466634273529
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,8,1,64,128,1,float16,fp8,7,0.00701333334048589
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,8,1,64,0,1,float16,fp8,7,0.008400000010927519
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,8,1,64,0,1,float16,float16,15,0.02313599983851115
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,8,1,64,128,1,float16,float16,15,0.023887999355793
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,8,1,64,128,1,float16,fp8,15,0.007477333148320516
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,8,1,64,0,1,float16,fp8,15,0.006842666616042455
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,8,1,64,128,1,float16,float16,31,0.02309866746266683
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,8,1,64,0,1,float16,float16,31,0.023157333334287006
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,8,1,64,128,1,float16,float16,63,0.022730665902296703
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,8,1,64,128,1,float16,fp8,31,0.0074879998962084455
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,8,1,64,0,1,float16,fp8,31,0.008538666491707167
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,8,1,64,0,1,float16,fp8,63,0.006853333363930385
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,8,1,64,0,1,float16,float16,63,0.023573334018389385
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,8,1,64,128,1,float16,fp8,63,0.007477333148320516
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,8,1,64,128,1,float16,float16,127,0.02309333284695943
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,8,1,64,0,1,float16,float16,127,0.02292266736427943
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,8,1,64,128,1,float16,fp8,127,0.007946666950980822
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,8,1,64,0,1,float16,fp8,127,0.008629333227872849
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,8,1,64,0,1,float16,float16,255,0.043151999513308205
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,8,1,64,128,1,float16,float16,255,0.04445866743723551
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,8,1,64,128,1,float16,fp8,255,0.01402666668097178
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,8,1,64,128,1,float16,float16,511,0.04374399781227112
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,8,1,64,0,1,float16,fp8,255,0.01621866722901662
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,8,1,64,128,1,float16,float16,1023,0.04332800209522247
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,8,1,64,128,1,float16,fp8,511,0.013487999637921652
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,8,1,64,0,1,float16,float16,511,0.051818668842315674
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,8,1,64,0,1,float16,fp8,511,0.020629333953062694
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,8,1,64,0,1,float16,float16,1023,0.06723733246326447
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,8,1,64,0,1,float16,fp8,1023,0.028751999139785767
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,8,1,64,128,1,float16,fp8,1023,0.014106666048367819
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,8,1,64,0,1,float16,float16,2047,0.09199999769528706
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,8,1,64,128,1,float16,fp8,2047,0.01333333303531011
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,8,1,64,128,1,float16,float16,2047,0.046709333856900535
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,8,1,64,0,1,float16,fp8,2047,0.04598933458328247
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,8,2,64,128,1,float16,float16,1,0.04116799930731455
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,8,2,64,128,1,float16,fp8,1,0.011002667248249054
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,8,2,64,0,1,float16,float16,1,0.040789333482583366
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,8,2,64,0,1,float16,fp8,1,0.011359999577204386
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,8,2,64,0,1,float16,float16,3,0.039594667653242745
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,8,2,64,128,1,float16,float16,3,0.040549332896868386
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,8,2,64,128,1,float16,fp8,3,0.011584000041087469
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,8,2,64,128,1,float16,float16,7,0.039919999738534294
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,8,2,64,0,1,float16,fp8,3,0.011413333316644033
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,8,2,64,0,1,float16,float16,7,0.04095466683308283
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,8,2,64,0,1,float16,fp8,7,0.010821333775917688
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,8,2,64,128,1,float16,fp8,7,0.011503999431928
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,8,2,64,128,1,float16,float16,15,0.04031999905904134
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,8,2,64,128,1,float16,fp8,15,0.010981333752473196
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,8,2,64,0,1,float16,float16,15,0.040192000567913055
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,8,2,64,0,1,float16,fp8,15,0.011402666568756104
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,8,2,64,0,1,float16,float16,31,0.039477333426475525
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,8,2,64,128,1,float16,float16,31,0.04058666775623957
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,8,2,64,0,1,float16,float16,63,0.039818666875362396
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,8,2,64,128,1,float16,fp8,31,0.011418666690587997
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,8,2,64,128,1,float16,float16,63,0.03951466580231985
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,8,2,64,0,1,float16,fp8,31,0.01139733319481214
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,8,2,64,128,1,float16,fp8,63,0.011365332951148352
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,8,2,64,0,1,float16,fp8,63,0.010698666175206503
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,8,2,64,0,1,float16,fp8,127,0.012800000607967377
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,8,2,64,128,1,float16,float16,127,0.040133332212766014
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,8,2,64,0,1,float16,float16,127,0.03979199876387914
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,8,2,64,128,1,float16,fp8,127,0.013397333522637686
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,8,2,64,0,1,float16,float16,255,0.04187199970086416
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,8,2,64,128,1,float16,float16,255,0.04126933217048645
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,8,2,64,128,1,float16,fp8,255,0.012736000120639801
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,8,2,64,0,1,float16,fp8,255,0.016842667013406754
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,8,2,64,128,1,float16,float16,511,0.0415786678592364
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,8,2,64,0,1,float16,float16,511,0.054341331124305725
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,8,2,64,128,1,float16,fp8,511,0.013301332791646322
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,8,2,64,0,1,float16,fp8,511,0.02565866708755493
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,8,2,64,0,1,float16,float16,1023,0.07569600145022075
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,8,2,64,128,1,float16,float16,1023,0.04164800047874451
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,8,2,64,128,1,float16,fp8,1023,0.012831999609867731
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,8,2,64,0,1,float16,fp8,1023,0.04314666489760081
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,8,2,64,128,1,float16,float16,2047,0.04456533491611481
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,8,2,64,0,1,float16,fp8,2047,0.07428800066312154
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,8,4,64,0,1,float16,float16,1,0.07133866846561432
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,8,4,64,128,1,float16,fp8,1,0.018581333259741466
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,8,4,64,128,1,float16,float16,1,0.07188799977302551
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,8,2,64,0,1,float16,float16,2047,0.12385066350301106
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,8,2,64,128,1,float16,fp8,2047,0.01331199953953425
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,8,4,64,0,1,float16,fp8,1,0.019199999670187633
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,8,4,64,128,1,float16,float16,7,0.0720000018676122
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,8,4,64,0,1,float16,float16,3,0.07136000196139018
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,8,4,64,128,1,float16,fp8,3,0.01836266616980235
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,8,4,64,128,1,float16,float16,3,0.07238399982452393
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,8,4,64,0,1,float16,fp8,3,0.018629333625237148
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,8,4,64,0,1,float16,float16,7,0.072202667593956
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,8,4,64,128,1,float16,fp8,7,0.019039999693632126
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,8,4,64,0,1,float16,fp8,7,0.019365333020687103
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,8,4,64,0,1,float16,float16,15,0.07153599957625072
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,8,4,64,128,1,float16,float16,15,0.0719413310289383
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,8,4,64,128,1,float16,fp8,15,0.018496000518401463
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,8,4,64,128,1,float16,float16,63,0.07061866422494252
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,8,4,64,0,1,float16,fp8,15,0.019098666807015736
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,8,4,64,128,1,float16,float16,31,0.0718506673971812
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,8,4,64,0,1,float16,float16,31,0.07200533151626587
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,8,4,64,128,1,float16,fp8,31,0.019082666685183842
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,8,4,64,0,1,float16,fp8,31,0.01913600042462349
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,8,4,64,0,1,float16,float16,63,0.07090133428573608
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,8,4,64,128,1,float16,fp8,63,0.018383999665578205
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,8,4,64,0,1,float16,fp8,63,0.018309333672126133
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,8,4,64,0,1,float16,float16,127,0.07108266651630402
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,8,4,64,128,1,float16,float16,127,0.07144533097743988
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,8,4,64,128,1,float16,fp8,127,0.022128000855445862
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,8,4,64,0,1,float16,fp8,127,0.022133332987626392
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,8,4,64,128,1,float16,float16,255,0.07751999795436859
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,8,4,64,128,1,float16,fp8,255,0.021562665700912476
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,8,4,64,0,1,float16,fp8,255,0.0296426663796107
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,8,4,64,0,1,float16,float16,255,0.07622399926185608
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,8,4,64,128,1,float16,float16,511,0.0765066643555959
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,8,4,64,0,1,float16,float16,511,0.09794666369756062
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,8,4,64,128,1,float16,fp8,511,0.02218666672706604
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,8,4,64,0,1,float16,fp8,511,0.04752533137798309
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,8,4,64,0,1,float16,float16,1023,0.14109866817792258
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,8,4,64,128,1,float16,float16,1023,0.07657066484292348
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,8,4,64,128,1,float16,fp8,1023,0.021551998953024547
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,8,4,64,128,1,float16,float16,2047,0.07898133496443431
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,8,1,64,0,1,float16,float16,1,0.009466666728258133
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,8,4,64,0,1,float16,fp8,1023,0.078575998544693
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,8,1,64,128,1,float16,float16,1,0.007749333356817563
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,8,4,64,0,1,float16,float16,2047,0.2328373392422994
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,8,4,64,128,1,float16,fp8,2047,0.02216533323129018
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,8,1,64,128,1,float16,fp8,1,0.0069759997228781385
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,8,1,64,0,1,float16,fp8,1,0.005578666925430298
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,8,4,64,0,1,float16,fp8,2047,0.14267733693122864
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,8,1,64,128,1,float16,float16,3,0.008618666479984919
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,8,1,64,0,1,float16,float16,3,0.008154666672150293
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,8,1,64,128,1,float16,fp8,3,0.005744000275929769
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,8,1,64,128,1,float16,float16,15,0.008725333337982496
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,8,1,64,0,1,float16,fp8,3,0.0069866664707660675
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,8,1,64,128,1,float16,float16,7,0.008170666793982187
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,8,1,64,0,1,float16,float16,7,0.008181333541870117
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,8,1,64,128,1,float16,fp8,7,0.005674666414658229
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,8,1,64,0,1,float16,float16,31,0.008373333141207695
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,8,1,64,0,1,float16,fp8,7,0.007029333462317784
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,8,1,64,0,1,float16,float16,15,0.008069333309928576
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,8,1,64,128,1,float16,fp8,15,0.005797333394487699
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,8,1,64,0,1,float16,fp8,15,0.008432000254591307
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,8,1,64,128,1,float16,float16,31,0.008576000109314919
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,8,1,64,128,1,float16,fp8,31,0.007050666958093643
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,8,1,64,0,1,float16,fp8,31,0.007360000163316727
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,8,1,64,128,1,float16,float16,63,0.009072000160813332
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,8,1,64,0,1,float16,float16,63,0.009152000149091085
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,8,1,64,128,1,float16,fp8,127,0.007391999786098798
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,8,1,64,128,1,float16,fp8,63,0.005754666402935982
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,8,1,64,128,1,float16,float16,255,0.00916800027092298
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,8,1,64,0,1,float16,fp8,63,0.008389333263039589
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,8,1,64,128,1,float16,float16,127,0.009242666885256767
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,8,1,64,0,1,float16,float16,127,0.009119999905427298
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,8,1,64,0,1,float16,fp8,127,0.00720000018676122
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,8,1,64,0,1,float16,float16,255,0.009349333122372627
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,8,1,64,128,1,float16,fp8,255,0.006917333230376244
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,8,1,64,0,1,float16,fp8,255,0.00795199970404307
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,8,1,64,128,1,float16,float16,511,0.009610666582981745
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,8,1,64,0,1,float16,float16,511,0.01033599985142549
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,8,1,64,128,1,float16,fp8,511,0.007290666922926903
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,8,1,64,0,1,float16,fp8,511,0.008176000167926153
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,8,1,64,128,1,float16,float16,1023,0.012250666817029318
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,8,1,64,0,1,float16,float16,1023,0.012058666596810022
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,8,1,64,128,1,float16,fp8,1023,0.007578666632374127
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,8,1,64,0,1,float16,fp8,1023,0.008997333546479544
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,8,1,64,128,1,float16,float16,2047,0.014218666901191076
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,8,1,64,0,1,float16,float16,4095,0.015376000354687372
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,8,1,64,0,1,float16,float16,2047,0.014373333503802618
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,8,1,64,128,1,float16,fp8,2047,0.007567999884486198
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,8,1,64,0,1,float16,fp8,2047,0.009797333429257074
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,8,1,64,128,1,float16,float16,4095,0.01394133393963178
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,8,1,64,128,1,float16,fp8,4095,0.008458666503429413
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,8,1,64,0,1,float16,fp8,4095,0.0120319997270902
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,8,1,64,128,1,float16,float16,8191,0.017050666113694508
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,8,1,64,0,1,float16,float16,8191,0.02203733225663503
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,8,1,64,128,1,float16,fp8,8191,0.007280000175038974
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,8,1,64,0,1,float16,fp8,8191,0.013962666193644205
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,8,1,64,128,1,float16,float16,16383,0.01854933301607768
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,8,1,64,0,1,float16,float16,16383,0.025018667181332905
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,8,1,64,128,1,float16,fp8,16383,0.00855466661353906
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,8,1,64,0,1,float16,fp8,16383,0.016682667036851246
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,8,1,64,128,1,float16,float16,32767,0.021759999295075733
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,8,1,64,0,1,float16,float16,32767,0.036661334335803986
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,8,1,64,128,1,float16,fp8,32767,0.00754666638871034
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,8,1,64,0,1,float16,fp8,32767,0.023434666295846302
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,8,1,64,0,1,float16,float16,65535,0.05515199899673462
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,8,1,64,128,1,float16,float16,65535,0.02587733417749405
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,8,1,64,128,1,float16,fp8,65535,0.008714666590094566
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,8,2,64,128,1,float16,float16,1,0.00761600024998188
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,8,1,64,0,1,float16,fp8,65535,0.03465066601832708
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,8,2,64,0,1,float16,float16,1,0.008229333286484083
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,8,2,64,128,1,float16,fp8,1,0.0086666668454806
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,8,2,64,0,1,float16,fp8,1,0.005615999922156334
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,8,2,64,128,1,float16,float16,3,0.008933333059151968
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,8,2,64,0,1,float16,float16,3,0.008186666915814081
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,8,2,64,0,1,float16,fp8,3,0.005562666803598404
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,8,2,64,128,1,float16,fp8,3,0.008522666369875273
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,8,1,64,128,1,float16,float16,131071,0.032826667030652366
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,8,2,64,128,1,float16,float16,7,0.008143999924262365
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,8,2,64,0,1,float16,float16,7,0.008154666672150293
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,8,2,64,128,1,float16,fp8,7,0.005578666925430298
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,8,2,64,0,1,float16,fp8,7,0.00855466661353906
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,8,2,64,0,1,float16,float16,15,0.007701333612203598
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,8,2,64,128,1,float16,float16,15,0.008234666660428047
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,8,1,64,0,1,float16,float16,131071,0.09102400143941243
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,8,2,64,128,1,float16,fp8,15,0.00720000018676122
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,8,1,64,128,1,float16,fp8,131071,0.008559999987483025
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,8,2,64,0,1,float16,fp8,15,0.005621333296100299
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,8,1,64,0,1,float16,fp8,131071,0.06448533137639363
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,8,2,64,128,1,float16,float16,31,0.008752000207702318
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,8,2,64,0,1,float16,float16,31,0.007903999959429106
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,8,2,64,128,1,float16,fp8,31,0.006895999734600385
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,8,2,64,128,1,float16,float16,63,0.009039999917149544
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,8,2,64,0,1,float16,fp8,31,0.0074560002734263735
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,8,2,64,0,1,float16,float16,63,0.00891733355820179
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,8,2,64,128,1,float16,fp8,63,0.005541333307822545
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,8,2,64,0,1,float16,fp8,63,0.006954666847983996
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,8,2,64,128,1,float16,float16,127,0.00961599995692571
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,8,2,64,0,1,float16,float16,127,0.008746666833758354
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,8,2,64,128,1,float16,fp8,127,0.0069386667261521024
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,8,2,64,0,1,float16,fp8,127,0.0075040000180403394
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,8,2,64,128,1,float16,float16,255,0.00916800027092298
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,8,2,64,0,1,float16,float16,255,0.009205333267649015
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,8,2,64,128,1,float16,fp8,255,0.006751999879876773
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,8,2,64,0,1,float16,fp8,255,0.007711999739209811
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,8,2,64,128,1,float16,float16,511,0.012261333564917246
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,8,2,64,0,1,float16,float16,511,0.01118933285276095
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,8,2,64,128,1,float16,fp8,511,0.007402666533986728
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,8,2,64,0,1,float16,fp8,511,0.00784533346692721
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,8,2,64,0,1,float16,float16,2047,0.018357332795858383
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,8,2,64,128,1,float16,float16,1023,0.011711999773979187
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,8,2,64,0,1,float16,float16,1023,0.012250666817029318
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,8,2,64,128,1,float16,fp8,1023,0.007621333623925845
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,8,2,64,0,1,float16,fp8,1023,0.00842666688064734
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,8,2,64,128,1,float16,float16,2047,0.016597333053747814
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,8,2,64,0,1,float16,fp8,4095,0.010901333143313726
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,8,2,64,128,1,float16,fp8,2047,0.007621333623925845
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,8,2,64,0,1,float16,fp8,2047,0.010015999898314476
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,8,2,64,128,1,float16,float16,4095,0.01643199970324834
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,8,2,64,0,1,float16,float16,4095,0.02056533346573512
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,8,2,64,128,1,float16,fp8,4095,0.007466666400432587
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,8,2,64,128,1,float16,float16,8191,0.020058666666348774
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,8,2,64,128,1,float16,fp8,8191,0.007093333328763644
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,8,2,64,0,1,float16,float16,8191,0.028970666229724884
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,8,2,64,0,1,float16,fp8,8191,0.014933332800865173
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,8,2,64,128,1,float16,float16,16383,0.022629333039124806
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,8,2,64,0,1,float16,float16,16383,0.03826666623353958
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,8,2,64,128,1,float16,fp8,16383,0.007429333403706551
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,8,2,64,0,1,float16,fp8,16383,0.0201706662774086
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,8,2,64,128,1,float16,float16,32767,0.028384000062942505
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,8,2,64,0,1,float16,float16,32767,0.06217599908510844
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,8,2,64,0,1,float16,fp8,32767,0.033930666744709015
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,8,2,64,128,1,float16,fp8,32767,0.007770666852593422
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,8,2,64,128,1,float16,float16,65535,0.03493333359559377
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,8,2,64,0,1,float16,float16,65535,0.09282666444778442
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,8,2,64,0,1,float16,fp8,65535,0.0524586687485377
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,8,2,64,128,1,float16,fp8,65535,0.00795199970404307
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,8,4,64,128,1,float16,float16,1,0.00808533343176047
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,8,4,64,0,1,float16,float16,1,0.007823999971151352
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,8,4,64,128,1,float16,fp8,1,0.007146666447321574
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,8,4,64,0,1,float16,fp8,1,0.00696000022192796
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,8,4,64,128,1,float16,float16,3,0.007706666365265846
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,8,4,64,0,1,float16,float16,3,0.00808533343176047
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,8,4,64,128,1,float16,fp8,3,0.0068800002336502075
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,8,4,64,0,1,float16,fp8,3,0.005744000275929769
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,8,2,64,0,1,float16,float16,131071,0.14563199877738953
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,8,4,64,128,1,float16,float16,7,0.008021333565314611
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,8,4,64,0,1,float16,float16,7,0.00814933329820633
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,8,4,64,128,1,float16,fp8,7,0.006581333155433337
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,8,2,64,128,1,float16,float16,131071,0.038202665746212006
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,8,4,64,0,1,float16,fp8,7,0.005621333296100299
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,8,4,64,128,1,float16,float16,15,0.008186666915814081
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,8,2,64,128,1,float16,fp8,131071,0.008037333066264788
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,8,4,64,0,1,float16,float16,15,0.008080000057816505
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,8,4,64,128,1,float16,fp8,15,0.0058559998869895935
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,8,4,64,128,1,float16,float16,31,0.007920000081261
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,8,4,64,0,1,float16,fp8,15,0.008298666526873907
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,8,2,64,0,1,float16,fp8,131071,0.09155199925104777
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,8,4,64,0,1,float16,float16,31,0.008325333396593729
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,8,4,64,128,1,float16,fp8,63,0.008400000010927519
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,8,4,64,128,1,float16,fp8,31,0.007290666922926903
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,8,4,64,0,1,float16,fp8,31,0.007125333572427432
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,8,4,64,128,1,float16,float16,63,0.008559999987483025
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,8,4,64,0,1,float16,float16,63,0.008576000109314919
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,8,4,64,0,1,float16,fp8,63,0.007002666592597961
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,8,4,64,128,1,float16,float16,127,0.00921066664159298
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,8,4,64,0,1,float16,float16,127,0.009632000078757605
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,8,4,64,128,1,float16,fp8,127,0.007226666435599327
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,8,4,64,0,1,float16,fp8,127,0.007125333572427432
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,8,4,64,0,1,float16,float16,511,0.012639999389648438
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,8,4,64,128,1,float16,float16,255,0.011338666081428528
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,8,4,64,0,1,float16,float16,255,0.011530666301647821
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,8,4,64,128,1,float16,float16,1023,0.017024000485738117
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,8,4,64,0,1,float16,float16,1023,0.016789333273967106
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,8,4,64,128,1,float16,fp8,255,0.007290666922926903
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,8,4,64,0,1,float16,fp8,255,0.007743999982873599
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,8,4,64,128,1,float16,float16,511,0.012037333101034164
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,8,4,64,128,1,float16,fp8,511,0.00744000015159448
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,8,4,64,0,1,float16,fp8,511,0.007914666707317034
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,8,4,64,128,1,float16,fp8,1023,0.007141333073377609
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,8,4,64,0,1,float16,fp8,1023,0.009242666885256767
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,8,4,64,128,1,float16,float16,2047,0.018719999740521114
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,8,4,64,0,1,float16,float16,2047,0.02442666639884313
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,8,4,64,128,1,float16,fp8,2047,0.007791999727487564
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,8,4,64,0,1,float16,fp8,2047,0.011002667248249054
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,8,4,64,128,1,float16,float16,4095,0.023919999599456787
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,8,4,64,0,1,float16,float16,4095,0.0325546662012736
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,8,4,64,128,1,float16,fp8,4095,0.007258666679263115
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,8,4,64,0,1,float16,fp8,4095,0.01414399966597557
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,8,4,64,128,1,float16,float16,8191,0.027029333015282948
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,8,4,64,0,1,float16,float16,8191,0.04451199869314829
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,8,4,64,128,1,float16,fp8,8191,0.007861333588759104
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,8,4,64,0,1,float16,fp8,8191,0.02311466634273529
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,8,4,64,128,1,float16,float16,16383,0.034154665966828666
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,8,4,64,0,1,float16,float16,16383,0.06730133295059204
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,8,4,64,128,1,float16,fp8,16383,0.007967999825874964
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,8,4,64,0,1,float16,fp8,16383,0.031498665610949196
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,8,4,64,128,1,float16,float16,32767,0.034186666210492454
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,8,4,64,0,1,float16,float16,32767,0.09200533231099446
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,8,4,64,128,1,float16,fp8,32767,0.008373333141207695
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,8,4,64,0,1,float16,fp8,32767,0.052186667919158936
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,8,4,64,128,1,float16,fp8,65535,0.007920000081261
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,8,4,64,128,1,float16,float16,65535,0.035189333061377205
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,8,4,64,0,1,float16,float16,65535,0.1386666695276896
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,8,4,64,0,1,float16,fp8,65535,0.08569600184758504
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,8,1,64,128,1,float16,float16,1,0.040463998913764954
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,8,1,64,0,1,float16,float16,1,0.041034666200478874
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,8,1,64,128,1,float16,fp8,1,0.011343999455372492
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,8,1,64,128,1,float16,float16,3,0.04035733391841253
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,8,4,64,128,1,float16,float16,131071,0.03604800005753835
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,8,1,64,0,1,float16,fp8,1,0.011887999872366587
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,8,1,64,128,1,float16,fp8,3,0.010863999525705973
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,8,4,64,0,1,float16,fp8,131071,0.147189329067866
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,8,4,64,0,1,float16,float16,131071,0.23688000440597534
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,8,4,64,128,1,float16,fp8,131071,0.008240000034372011
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,8,1,64,0,1,float16,fp8,3,0.010981333752473196
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,8,1,64,0,1,float16,float16,3,0.040789333482583366
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,8,1,64,128,1,float16,float16,7,0.04041066765785217
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,8,1,64,128,1,float16,fp8,7,0.010853332777818045
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,8,1,64,0,1,float16,float16,7,0.04179200033346812
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,8,1,64,128,1,float16,fp8,15,0.01102399950226148
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,8,1,64,0,1,float16,fp8,7,0.011701333026091257
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,8,1,64,0,1,float16,fp8,15,0.011018666128317514
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,8,1,64,128,1,float16,float16,15,0.0415786678592364
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,8,1,64,0,1,float16,float16,15,0.04087999959786733
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,8,1,64,128,1,float16,float16,31,0.04030933231115341
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,8,1,64,128,1,float16,fp8,31,0.011055999745925268
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,8,1,64,0,1,float16,float16,31,0.041120000183582306
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,8,1,64,128,1,float16,float16,63,0.03982399900754293
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,8,1,64,0,1,float16,fp8,31,0.011498666057984034
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,8,1,64,0,1,float16,fp8,63,0.01089599976936976
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,8,1,64,0,1,float16,float16,63,0.040896000961462654
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,8,1,64,128,1,float16,fp8,63,0.011424000064531961
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,8,1,64,128,1,float16,fp8,127,0.013045333325862885
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,8,1,64,128,1,float16,float16,127,0.04060266663630804
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,8,1,64,0,1,float16,float16,127,0.04052799940109253
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,8,1,64,128,1,float16,float16,255,0.04153066625197729
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,8,1,64,0,1,float16,fp8,127,0.013605333864688873
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,8,1,64,0,1,float16,fp8,255,0.01729600007335345
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,8,1,64,0,1,float16,float16,255,0.042277331153551735
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,8,1,64,128,1,float16,fp8,255,0.013466666142145792
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,8,1,64,128,1,float16,fp8,511,0.01309866706530253
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,8,1,64,128,1,float16,float16,511,0.04232533276081085
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,8,1,64,0,1,float16,float16,511,0.05542933444182078
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,8,1,64,0,1,float16,fp8,511,0.026485333840052288
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,8,1,64,128,1,float16,float16,1023,0.042090664307276406
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,8,1,64,0,1,float16,fp8,1023,0.04308266441027323
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,8,1,64,0,1,float16,float16,1023,0.07796800136566162
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,8,2,64,128,1,float16,float16,1,0.07310399909814198
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,8,1,64,128,1,float16,fp8,1023,0.01350933313369751
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,8,2,64,0,1,float16,float16,1,0.07316799958546956
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,8,2,64,0,1,float16,fp8,1,0.018506667266289394
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,8,2,64,128,1,float16,fp8,1,0.019146667172511418
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,8,2,64,128,1,float16,float16,3,0.07272000114123027
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,8,2,64,0,1,float16,float16,3,0.07397333284219106
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,8,2,64,128,1,float16,fp8,3,0.018976000448067982
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,8,2,64,0,1,float16,float16,7,0.0726506660381953
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,8,2,64,0,1,float16,fp8,3,0.019013332823912304
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,8,2,64,128,1,float16,float16,7,0.07309333483378093
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,8,2,64,128,1,float16,fp8,7,0.019648000597953796
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,8,2,64,0,1,float16,fp8,7,0.018559999763965607
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,8,2,64,128,1,float16,fp8,15,0.018570666511853535
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,8,2,64,128,1,float16,float16,15,0.07406933108965556
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,8,2,64,0,1,float16,float16,15,0.07323733468850453
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,8,2,64,128,1,float16,fp8,31,0.018474667022625606
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,8,2,64,0,1,float16,fp8,31,0.018624000251293182
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,8,2,64,128,1,float16,float16,63,0.07177599767843883
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,8,2,64,0,1,float16,fp8,15,0.01897066707412402
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,8,2,64,128,1,float16,float16,31,0.07327466706434886
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,8,2,64,0,1,float16,float16,31,0.07351999978224437
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,8,2,64,0,1,float16,fp8,63,0.018357332795858383
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,8,2,64,0,1,float16,float16,63,0.0730453332265218
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,8,2,64,128,1,float16,float16,127,0.07227199772993724
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,8,2,64,0,1,float16,float16,127,0.07129066685835521
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,8,2,64,128,1,float16,fp8,63,0.018885333091020584
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,8,2,64,128,1,float16,fp8,127,0.022181332111358643
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,8,2,64,0,1,float16,float16,255,0.07691200077533722
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,8,2,64,0,1,float16,fp8,127,0.02253866692384084
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,8,2,64,0,1,float16,fp8,255,0.03002133220434189
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,8,2,64,128,1,float16,float16,255,0.07814933359622955
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,8,2,64,128,1,float16,float16,511,0.07738133271535237
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,8,2,64,128,1,float16,fp8,255,0.022202665607134502
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,8,2,64,0,1,float16,fp8,511,0.047210668524106346
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,8,2,64,0,1,float16,float16,511,0.09871466954549153
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,8,2,64,128,1,float16,fp8,511,0.022277332842350006
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,8,2,64,0,1,float16,float16,1023,0.14276267091433206
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,8,4,64,0,1,float16,float16,1,0.13293866316477457
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,8,2,64,0,1,float16,fp8,1023,0.0782293329636256
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,8,2,64,128,1,float16,float16,1023,0.07796266674995422
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,8,4,64,128,1,float16,float16,1,0.13491732875506082
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,8,2,64,128,1,float16,fp8,1023,0.022330666581789654
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,8,4,64,128,1,float16,float16,3,0.13409599661827087
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,8,4,64,128,1,float16,fp8,1,0.03311999887228012
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,8,4,64,0,1,float16,float16,3,0.1332373321056366
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,8,4,64,0,1,float16,fp8,1,0.03306666761636734
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,8,4,64,128,1,float16,fp8,3,0.0324799989660581
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,8,4,64,128,1,float16,fp8,7,0.032613334556420646
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,8,4,64,0,1,float16,fp8,3,0.033359999457995095
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,8,4,64,128,1,float16,float16,7,0.1348426640033722
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,8,4,64,128,1,float16,float16,15,0.1329759955406189
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,8,4,64,0,1,float16,float16,7,0.1344319979349772
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,8,4,64,0,1,float16,fp8,7,0.033088001112143196
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,8,4,64,128,1,float16,fp8,15,0.03254399945338567
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,8,4,64,0,1,float16,fp8,15,0.032645332316557564
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,8,4,64,0,1,float16,float16,15,0.13352533181508383
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,8,4,64,128,1,float16,fp8,31,0.032405334214369454
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,8,4,64,128,1,float16,float16,31,0.13473066687583923
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,8,4,64,0,1,float16,float16,31,0.13498666882514954
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,8,4,64,0,1,float16,fp8,31,0.03312533348798752
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,8,4,64,0,1,float16,float16,63,0.13180800278981528
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,8,4,64,128,1,float16,fp8,63,0.032501332461833954
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,8,4,64,128,1,float16,float16,63,0.1330453356107076
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,8,4,64,128,1,float16,float16,127,0.1341653366883596
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,8,4,64,0,1,float16,fp8,63,0.03275199979543686
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,8,4,64,0,1,float16,float16,127,0.13593066732088724
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,8,4,64,128,1,float16,fp8,127,0.03958400090535482
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,8,4,64,128,1,float16,float16,255,0.14317333698272705
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,8,4,64,0,1,float16,fp8,127,0.03991466760635376
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,8,4,64,0,1,float16,float16,255,0.14389866590499878
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,8,4,64,0,1,float16,fp8,255,0.056458666920661926
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,8,4,64,128,1,float16,fp8,255,0.04068800061941147
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,8,4,64,128,1,float16,float16,511,0.1439839998881022
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,8,4,64,0,1,float16,float16,511,0.18408532937367758
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,8,4,64,128,1,float16,fp8,511,0.04071466624736786
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,8,4,64,128,1,float16,float16,1023,0.1439359982808431
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,8,4,64,0,1,float16,fp8,511,0.0881066620349884
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,8,4,64,0,1,float16,float16,1023,0.2698400020599365
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,8,4,64,128,1,float16,fp8,1023,0.041002665956815086
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,8,1,64,128,1,float16,fp8,1,0.018922666708628338
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,8,4,64,0,1,float16,fp8,1023,0.14882666865984598
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,8,1,64,128,1,float16,float16,3,0.07302399973074596
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,8,1,64,128,1,float16,float16,1,0.07417599856853485
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,8,1,64,0,1,float16,float16,1,0.07416533430417378
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,8,1,64,0,1,float16,fp8,1,0.019402666638294857
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,8,1,64,128,1,float16,fp8,3,0.018805333723624546
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,8,1,64,128,1,float16,float16,7,0.07297066847483318
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,8,1,64,0,1,float16,float16,3,0.07396799822648366
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,8,1,64,0,1,float16,fp8,3,0.019343999524911244
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,8,1,64,128,1,float16,float16,15,0.0732426643371582
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,8,1,64,0,1,float16,float16,7,0.07410133381684621
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,8,1,64,128,1,float16,fp8,7,0.01933866615096728
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,8,1,64,0,1,float16,float16,15,0.07314666608969371
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,8,1,64,0,1,float16,fp8,7,0.01947733387351036
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,8,1,64,128,1,float16,float16,31,0.07327466706434886
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,8,1,64,128,1,float16,fp8,15,0.01940800001223882
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,8,1,64,0,1,float16,fp8,31,0.018837332725524902
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,8,1,64,0,1,float16,fp8,15,0.01922133316596349
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,8,1,64,0,1,float16,float16,31,0.07401599983374278
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,8,1,64,0,1,float16,float16,63,0.07237866520881653
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,8,1,64,128,1,float16,float16,63,0.07314133147398631
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,8,1,64,0,1,float16,fp8,63,0.018661333868900936
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,8,1,64,0,1,float16,float16,127,0.07256533205509186
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,8,1,64,128,1,float16,fp8,63,0.01926400015751521
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,8,1,64,0,1,float16,fp8,127,0.02257599929968516
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,8,1,64,128,1,float16,float16,127,0.0729973316192627
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,8,1,64,128,1,float16,fp8,127,0.023189333577950794
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,8,1,64,128,1,float16,fp8,255,0.02233600119749705
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,8,1,64,128,1,float16,float16,255,0.07952000200748444
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,8,1,64,128,1,float16,float16,511,0.07932266592979431
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,8,1,64,0,1,float16,fp8,255,0.03196266790231069
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,8,1,64,0,1,float16,float16,511,0.09954667091369629
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,8,1,64,0,1,float16,fp8,511,0.048469334840774536
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,8,2,64,0,1,float16,float16,1,0.1360586682955424
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,8,1,64,128,1,float16,fp8,511,0.02325333406527837
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,8,2,64,128,1,float16,float16,1,0.13612799843152365
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,8,2,64,128,1,float16,fp8,1,0.03333866596221924
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,8,2,64,128,1,float16,float16,3,0.1353493332862854
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,8,2,64,0,1,float16,float16,3,0.13595199584960938
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,8,2,64,0,1,float16,fp8,1,0.0330826664964358
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,8,2,64,128,1,float16,fp8,3,0.03347733368476232
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,8,2,64,0,1,float16,fp8,3,0.033402666449546814
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,8,2,64,0,1,float16,float16,7,0.13569066921869913
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,8,2,64,128,1,float16,fp8,7,0.03276800115903219
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,8,2,64,128,1,float16,float16,7,0.13662933309872946
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,8,2,64,0,1,float16,fp8,7,0.03295466552178065
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,8,2,64,128,1,float16,fp8,15,0.03257599969704946
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,8,2,64,0,1,float16,fp8,15,0.03278933217128118
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,8,1,64,128,1,float16,fp8,31,0.01934933289885521
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,8,2,64,128,1,float16,float16,15,0.1365653375784556
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,8,1,64,0,1,float16,float16,255,0.07871466875076294
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,8,2,64,0,1,float16,float16,15,0.13689600427945456
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,8,2,64,128,1,float16,fp8,31,0.032773333291212715
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,8,2,64,0,1,float16,fp8,31,0.03271466741959254
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,8,2,64,128,1,float16,float16,31,0.13640000422795615
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,8,2,64,0,1,float16,float16,31,0.13497599959373474
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,8,2,64,128,1,float16,fp8,63,0.03236266722281774
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,8,2,64,128,1,float16,float16,127,0.1378773351510366
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,8,2,64,128,1,float16,float16,63,0.1365493337313334
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,8,2,64,0,1,float16,float16,127,0.1378986636797587
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,8,2,64,0,1,float16,float16,63,0.1360586682955424
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,8,2,64,0,1,float16,fp8,63,0.03326933334271113
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,8,2,64,0,1,float16,fp8,127,0.04021333406368891
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,8,2,64,128,1,float16,float16,255,0.14501333236694336
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,8,2,64,128,1,float16,fp8,127,0.040735999743143715
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,8,2,64,0,1,float16,float16,255,0.1446346640586853
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,8,2,64,0,1,float16,fp8,255,0.05715733269850413
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,8,2,64,128,1,float16,fp8,511,0.04052799940109253
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,8,2,64,128,1,float16,fp8,255,0.04084266722202301
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,8,2,64,0,1,float16,fp8,511,0.08721066514650981
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,8,4,64,0,1,float16,float16,1,0.2576106588045756
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,8,4,64,128,1,float16,fp8,1,0.06359999875227611
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,8,2,64,128,1,float16,float16,511,0.1469439963499705
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,8,2,64,0,1,float16,float16,511,0.18608532349268594
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,8,4,64,128,1,float16,float16,1,0.2600746750831604
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,8,4,64,0,1,float16,fp8,1,0.06402666866779327
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,8,4,64,128,1,float16,fp8,3,0.06318399806817372
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,8,4,64,0,1,float16,float16,3,0.25762667258580524
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,8,4,64,0,1,float16,fp8,3,0.06329066554705302
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,8,4,64,128,1,float16,float16,3,0.26157333453496295
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,8,4,64,0,1,float16,fp8,7,0.06398933132489522
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,8,4,64,128,1,float16,float16,15,0.258133331934611
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,8,4,64,128,1,float16,float16,7,0.26083733638127643
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,8,4,64,128,1,float16,fp8,7,0.06518400212128957
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,8,4,64,0,1,float16,float16,7,0.2606400052706401
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,8,4,64,128,1,float16,fp8,15,0.0633546660343806
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,8,4,64,128,1,float16,float16,31,0.2598666747411092
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,8,4,64,0,1,float16,float16,15,0.2590773304303487
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,8,4,64,0,1,float16,fp8,15,0.06369600196679433
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,8,4,64,0,1,float16,float16,31,0.2595360080401103
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,8,4,64,128,1,float16,fp8,63,0.06417066852251689
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,8,4,64,128,1,float16,fp8,31,0.06579199930032094
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,8,4,64,0,1,float16,fp8,31,0.06502399841944377
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,8,4,64,128,1,float16,float16,63,0.26562132438023883
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,8,4,64,128,1,float16,float16,127,0.26081599791844684
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,8,4,64,0,1,float16,fp8,63,0.0631520003080368
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,8,4,64,0,1,float16,float16,127,0.26071999470392865
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,8,4,64,128,1,float16,fp8,127,0.07928533355395
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,8,4,64,0,1,float16,float16,255,0.2765120069185893
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,8,4,64,0,1,float16,fp8,127,0.07871999839941661
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,8,4,64,0,1,float16,fp8,255,0.10641066233317058
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,8,4,64,128,1,float16,float16,255,0.279258668422699
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,8,1,64,128,1,float16,float16,1,0.008858666444818178
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,8,4,64,128,1,float16,fp8,255,0.07926400005817413
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,8,1,64,0,1,float16,float16,1,0.008672000219424566
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,8,1,64,128,1,float16,fp8,1,0.008282666405042013
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,8,1,64,0,1,float16,fp8,1,0.006805333619316419
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,8,4,64,0,1,float16,float16,511,0.35359466075897217
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,8,1,64,128,1,float16,float16,3,0.008672000219424566
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,8,1,64,0,1,float16,float16,3,0.00847999999920527
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,8,4,64,0,1,float16,fp8,511,0.1657493313153585
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,8,1,64,0,1,float16,fp8,3,0.006325333068768184
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,8,1,64,128,1,float16,fp8,3,0.008127999802430471
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,8,1,64,128,1,float16,float16,7,0.008389333263039589
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,8,4,64,128,1,float16,float16,511,0.2816106677055359
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,8,1,64,0,1,float16,fp8,7,0.008458666503429413
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,8,1,64,0,1,float16,float16,7,0.007642666498819987
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,8,1,64,128,1,float16,fp8,7,0.005584000299374263
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,8,4,64,128,1,float16,fp8,511,0.07945066690444946
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,8,1,64,128,1,float16,float16,15,0.008383999889095625
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,8,1,64,0,1,float16,float16,15,0.00877333308259646
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,8,1,64,128,1,float16,fp8,15,0.006837333242098491
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,8,1,64,0,1,float16,fp8,15,0.005530666559934616
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,8,1,64,128,1,float16,float16,31,0.00814933329820633
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,8,1,64,0,1,float16,float16,31,0.0084906667470932
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,8,1,64,128,1,float16,fp8,63,0.005589333052436511
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,8,1,64,128,1,float16,fp8,31,0.007344000041484833
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,8,1,64,0,1,float16,fp8,31,0.007242666557431221
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,8,4,64,0,1,float16,float16,63,0.26449066400527954
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,8,1,64,128,1,float16,float16,63,0.009445333232482275
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,8,1,64,0,1,float16,float16,63,0.009290666629870733
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,8,1,64,128,1,float16,float16,255,0.00903466654320558
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,8,1,64,0,1,float16,fp8,63,0.0075519997626543045
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,8,1,64,128,1,float16,float16,127,0.009285333255926767
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,8,1,64,0,1,float16,float16,127,0.008853333070874214
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,8,1,64,128,1,float16,fp8,127,0.007146666447321574
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,8,1,64,0,1,float16,fp8,127,0.007018666714429855
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,8,1,64,0,1,float16,float16,255,0.009722666814923286
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,8,1,64,128,1,float16,fp8,255,0.007578666632374127
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,8,1,64,128,1,float16,float16,1023,0.012042666474978128
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,8,1,64,0,1,float16,fp8,255,0.0075040000180403394
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,8,1,64,128,1,float16,float16,511,0.012074666718641916
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,8,1,64,0,1,float16,float16,511,0.011877333124478659
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,8,1,64,128,1,float16,fp8,511,0.006778666749596596
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,8,1,64,0,1,float16,fp8,511,0.008234666660428047
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,8,1,64,0,1,float16,float16,1023,0.012282667060693106
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,8,1,64,128,1,float16,fp8,1023,0.00754666638871034
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,8,1,64,0,1,float16,fp8,1023,0.008816000074148178
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,8,1,64,128,1,float16,float16,2047,0.016437333077192307
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,8,1,64,0,1,float16,float16,2047,0.019578666736682255
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,8,1,64,128,1,float16,fp8,2047,0.007658666620651881
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,8,1,64,0,1,float16,fp8,2047,0.009770666559537252
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,8,1,64,128,1,float16,float16,4095,0.017429333180189133
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,8,1,64,0,1,float16,float16,4095,0.021402666966120403
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,8,1,64,128,1,float16,fp8,4095,0.007887999837597212
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,8,1,64,0,1,float16,fp8,4095,0.011829332758982977
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,8,1,64,128,1,float16,float16,8191,0.02011200040578842
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,8,1,64,0,1,float16,float16,8191,0.02908266584078471
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,8,1,64,128,1,float16,fp8,8191,0.007701333612203598
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,8,1,64,0,1,float16,fp8,8191,0.015583999454975128
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,8,1,64,128,1,float16,float16,16383,0.023845332364241283
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,8,1,64,0,1,float16,float16,16383,0.039173332353432976
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,8,1,64,128,1,float16,fp8,16383,0.007727999861041705
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,8,1,64,0,1,float16,fp8,16383,0.02183466653029124
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,8,1,64,128,1,float16,float16,32767,0.02869333326816559
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,8,1,64,0,1,float16,float16,32767,0.06170133252938589
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,8,1,64,128,1,float16,fp8,32767,0.00784533346692721
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,8,1,64,0,1,float16,fp8,32767,0.03569599986076355
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,8,1,64,128,1,float16,float16,65535,0.03603733330965042
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,8,1,64,0,1,float16,float16,65535,0.09408000111579895
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,8,1,64,128,1,float16,fp8,65535,0.008293333152929941
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,8,1,64,0,1,float16,fp8,65535,0.05380799869696299
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,8,2,64,128,1,float16,float16,1,0.008506666868925095
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,8,2,64,0,1,float16,float16,1,0.008672000219424566
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,8,2,64,128,1,float16,fp8,1,0.007087999954819679
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,8,2,64,0,1,float16,fp8,1,0.007296000296870868
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,8,2,64,128,1,float16,float16,3,0.008538666491707167
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,8,2,64,0,1,float16,float16,3,0.008832000195980072
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,8,2,64,0,1,float16,fp8,3,0.007269333427151044
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,8,2,64,128,1,float16,fp8,3,0.0081386665503184
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,8,2,64,128,1,float16,float16,7,0.008565333361426989
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,8,2,64,128,1,float16,fp8,7,0.007397333160042763
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,8,1,64,0,1,float16,fp8,131071,0.0955573320388794
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,8,2,64,0,1,float16,float16,7,0.00891733355820179
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,8,1,64,128,1,float16,float16,131071,0.03828266759713491
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,8,2,64,0,1,float16,fp8,7,0.007386666412154834
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,8,2,64,128,1,float16,float16,15,0.008549333239595095
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,8,1,64,0,1,float16,float16,131071,0.14525866508483887
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,8,2,64,0,1,float16,float16,31,0.008623999853928884
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,8,2,64,0,1,float16,float16,15,0.008389333263039589
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,8,2,64,128,1,float16,fp8,15,0.008463999877373377
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,8,2,64,0,1,float16,fp8,15,0.005744000275929769
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,8,1,64,128,1,float16,fp8,131071,0.008192000289758047
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,8,2,64,128,1,float16,float16,31,0.00867733359336853
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,8,2,64,128,1,float16,fp8,31,0.007061333085099856
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,8,2,64,0,1,float16,fp8,31,0.007365333537260692
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,8,2,64,128,1,float16,float16,63,0.00890666681031386
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,8,2,64,0,1,float16,float16,63,0.009786666681369146
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,8,2,64,128,1,float16,fp8,63,0.005744000275929769
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,8,2,64,0,1,float16,fp8,63,0.007002666592597961
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,8,2,64,128,1,float16,float16,255,0.012335999558369318
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,8,2,64,128,1,float16,float16,127,0.009493333597977957
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,8,2,64,0,1,float16,float16,127,0.00972800018886725
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,8,2,64,128,1,float16,fp8,127,0.007536000261704127
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,8,2,64,0,1,float16,fp8,127,0.00620266670982043
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,8,2,64,0,1,float16,float16,255,0.01156266654531161
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,8,2,64,128,1,float16,fp8,255,0.007418666655818622
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,8,2,64,0,1,float16,fp8,255,0.008442666381597519
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,8,2,64,128,1,float16,float16,511,0.011957333733638128
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,8,2,64,0,1,float16,float16,511,0.01231466606259346
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,8,2,64,128,1,float16,fp8,511,0.007685333490371704
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,8,2,64,0,1,float16,fp8,511,0.007781333600481351
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,8,2,64,128,1,float16,float16,1023,0.016143999993801117
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,8,2,64,0,1,float16,float16,1023,0.01758933315674464
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,8,2,64,128,1,float16,fp8,1023,0.008890666688481966
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,8,2,64,0,1,float16,fp8,1023,0.008949333180983862
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,8,2,64,128,1,float16,float16,2047,0.019674666225910187
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,8,2,64,0,1,float16,float16,2047,0.02459733436505
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,8,2,64,128,1,float16,fp8,2047,0.0074879998962084455
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,8,2,64,0,1,float16,fp8,2047,0.0106133334338665
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,8,2,64,128,1,float16,float16,4095,0.023957334458827972
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,8,2,64,0,1,float16,float16,8191,0.04398400088151296
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,8,2,64,0,1,float16,float16,4095,0.03356266766786575
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,8,2,64,128,1,float16,fp8,4095,0.00784533346692721
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,8,2,64,0,1,float16,fp8,4095,0.014357333381970724
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,8,2,64,128,1,float16,float16,16383,0.0331839993596077
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,8,2,64,128,1,float16,float16,8191,0.02769600103298823
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,8,2,64,128,1,float16,fp8,8191,0.007514666765928268
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,8,2,64,0,1,float16,fp8,8191,0.023071999351183575
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,8,2,64,0,1,float16,float16,16383,0.06855999926726024
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,8,2,64,128,1,float16,fp8,16383,0.008229333286484083
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,8,2,64,0,1,float16,fp8,16383,0.031125334401925404
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,8,2,64,128,1,float16,float16,32767,0.03410666684309641
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,8,2,64,128,1,float16,fp8,32767,0.007696000238259633
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,8,2,64,0,1,float16,float16,32767,0.09144533673922221
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,8,2,64,0,1,float16,fp8,32767,0.05176533261934916
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,8,2,64,128,1,float16,float16,65535,0.03514133393764496
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,8,2,64,128,1,float16,fp8,65535,0.008293333152929941
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,8,2,64,0,1,float16,float16,65535,0.13992533087730408
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,8,2,64,0,1,float16,fp8,65535,0.08573333422342937
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,8,4,64,128,1,float16,float16,1,0.010682666053374609
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,8,4,64,0,1,float16,float16,1,0.010474666953086853
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,8,4,64,128,1,float16,fp8,1,0.006927999978264173
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,8,4,64,0,1,float16,fp8,1,0.006911999856432279
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,8,4,64,128,1,float16,float16,3,0.010421333213647207
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,8,4,64,0,1,float16,float16,3,0.010101333260536194
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,8,4,64,128,1,float16,fp8,3,0.006949333474040031
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,8,4,64,128,1,float16,float16,7,0.010677333921194077
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,8,4,64,0,1,float16,float16,7,0.010144000252087912
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,8,4,64,0,1,float16,fp8,3,0.0069440001000960665
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,8,4,64,0,1,float16,fp8,7,0.008442666381597519
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,8,2,64,128,1,float16,float16,131071,0.03596800069014231
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,8,2,64,0,1,float16,float16,131071,0.23577600717544556
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,8,4,64,128,1,float16,fp8,7,0.006874666859706243
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,8,4,64,128,1,float16,float16,15,0.010224000240365664
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,8,4,64,0,1,float16,float16,15,0.010458666831254959
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,8,4,64,128,1,float16,fp8,15,0.008506666868925095
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,8,4,64,0,1,float16,fp8,15,0.005744000275929769
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,8,4,64,0,1,float16,fp8,31,0.0068800002336502075
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,8,4,64,128,1,float16,float16,31,0.010965333630641302
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,8,2,64,128,1,float16,fp8,131071,0.008303999900817871
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,8,4,64,0,1,float16,float16,31,0.009994666402538618
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,8,2,64,0,1,float16,fp8,131071,0.14754133423169455
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,8,4,64,128,1,float16,fp8,31,0.007242666557431221
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,8,4,64,128,1,float16,float16,63,0.010373333469033241
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,8,4,64,0,1,float16,float16,63,0.009594666461149851
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,8,4,64,128,1,float16,fp8,63,0.007242666557431221
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,8,4,64,0,1,float16,fp8,63,0.008986666798591614
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,8,4,64,128,1,float16,float16,127,0.00961599995692571
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,8,4,64,0,1,float16,float16,127,0.010112000008424124
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,8,4,64,128,1,float16,fp8,127,0.007413333281874657
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,8,4,64,0,1,float16,fp8,127,0.007034666836261749
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,8,4,64,0,1,float16,float16,511,0.017658667018016178
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,8,4,64,128,1,float16,float16,255,0.012858666479587555
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,8,4,64,0,1,float16,float16,255,0.011823999385039011
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,8,4,64,128,1,float16,fp8,255,0.0074506668994824094
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,8,4,64,0,1,float16,fp8,255,0.00784533346692721
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,8,4,64,128,1,float16,float16,511,0.016421332955360413
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,8,4,64,128,1,float16,fp8,511,0.007642666498819987
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,8,4,64,0,1,float16,fp8,511,0.008517333616813024
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,8,4,64,0,1,float16,float16,2047,0.0356480007370313
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,8,4,64,128,1,float16,float16,1023,0.0220320001244545
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,8,4,64,0,1,float16,float16,1023,0.02526933451493581
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,8,4,64,128,1,float16,fp8,1023,0.007407999907930692
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,8,4,64,0,1,float16,fp8,1023,0.010581333190202713
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,8,4,64,128,1,float16,float16,2047,0.02712533374627431
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,8,4,64,128,1,float16,fp8,2047,0.008298666526873907
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,8,4,64,0,1,float16,fp8,2047,0.01643199970324834
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,8,4,64,128,1,float16,float16,4095,0.03369066615899404
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,8,4,64,0,1,float16,float16,4095,0.052629331747690834
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,8,4,64,128,1,float16,fp8,4095,0.008896000062425932
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,8,4,64,0,1,float16,fp8,4095,0.020661332954963047
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,8,4,64,128,1,float16,float16,8191,0.034416000048319496
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,8,4,64,0,1,float16,float16,8191,0.0666720022757848
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,8,4,64,128,1,float16,fp8,8191,0.008527999743819237
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,8,4,64,0,1,float16,fp8,8191,0.028832000990708668
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,8,4,64,128,1,float16,float16,16383,0.03443733354409536
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,8,4,64,0,1,float16,float16,16383,0.08916266759236653
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,8,4,64,128,1,float16,fp8,16383,0.008026666939258575
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,8,4,64,0,1,float16,fp8,16383,0.046757335464159645
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,8,4,64,128,1,float16,float16,32767,0.034202667574087776
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,8,4,64,128,1,float16,fp8,32767,0.00810666692753633
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,8,4,64,0,1,float16,float16,32767,0.13221866885821024
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,8,4,64,0,1,float16,fp8,32767,0.07918933530648549
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,8,4,64,0,1,float16,float16,65535,0.21887467304865518
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,8,4,64,128,1,float16,float16,65535,0.03495999922355016
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,8,4,64,0,1,float16,fp8,65535,0.1430453360080719
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,8,4,64,128,1,float16,fp8,65535,0.008709333216150602
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,8,1,128,0,1,float16,fp8,1,0.005882666756709416
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,8,1,128,0,1,float16,float16,1,0.009189333145817121
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,8,1,128,0,1,float16,float16,3,0.009173333023985228
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,8,1,128,0,1,float16,fp8,3,0.0058133335163195925
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,8,1,128,0,1,float16,float16,7,0.009423999736706415
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,8,1,128,0,1,float16,fp8,7,0.0069759997228781385
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,8,1,128,0,1,float16,float16,15,0.009242666885256767
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,8,4,64,128,1,float16,float16,131071,0.0354666660229365
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,8,1,128,0,1,float16,fp8,15,0.008565333361426989
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,8,1,128,0,1,float16,float16,31,0.01146666705608368
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,8,1,128,0,1,float16,fp8,31,0.005882666756709416
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,8,4,64,128,1,float16,fp8,131071,0.008346666892369589
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,8,1,128,0,1,float16,fp8,63,0.005946666623155276
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,8,1,128,0,1,float16,float16,63,0.012330666184425354
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,8,1,128,0,1,float16,float16,127,0.011834666132926941
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,8,1,128,0,1,float16,fp8,127,0.006528000036875407
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,8,1,128,0,1,float16,float16,255,0.012448000411192576
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,8,1,128,0,1,float16,fp8,255,0.008016000191370646
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,8,1,128,0,1,float16,float16,511,0.01421333352724711
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,8,1,128,0,1,float16,fp8,511,0.009039999917149544
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,8,1,128,0,1,float16,float16,1023,0.01941866676012675
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,8,4,64,0,1,float16,float16,131071,0.39447999000549316
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,8,4,64,0,1,float16,fp8,131071,0.2730026642481486
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,8,1,128,0,1,float16,fp8,1023,0.009722666814923286
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,8,1,128,0,1,float16,float16,2047,0.028666667640209198
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,8,1,128,0,1,float16,fp8,2047,0.01328533391157786
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,8,1,128,0,1,float16,float16,4095,0.040261333187421165
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,8,1,128,0,1,float16,fp8,4095,0.019952000429232914
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,8,1,128,0,1,float16,fp8,8191,0.02526933451493581
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,8,1,128,0,1,float16,float16,8191,0.05989866455396017
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,8,1,128,0,1,float16,float16,16383,0.08797333637873332
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,8,1,128,0,1,float16,fp8,16383,0.04246933261553446
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,8,2,128,0,1,float16,float16,1,0.010608000059922537
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,8,2,128,0,1,float16,fp8,1,0.007221333061655362
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,8,2,128,0,1,float16,float16,3,0.010869332899649939
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,8,1,128,0,1,float16,float16,32767,0.12625599900881448
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,8,2,128,0,1,float16,float16,7,0.010581333190202713
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,8,2,128,0,1,float16,fp8,3,0.005775999898711841
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,8,1,128,0,1,float16,fp8,32767,0.0650079995393753
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,8,2,128,0,1,float16,fp8,7,0.00720000018676122
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,8,2,128,0,1,float16,float16,15,0.010842667271693548
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,8,2,128,0,1,float16,fp8,15,0.008282666405042013
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,8,1,128,0,1,float16,fp8,65535,0.10748799641927083
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,8,2,128,0,1,float16,fp8,63,0.0069386667261521024
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,8,2,128,0,1,float16,float16,63,0.010309333602587381
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,8,2,128,0,1,float16,float16,31,0.010527999450763067
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,8,2,128,0,1,float16,fp8,31,0.0058453331391016645
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,8,1,128,0,1,float16,float16,65535,0.20225600401560465
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,8,2,128,0,1,float16,float16,127,0.010879999647537867
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,8,2,128,0,1,float16,float16,1023,0.027776000400384266
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,8,2,128,0,1,float16,fp8,127,0.008549333239595095
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,8,2,128,0,1,float16,float16,2047,0.04276266694068909
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,8,2,128,0,1,float16,fp8,255,0.007813333223263422
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,8,2,128,0,1,float16,float16,255,0.014085333794355392
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,8,2,128,0,1,float16,float16,511,0.019424000134070713
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,8,2,128,0,1,float16,fp8,511,0.009269333134094873
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,8,2,128,0,1,float16,fp8,1023,0.014495999862750372
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,8,2,128,0,1,float16,fp8,2047,0.01740266631046931
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,8,2,128,0,1,float16,fp8,4095,0.026127999027570088
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,8,2,128,0,1,float16,float16,4095,0.06783999999364217
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,8,2,128,0,1,float16,float16,8191,0.08488000432650249
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,8,2,128,0,1,float16,fp8,8191,0.038133333126703896
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,8,2,128,0,1,float16,float16,16383,0.12071466445922852
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,8,2,128,0,1,float16,fp8,16383,0.05787733197212219
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,8,4,128,0,1,float16,float16,1,0.010591999938090643
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,8,4,128,0,1,float16,fp8,1,0.007071999832987785
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,8,2,128,0,1,float16,fp8,32767,0.09726400176684062
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,8,4,128,0,1,float16,float16,3,0.011173332730929056
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,8,2,128,0,1,float16,float16,32767,0.19301867485046387
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,8,4,128,0,1,float16,fp8,3,0.006842666616042455
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,8,4,128,0,1,float16,fp8,15,0.007216000308593114
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,8,4,128,0,1,float16,float16,7,0.010687999427318573
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,8,4,128,0,1,float16,fp8,7,0.007365333537260692
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,8,4,128,0,1,float16,float16,15,0.010405333091815313
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,8,2,128,0,1,float16,float16,65535,0.3341386715571086
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,8,4,128,0,1,float16,fp8,31,0.013717333475748697
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,8,4,128,0,1,float16,float16,31,0.011231999844312668
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,8,4,128,0,1,float16,float16,63,0.011157333850860596
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,8,4,128,0,1,float16,fp8,63,0.00589866687854131
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,8,4,128,0,1,float16,float16,127,0.011231999844312668
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,8,4,128,0,1,float16,fp8,511,0.01350933313369751
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,8,2,128,0,1,float16,fp8,65535,0.17896000544230142
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,8,4,128,0,1,float16,fp8,127,0.006335999816656113
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,8,4,128,0,1,float16,fp8,255,0.008261333530147871
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,8,4,128,0,1,float16,float16,255,0.02145066608985265
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,8,4,128,0,1,float16,float16,511,0.028666667640209198
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,8,4,128,0,1,float16,float16,1023,0.046223998069763184
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,8,4,128,0,1,float16,fp8,1023,0.01971199984351794
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,8,4,128,0,1,float16,float16,2047,0.06550399959087372
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,8,4,128,0,1,float16,fp8,2047,0.025583999852339428
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,8,4,128,0,1,float16,float16,4095,0.08425066868464152
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,8,4,128,0,1,float16,fp8,4095,0.03815466662247976
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,8,4,128,0,1,float16,fp8,8191,0.05607999861240387
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,8,4,128,0,1,float16,float16,16383,0.19162132342656454
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,8,4,128,0,1,float16,float16,8191,0.11948266625404358
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,8,4,128,0,1,float16,fp8,16383,0.09585600097974141
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,8,1,128,0,1,float16,float16,1,0.00878399983048439
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,8,1,128,0,1,float16,fp8,1,0.007381333038210869
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,8,1,128,0,1,float16,fp8,3,0.0058666666348775225
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,8,4,128,0,1,float16,fp8,32767,0.17545600732167563
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,8,1,128,0,1,float16,float16,3,0.008821333448092142
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,8,4,128,0,1,float16,float16,32767,0.33397332827250165
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,8,1,128,0,1,float16,float16,7,0.008874666566650072
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,8,1,128,0,1,float16,fp8,7,0.0058666666348775225
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,8,1,128,0,1,float16,float16,15,0.008645333349704742
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,8,4,128,0,1,float16,fp8,65535,0.33533867200215656
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,8,1,128,0,1,float16,fp8,15,0.007711999739209811
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,8,1,128,0,1,float16,float16,31,0.009706666693091393
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,8,1,128,0,1,float16,fp8,31,0.008570666735370954
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,8,1,128,0,1,float16,fp8,127,0.006218666831652324
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,8,1,128,0,1,float16,float16,63,0.01089599976936976
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,8,1,128,0,1,float16,fp8,63,0.005888000130653381
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,8,1,128,0,1,float16,float16,127,0.011370666325092316
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,8,1,128,0,1,float16,float16,255,0.011503999431928
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,8,4,128,0,1,float16,float16,65535,0.616650660832723
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,8,1,128,0,1,float16,fp8,255,0.008789333204428354
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,8,1,128,0,1,float16,float16,511,0.01524266724785169
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,8,1,128,0,1,float16,fp8,511,0.007802666475375493
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,8,1,128,0,1,float16,float16,1023,0.016154666741689045
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,8,1,128,0,1,float16,fp8,1023,0.008992000172535578
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,8,1,128,0,1,float16,float16,2047,0.016864000509182613
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,8,1,128,0,1,float16,fp8,2047,0.010186666622757912
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,8,1,128,0,1,float16,float16,4095,0.01718933383623759
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,8,1,128,0,1,float16,float16,16383,0.020037333170572918
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,8,1,128,0,1,float16,fp8,4095,0.010527999450763067
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,8,1,128,0,1,float16,float16,8191,0.017978666971127193
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,8,1,128,0,1,float16,fp8,8191,0.01232533281048139
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,8,1,128,0,1,float16,fp8,16383,0.015482666591803232
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,8,1,128,0,1,float16,float16,32767,0.024143998821576435
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,8,1,128,0,1,float16,fp8,32767,0.019519999623298645
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,8,2,128,0,1,float16,float16,1,0.008272000278035799
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,8,2,128,0,1,float16,fp8,1,0.00730666642387708
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,8,2,128,0,1,float16,float16,3,0.009082666908701261
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,8,1,128,0,1,float16,float16,65535,0.02890666574239731
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,8,1,128,0,1,float16,fp8,65535,0.02404266595840454
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,8,2,128,0,1,float16,fp8,3,0.006906666482488315
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,8,2,128,0,1,float16,float16,7,0.008287999778985977
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,8,2,128,0,1,float16,fp8,7,0.007125333572427432
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,8,2,128,0,1,float16,float16,15,0.009392000113924345
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,8,2,128,0,1,float16,fp8,15,0.006821333120266597
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,8,2,128,0,1,float16,float16,31,0.009786666681369146
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,8,2,128,0,1,float16,float16,127,0.011744000017642975
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,8,1,128,0,1,float16,float16,131071,0.038560000558694206
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,8,2,128,0,1,float16,fp8,31,0.007045333584149678
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,8,2,128,0,1,float16,float16,63,0.010842667271693548
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,8,1,128,0,1,float16,fp8,131071,0.030671998858451843
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,8,2,128,0,1,float16,fp8,63,0.007301333049933116
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,8,2,128,0,1,float16,fp8,127,0.007157333195209503
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,8,2,128,0,1,float16,fp8,1023,0.007925333455204964
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,8,2,128,0,1,float16,float16,255,0.011920000116030375
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,8,2,128,0,1,float16,fp8,255,0.008703999842206636
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,8,2,128,0,1,float16,float16,4095,0.014938666174809137
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,8,2,128,0,1,float16,float16,511,0.01522133375207583
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,8,2,128,0,1,float16,fp8,511,0.008122666428486506
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,8,2,128,0,1,float16,float16,8191,0.018800000349680584
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,8,2,128,0,1,float16,fp8,8191,0.012186666329701742
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,8,2,128,0,1,float16,float16,1023,0.01621333385507266
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,8,2,128,0,1,float16,float16,2047,0.016789333273967106
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,8,2,128,0,1,float16,fp8,2047,0.009717333440979322
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,8,2,128,0,1,float16,fp8,4095,0.010847999403874079
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,8,2,128,0,1,float16,float16,16383,0.019754666835069656
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,8,2,128,0,1,float16,fp8,16383,0.015050667027632395
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,8,2,128,0,1,float16,fp8,32767,0.018378666291634243
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,8,2,128,0,1,float16,float16,32767,0.02457600086927414
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,8,4,128,0,1,float16,float16,1,0.008346666892369589
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,8,2,128,0,1,float16,float16,65535,0.029466666281223297
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,8,4,128,0,1,float16,fp8,1,0.006981333096822103
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,8,4,128,0,1,float16,float16,3,0.008400000010927519
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,8,2,128,0,1,float16,fp8,65535,0.02384000023206075
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,8,4,128,0,1,float16,fp8,3,0.008272000278035799
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,8,4,128,0,1,float16,float16,7,0.008223999912540117
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,8,4,128,0,1,float16,fp8,7,0.005525333185990651
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,8,4,128,0,1,float16,float16,15,0.009183999771873156
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,8,4,128,0,1,float16,fp8,15,0.005562666803598404
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,8,4,128,0,1,float16,float16,31,0.00984533317387104
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,8,4,128,0,1,float16,float16,127,0.011578666667143503
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,8,4,128,0,1,float16,fp8,31,0.008272000278035799
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,8,4,128,0,1,float16,float16,63,0.01099733387430509
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,8,2,128,0,1,float16,float16,131071,0.05712000032265981
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,8,4,128,0,1,float16,fp8,63,0.005626666670044263
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,8,4,128,0,1,float16,fp8,127,0.0058399997651577
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,8,2,128,0,1,float16,fp8,131071,0.034389334420363106
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,8,4,128,0,1,float16,float16,255,0.011541333049535751
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,8,4,128,0,1,float16,fp8,1023,0.007829333345095316
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,8,4,128,0,1,float16,fp8,255,0.007706666365265846
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,8,4,128,0,1,float16,float16,511,0.01595199977358182
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,8,4,128,0,1,float16,fp8,511,0.007541333635648091
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,8,4,128,0,1,float16,float16,1023,0.016976000120242436
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,8,4,128,0,1,float16,float16,2047,0.015360000232855478
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,8,4,128,0,1,float16,fp8,2047,0.009397333487868309
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,8,4,128,0,1,float16,float16,4095,0.015354666858911514
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,8,4,128,0,1,float16,fp8,4095,0.010768000036478043
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,8,4,128,0,1,float16,float16,8191,0.01988799994190534
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,8,4,128,0,1,float16,fp8,8191,0.012421333541472753
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,8,4,128,0,1,float16,float16,16383,0.021669333179791767
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,8,4,128,0,1,float16,fp8,16383,0.015354666858911514
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,8,4,128,0,1,float16,fp8,32767,0.02062400057911873
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,8,4,128,0,1,float16,float16,32767,0.034917332231998444
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,8,1,128,0,1,float16,float16,1,0.00891733355820179
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,8,4,128,0,1,float16,float16,65535,0.0462773342927297
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,8,1,128,0,1,float16,fp8,1,0.0057386669019858045
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,8,1,128,0,1,float16,float16,3,0.008901333436369896
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,8,1,128,0,1,float16,fp8,3,0.007344000041484833
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,8,4,128,0,1,float16,fp8,65535,0.027024000883102417
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,8,1,128,0,1,float16,float16,7,0.009125333279371262
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,8,1,128,0,1,float16,fp8,7,0.0058080001423756284
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,8,1,128,0,1,float16,float16,15,0.008581333483258883
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,8,1,128,0,1,float16,fp8,15,0.007082666580875714
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,8,1,128,0,1,float16,float16,31,0.010144000252087912
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,8,1,128,0,1,float16,fp8,31,0.007167999943097432
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,8,4,128,0,1,float16,float16,131071,0.07644799848397572
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,8,1,128,0,1,float16,float16,63,0.010816000401973724
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,8,1,128,0,1,float16,fp8,63,0.005888000130653381
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,8,1,128,0,1,float16,float16,127,0.01146666705608368
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,8,4,128,0,1,float16,fp8,131071,0.04251733422279358
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,8,1,128,0,1,float16,fp8,127,0.008597333605090777
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,8,1,128,0,1,float16,float16,1023,0.01681600014368693
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,8,1,128,0,1,float16,float16,255,0.011813333878914515
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,8,1,128,0,1,float16,fp8,255,0.007685333490371704
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,8,1,128,0,1,float16,float16,511,0.015386667102575302
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,8,1,128,0,1,float16,fp8,511,0.007829333345095316
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,8,1,128,0,1,float16,fp8,1023,0.00921066664159298
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,8,1,128,0,1,float16,fp8,2047,0.009519999846816063
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,8,1,128,0,1,float16,float16,2047,0.0173333336909612
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,8,1,128,0,1,float16,float16,4095,0.015775999675194424
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,8,1,128,0,1,float16,fp8,4095,0.01116266722480456
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,8,1,128,0,1,float16,float16,8191,0.019007999449968338
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,8,1,128,0,1,float16,fp8,8191,0.012730666746695837
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,8,1,128,0,1,float16,float16,16383,0.02025066688656807
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,8,1,128,0,1,float16,fp8,16383,0.015439999600251516
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,8,1,128,0,1,float16,float16,32767,0.024842667082945507
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,8,1,128,0,1,float16,fp8,32767,0.0195573332409064
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,8,2,128,0,1,float16,float16,1,0.008432000254591307
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,8,2,128,0,1,float16,fp8,1,0.00697066696981589
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,8,2,128,0,1,float16,float16,3,0.008447999755541483
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,8,1,128,0,1,float16,float16,65535,0.030533333619435627
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,8,2,128,0,1,float16,fp8,3,0.008538666491707167
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,8,2,128,0,1,float16,float16,7,0.008341333518425623
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,8,1,128,0,1,float16,fp8,65535,0.024645333488782246
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,8,2,128,0,1,float16,fp8,7,0.007631999750932057
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,8,2,128,0,1,float16,float16,15,0.008672000219424566
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,8,2,128,0,1,float16,fp8,15,0.00725333330531915
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,8,2,128,0,1,float16,float16,31,0.010154666379094124
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,8,2,128,0,1,float16,float16,127,0.011503999431928
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,8,2,128,0,1,float16,fp8,31,0.005610666548212369
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,8,1,128,0,1,float16,float16,131071,0.05808533231417338
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,8,2,128,0,1,float16,fp8,255,0.007920000081261
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,8,2,128,0,1,float16,float16,63,0.011605333536863327
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,8,2,128,0,1,float16,fp8,63,0.006864000111818314
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,8,2,128,0,1,float16,fp8,127,0.007189333438873291
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,8,1,128,0,1,float16,fp8,131071,0.03532266616821289
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,8,2,128,0,1,float16,float16,255,0.011882666498422623
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,8,2,128,0,1,float16,fp8,2047,0.009482666850090027
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,8,2,128,0,1,float16,float16,511,0.015247999380032221
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,8,2,128,0,1,float16,fp8,511,0.0075519997626543045
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,8,2,128,0,1,float16,float16,1023,0.016864000509182613
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,8,2,128,0,1,float16,fp8,1023,0.008805333326260248
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,8,2,128,0,1,float16,float16,2047,0.015626666446526844
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,8,2,128,0,1,float16,float16,4095,0.016447999825080235
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,8,2,128,0,1,float16,fp8,4095,0.011034666250149408
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,8,2,128,0,1,float16,float16,8191,0.01959466685851415
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,8,2,128,0,1,float16,fp8,8191,0.012293333808581034
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,8,2,128,0,1,float16,float16,16383,0.02126399924357732
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,8,2,128,0,1,float16,fp8,16383,0.015194666882356008
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,8,2,128,0,1,float16,float16,32767,0.03498133271932602
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,8,2,128,0,1,float16,fp8,32767,0.02164799968401591
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,8,4,128,0,1,float16,float16,1,0.008389333263039589
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,8,4,128,0,1,float16,fp8,1,0.007247999931375186
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,8,4,128,0,1,float16,float16,3,0.00891733355820179
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,8,4,128,0,1,float16,fp8,3,0.0069866664707660675
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,8,4,128,0,1,float16,float16,7,0.008576000109314919
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,8,2,128,0,1,float16,float16,65535,0.047770669062932335
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,8,2,128,0,1,float16,fp8,65535,0.026730666557947796
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,8,4,128,0,1,float16,fp8,7,0.006911999856432279
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,8,4,128,0,1,float16,float16,15,0.009269333134094873
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,8,4,128,0,1,float16,fp8,15,0.006911999856432279
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,8,4,128,0,1,float16,float16,31,0.009392000113924345
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,8,2,128,0,1,float16,fp8,131071,0.04126933217048645
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,8,4,128,0,1,float16,fp8,31,0.00754666638871034
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,8,4,128,0,1,float16,float16,63,0.011786667009194693
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,8,4,128,0,1,float16,fp8,63,0.006864000111818314
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,8,4,128,0,1,float16,float16,127,0.011626667032639185
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,8,4,128,0,1,float16,fp8,127,0.007045333584149678
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,8,4,128,0,1,float16,float16,255,0.011445333560307821
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,8,2,128,0,1,float16,float16,131071,0.07715733349323273
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,8,4,128,0,1,float16,fp8,255,0.007327999919652939
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,8,4,128,0,1,float16,float16,511,0.015861333658297855
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,8,4,128,0,1,float16,fp8,511,0.008181333541870117
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,8,4,128,0,1,float16,float16,1023,0.013829333086808523
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,8,4,128,0,1,float16,fp8,1023,0.008778666456540426
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,8,4,128,0,1,float16,float16,2047,0.015066667149464289
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,8,4,128,0,1,float16,fp8,2047,0.009679999823371569
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,8,4,128,0,1,float16,float16,4095,0.016415999581416447
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,8,4,128,0,1,float16,fp8,4095,0.01110400011142095
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,8,4,128,0,1,float16,float16,8191,0.025994665920734406
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,8,4,128,0,1,float16,fp8,8191,0.014474666366974512
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,8,4,128,0,1,float16,float16,16383,0.0308746670683225
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,8,4,128,0,1,float16,fp8,16383,0.016309333344300587
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,8,4,128,0,1,float16,float16,32767,0.05012799799442291
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,8,4,128,0,1,float16,fp8,32767,0.0244159996509552
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,8,1,128,0,1,float16,float16,1,0.010277333358923594
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,8,1,128,0,1,float16,fp8,1,0.007424000029762586
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,8,1,128,0,1,float16,float16,3,0.010586666564146677
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,8,4,128,0,1,float16,float16,65535,0.07402666906515758
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,8,1,128,0,1,float16,fp8,3,0.005935999875267346
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,8,4,128,0,1,float16,fp8,65535,0.04641599953174591
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,8,1,128,0,1,float16,fp8,7,0.006064000229040782
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,8,1,128,0,1,float16,float16,7,0.011018666128317514
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,8,1,128,0,1,float16,float16,15,0.011194666226704916
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,8,1,128,0,1,float16,fp8,31,0.006175999840100606
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,8,1,128,0,1,float16,fp8,15,0.005946666623155276
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,8,1,128,0,1,float16,float16,31,0.010666667173306147
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,8,1,128,0,1,float16,float16,63,0.01098666712641716
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,8,1,128,0,1,float16,fp8,63,0.0074346667776505155
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,8,1,128,0,1,float16,float16,127,0.010570666442314783
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,8,1,128,0,1,float16,fp8,127,0.006624000146985054
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,8,4,128,0,1,float16,float16,131071,0.1256053348382314
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,8,4,128,0,1,float16,fp8,131071,0.06549866497516632
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,8,1,128,0,1,float16,float16,1023,0.028207999964555103
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,8,1,128,0,1,float16,float16,255,0.013562666873137156
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,8,1,128,0,1,float16,fp8,255,0.008736000085870424
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,8,1,128,0,1,float16,float16,511,0.01974933346112569
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,8,1,128,0,1,float16,fp8,511,0.009408000235756239
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,8,1,128,0,1,float16,fp8,1023,0.016016000260909397
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,8,1,128,0,1,float16,fp8,2047,0.01868266612291336
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,8,1,128,0,1,float16,float16,2047,0.04377600053946177
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,8,1,128,0,1,float16,float16,4095,0.06769066552321117
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,8,1,128,0,1,float16,fp8,4095,0.028832000990708668
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,8,1,128,0,1,float16,float16,8191,0.08691199620564778
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,8,1,128,0,1,float16,fp8,8191,0.04051200052102407
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,8,1,128,0,1,float16,fp8,16383,0.05933333436648051
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,8,2,128,0,1,float16,float16,1,0.011322667201360067
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,8,2,128,0,1,float16,fp8,1,0.007541333635648091
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,8,1,128,0,1,float16,float16,16383,0.12286399801572163
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,8,2,128,0,1,float16,float16,3,0.010869332899649939
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,8,2,128,0,1,float16,fp8,3,0.005882666756709416
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,8,2,128,0,1,float16,float16,7,0.011381333072980246
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,8,2,128,0,1,float16,fp8,7,0.007322666545708974
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,8,1,128,0,1,float16,fp8,32767,0.09893866380055745
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,8,2,128,0,1,float16,float16,15,0.011413333316644033
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,8,2,128,0,1,float16,fp8,15,0.007071999832987785
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,8,2,128,0,1,float16,float16,31,0.010890666395425797
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,8,1,128,0,1,float16,float16,32767,0.1958400011062622
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,8,2,128,0,1,float16,fp8,31,0.005984000240763028
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,8,2,128,0,1,float16,float16,63,0.011152000476916632
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,8,2,128,0,1,float16,fp8,63,0.007797333101431529
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,8,2,128,0,1,float16,float16,127,0.011242666592200598
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,8,2,128,0,1,float16,fp8,127,0.006469333544373512
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,8,2,128,0,1,float16,float16,255,0.021674667795499165
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,8,2,128,0,1,float16,fp8,255,0.008442666381597519
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,8,2,128,0,1,float16,float16,511,0.028304000695546467
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,8,2,128,0,1,float16,fp8,511,0.014085333794355392
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,8,2,128,0,1,float16,float16,1023,0.046256000796953835
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,8,2,128,0,1,float16,fp8,1023,0.02014933278163274
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,8,2,128,0,1,float16,fp8,2047,0.025557334224383037
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,8,2,128,0,1,float16,float16,2047,0.06643199920654297
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,8,2,128,0,1,float16,float16,4095,0.08473066488901775
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,8,2,128,0,1,float16,fp8,4095,0.038202665746212006
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,8,2,128,0,1,float16,float16,8191,0.11948800086975098
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,8,2,128,0,1,float16,fp8,8191,0.05782400071620941
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,8,4,128,0,1,float16,float16,1,0.01628799984852473
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,8,4,128,0,1,float16,fp8,1,0.0063146669417619705
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,8,2,128,0,1,float16,float16,16383,0.19263466199239096
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,8,2,128,0,1,float16,fp8,16383,0.09708799918492635
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,8,4,128,0,1,float16,float16,3,0.015402667224407196
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,8,2,128,0,1,float16,float16,32767,0.33455467224121094
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,8,4,128,0,1,float16,fp8,3,0.007135999699433644
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,8,4,128,0,1,float16,float16,7,0.016399999459584553
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,8,4,128,0,1,float16,fp8,7,0.006335999816656113
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,8,4,128,0,1,float16,float16,15,0.015674666812022526
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,8,4,128,0,1,float16,float16,31,0.015397333850463232
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,8,4,128,0,1,float16,fp8,15,0.007589333380262057
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,8,4,128,0,1,float16,fp8,31,0.007055999711155891
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,8,4,128,0,1,float16,float16,63,0.015290666371583939
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,8,2,128,0,1,float16,fp8,32767,0.17799999316533408
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,8,4,128,0,1,float16,fp8,63,0.007130666946371396
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,8,4,128,0,1,float16,float16,127,0.015237333873907724
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,8,4,128,0,1,float16,fp8,127,0.0075626665105422335
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,8,4,128,0,1,float16,float16,255,0.030245333909988403
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,8,4,128,0,1,float16,fp8,255,0.013354666531085968
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,8,4,128,0,1,float16,float16,511,0.04752000172932943
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,8,4,128,0,1,float16,fp8,511,0.018965333700180054
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,8,4,128,0,1,float16,float16,1023,0.06333333253860474
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,8,4,128,0,1,float16,fp8,1023,0.02515733242034912
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,8,4,128,0,1,float16,float16,2047,0.08322133123874664
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,8,4,128,0,1,float16,fp8,2047,0.03772799919048945
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,8,4,128,0,1,float16,float16,4095,0.11890133221944173
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,8,4,128,0,1,float16,fp8,4095,0.05657066901524862
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,8,4,128,0,1,float16,float16,8191,0.19172799587249756
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,8,4,128,0,1,float16,fp8,8191,0.09684800108273824
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,8,1,128,0,1,float16,float16,1,0.00919999989370505
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,8,4,128,0,1,float16,fp8,16383,0.17597333590189615
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,8,1,128,0,1,float16,fp8,1,0.005941333249211311
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,8,4,128,0,1,float16,float16,16383,0.33497599760691327
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,8,1,128,0,1,float16,float16,3,0.009237333511312803
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,8,1,128,0,1,float16,fp8,3,0.006831999868154526
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,8,1,128,0,1,float16,float16,7,0.008592000231146812
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,8,1,128,0,1,float16,fp8,7,0.005882666756709416
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,8,4,128,0,1,float16,fp8,32767,0.33478931585947674
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,8,1,128,0,1,float16,float16,15,0.00949866697192192
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,8,1,128,0,1,float16,fp8,15,0.007151999821265538
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,8,1,128,0,1,float16,float16,31,0.010053333515922228
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,8,1,128,0,1,float16,fp8,63,0.008687999720374743
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,8,1,128,0,1,float16,fp8,127,0.007194666812817256
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,8,1,128,0,1,float16,float16,127,0.011050666371981302
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,8,4,128,0,1,float16,float16,32767,0.6179466644922892
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,8,1,128,0,1,float16,fp8,31,0.0058133335163195925
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,8,1,128,0,1,float16,float16,63,0.01202133297920227
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,8,1,128,0,1,float16,float16,255,0.01184533288081487
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,8,1,128,0,1,float16,fp8,255,0.009418666362762451
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,8,1,128,0,1,float16,float16,511,0.01611199975013733
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,8,1,128,0,1,float16,fp8,511,0.007920000081261
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,8,1,128,0,1,float16,float16,4095,0.017583999782800674
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,8,1,128,0,1,float16,float16,1023,0.01664000004529953
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,8,1,128,0,1,float16,fp8,1023,0.008933333059151968
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,8,1,128,0,1,float16,float16,2047,0.01653333380818367
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,8,1,128,0,1,float16,fp8,2047,0.010005333150426546
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,8,1,128,0,1,float16,fp8,4095,0.011285333583752314
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,8,1,128,0,1,float16,float16,8191,0.020618667205174763
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,8,1,128,0,1,float16,fp8,8191,0.013541333377361298
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,8,1,128,0,1,float16,float16,16383,0.022218666970729828
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,8,1,128,0,1,float16,fp8,16383,0.015722667177518208
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,8,1,128,0,1,float16,float16,32767,0.03622400015592575
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,8,1,128,0,1,float16,fp8,32767,0.021674667795499165
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,8,2,128,0,1,float16,float16,1,0.009119999905427298
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,8,2,128,0,1,float16,fp8,1,0.005498666937152545
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,8,2,128,0,1,float16,float16,3,0.009370666618148485
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,8,1,128,0,1,float16,float16,65535,0.04981866478919983
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,8,2,128,0,1,float16,fp8,3,0.007087999954819679
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,8,1,128,0,1,float16,fp8,65535,0.026917333404223125
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,8,2,128,0,1,float16,float16,7,0.008586666857202848
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,8,2,128,0,1,float16,fp8,7,0.007882666463653246
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,8,2,128,0,1,float16,float16,15,0.009237333511312803
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,8,2,128,0,1,float16,fp8,15,0.007327999919652939
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,8,2,128,0,1,float16,float16,31,0.009392000113924345
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,8,2,128,0,1,float16,fp8,31,0.005541333307822545
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,8,2,128,0,1,float16,float16,63,0.011584000041087469
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,8,1,128,0,1,float16,fp8,131071,0.04230933388074239
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,8,2,128,0,1,float16,fp8,63,0.007007999966541926
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,8,2,128,0,1,float16,float16,127,0.011717333147923151
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,8,2,128,0,1,float16,fp8,127,0.006021333237489064
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,8,1,128,0,1,float16,float16,131071,0.0802293320496877
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,8,2,128,0,1,float16,float16,255,0.01139733319481214
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,8,2,128,0,1,float16,fp8,255,0.00985599992175897
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,8,2,128,0,1,float16,float16,511,0.01543466622630755
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,8,2,128,0,1,float16,float16,4095,0.017722666263580322
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,8,2,128,0,1,float16,fp8,511,0.007957333077987036
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,8,2,128,0,1,float16,float16,1023,0.014096000542243322
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,8,2,128,0,1,float16,fp8,1023,0.008112000301480293
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,8,2,128,0,1,float16,fp8,2047,0.009722666814923286
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,8,2,128,0,1,float16,float16,2047,0.016842667013406754
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,8,2,128,0,1,float16,fp8,4095,0.011039999624093374
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,8,2,128,0,1,float16,float16,8191,0.02646933247645696
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,8,2,128,0,1,float16,fp8,8191,0.013839999834696451
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,8,2,128,0,1,float16,float16,16383,0.032074667513370514
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,8,2,128,0,1,float16,fp8,16383,0.01746133342385292
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,8,2,128,0,1,float16,float16,32767,0.050144001841545105
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,8,2,128,0,1,float16,fp8,32767,0.024671999116738636
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,8,4,128,0,1,float16,float16,1,0.009216000015536943
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,8,4,128,0,1,float16,fp8,1,0.007216000308593114
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,8,4,128,0,1,float16,float16,3,0.0084906667470932
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,8,4,128,0,1,float16,fp8,3,0.006037333359320958
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,8,2,128,0,1,float16,fp8,65535,0.045893331368764244
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,8,4,128,0,1,float16,float16,15,0.009248000259200731
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,8,4,128,0,1,float16,float16,7,0.00916800027092298
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,8,2,128,0,1,float16,float16,65535,0.07484800120194753
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,8,4,128,0,1,float16,fp8,7,0.007338666667540868
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,8,4,128,0,1,float16,fp8,15,0.007157333195209503
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,8,4,128,0,1,float16,float16,31,0.009941333283980688
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,8,4,128,0,1,float16,fp8,31,0.007301333049933116
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,8,2,128,0,1,float16,float16,131071,0.12638933459917703
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,8,4,128,0,1,float16,float16,63,0.011530666301647821
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,8,4,128,0,1,float16,fp8,63,0.007322666545708974
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,8,2,128,0,1,float16,fp8,131071,0.06569600105285645
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,8,4,128,0,1,float16,float16,127,0.011658667276302973
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,8,4,128,0,1,float16,fp8,127,0.006330666442712148
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,8,4,128,0,1,float16,float16,255,0.011802667131026586
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,8,4,128,0,1,float16,float16,2047,0.021856000026067097
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,8,4,128,0,1,float16,fp8,255,0.007834666719039282
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,8,4,128,0,1,float16,float16,511,0.013034666577974955
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,8,4,128,0,1,float16,fp8,511,0.008320000022649765
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,8,4,128,0,1,float16,float16,1023,0.013904000322024027
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,8,4,128,0,1,float16,fp8,1023,0.00821333316465219
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,8,4,128,0,1,float16,fp8,2047,0.0102613332370917
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,8,4,128,0,1,float16,float16,4095,0.02489600082238515
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,8,4,128,0,1,float16,fp8,4095,0.012085333466529846
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,8,4,128,0,1,float16,float16,8191,0.03595733394225439
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,8,4,128,0,1,float16,fp8,8191,0.016122666498025257
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,8,4,128,0,1,float16,float16,16383,0.053727999329566956
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,8,4,128,0,1,float16,fp8,16383,0.027562665442625683
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,8,4,128,0,1,float16,float16,32767,0.08063466846942902
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,8,4,128,0,1,float16,fp8,32767,0.03905066599448522
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,8,1,128,0,1,float16,float16,1,0.010608000059922537
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,8,1,128,0,1,float16,fp8,1,0.006586666529377301
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,8,1,128,0,1,float16,float16,3,0.011301333705584208
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,8,1,128,0,1,float16,fp8,3,0.006175999840100606
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,8,1,128,0,1,float16,float16,7,0.010922666639089584
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,8,4,128,0,1,float16,fp8,65535,0.06492266555627187
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,8,1,128,0,1,float16,float16,15,0.0106133334338665
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,8,4,128,0,1,float16,float16,65535,0.1267680029074351
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,8,1,128,0,1,float16,fp8,7,0.007071999832987785
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,8,1,128,0,1,float16,fp8,15,0.006362666686375936
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,8,1,128,0,1,float16,float16,31,0.011461333682139715
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,8,1,128,0,1,float16,fp8,31,0.0063733334342638654
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,8,1,128,0,1,float16,float16,63,0.011178666104873022
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,8,1,128,0,1,float16,fp8,63,0.007130666946371396
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,8,1,128,0,1,float16,fp8,255,0.009482666850090027
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,8,1,128,0,1,float16,float16,127,0.011029332876205444
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,8,1,128,0,1,float16,fp8,127,0.007578666632374127
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,8,1,128,0,1,float16,float16,255,0.02142400046189626
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,8,4,128,0,1,float16,float16,131071,0.21071465810139975
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,8,4,128,0,1,float16,fp8,131071,0.11071999867757161
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,8,1,128,0,1,float16,float16,511,0.02889599899450938
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,8,1,128,0,1,float16,fp8,511,0.015578666081031164
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,8,1,128,0,1,float16,float16,1023,0.04674133161703745
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,8,1,128,0,1,float16,fp8,1023,0.02216000109910965
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,8,1,128,0,1,float16,float16,2047,0.06609599788983662
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,8,1,128,0,1,float16,fp8,2047,0.028229333460330963
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,8,1,128,0,1,float16,float16,4095,0.08534933129946391
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,8,1,128,0,1,float16,fp8,4095,0.04127999891837438
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,8,1,128,0,1,float16,fp8,8191,0.060880000392595925
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,8,2,128,0,1,float16,fp8,1,0.006480000292261441
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,8,1,128,0,1,float16,float16,8191,0.12210667133331299
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,8,2,128,0,1,float16,float16,1,0.01623999948302905
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,8,2,128,0,1,float16,fp8,7,0.006469333544373512
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,8,2,128,0,1,float16,float16,3,0.01613333324591319
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,8,2,128,0,1,float16,fp8,3,0.008197333042820295
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,8,2,128,0,1,float16,float16,7,0.01597333326935768
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,8,2,128,0,1,float16,float16,15,0.016069332758585613
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,8,2,128,0,1,float16,fp8,15,0.007130666946371396
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,8,2,128,0,1,float16,float16,31,0.015552000453074774
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,8,1,128,0,1,float16,fp8,16383,0.09900266925493877
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,8,2,128,0,1,float16,fp8,31,0.00754666638871034
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,8,1,128,0,1,float16,float16,16383,0.19527467091878256
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,8,2,128,0,1,float16,float16,63,0.015967999895413715
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,8,2,128,0,1,float16,fp8,63,0.006773333375652631
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,8,2,128,0,1,float16,float16,127,0.0161013330022494
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,8,2,128,0,1,float16,fp8,127,0.008080000057816505
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,8,2,128,0,1,float16,float16,255,0.030693332354227703
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,8,2,128,0,1,float16,fp8,255,0.013823999712864557
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,8,2,128,0,1,float16,float16,511,0.04693333307902018
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,8,2,128,0,1,float16,fp8,511,0.020026666422684986
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,8,2,128,0,1,float16,float16,1023,0.0625546673933665
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,8,2,128,0,1,float16,fp8,1023,0.025546667476495106
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,8,2,128,0,1,float16,float16,2047,0.08505066235860188
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,8,2,128,0,1,float16,fp8,2047,0.03815466662247976
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,8,2,128,0,1,float16,float16,4095,0.12037866314252217
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,8,2,128,0,1,float16,fp8,4095,0.05657066901524862
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,8,4,128,0,1,float16,float16,1,0.02566933383544286
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,8,2,128,0,1,float16,float16,8191,0.19334399700164795
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,8,2,128,0,1,float16,fp8,8191,0.09735467036565144
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,8,4,128,0,1,float16,fp8,1,0.010389333590865135
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,8,4,128,0,1,float16,float16,3,0.025173333783944447
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,8,4,128,0,1,float16,fp8,3,0.010245333115259806
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,8,4,128,0,1,float16,float16,7,0.025744001070658367
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,8,2,128,0,1,float16,fp8,16383,0.17705599466959634
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,8,4,128,0,1,float16,fp8,7,0.0102186668664217
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,8,2,128,0,1,float16,float16,16383,0.3365439971288045
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,8,4,128,0,1,float16,fp8,15,0.010288000106811523
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,8,4,128,0,1,float16,float16,15,0.025648000339667004
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,8,4,128,0,1,float16,float16,31,0.024842667082945507
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,8,4,128,0,1,float16,fp8,31,0.010202666744589806
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,8,4,128,0,1,float16,float16,63,0.02555199960867564
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,8,4,128,0,1,float16,float16,127,0.025413334369659424
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,8,4,128,0,1,float16,fp8,63,0.010672000547250112
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,8,4,128,0,1,float16,fp8,127,0.011690666278203329
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,8,4,128,0,1,float16,float16,255,0.050160000721613564
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,8,4,128,0,1,float16,fp8,255,0.01959466685851415
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,8,4,128,0,1,float16,float16,511,0.06471466521422069
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,8,4,128,0,1,float16,fp8,511,0.025077333052953083
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,8,4,128,0,1,float16,float16,1023,0.08197866876920064
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,8,4,128,0,1,float16,fp8,1023,0.037205333511034645
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,8,4,128,0,1,float16,float16,2047,0.12124799688657124
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,8,4,128,0,1,float16,fp8,2047,0.05665599803129832
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,8,4,128,0,1,float16,float16,4095,0.1933013399442037
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,8,4,128,0,1,float16,fp8,4095,0.09665066997210185
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,8,4,128,0,1,float16,float16,8191,0.3355199893315633
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,8,1,128,0,1,float16,float16,1,0.016309333344300587
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,8,4,128,0,1,float16,fp8,8191,0.1755946675936381
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,8,1,128,0,1,float16,fp8,1,0.007530666887760162
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,8,1,128,0,1,float16,float16,3,0.016154666741689045
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,8,1,128,0,1,float16,fp8,3,0.0074506668994824094
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,8,4,128,0,1,float16,fp8,16383,0.33674665292104083
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,8,1,128,0,1,float16,float16,7,0.01565333331624667
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,8,1,128,0,1,float16,fp8,7,0.007274666801095009
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,8,1,128,0,1,float16,float16,15,0.01634666696190834
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,8,4,128,0,1,float16,float16,16383,0.6202666759490967
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,8,1,128,0,1,float16,float16,31,0.0164533331990242
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,8,1,128,0,1,float16,fp8,31,0.006735999758044879
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,8,1,128,0,1,float16,float16,63,0.016309333344300587
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,8,1,128,0,1,float16,fp8,63,0.007274666801095009
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,8,1,128,0,1,float16,float16,127,0.016250666230916977
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,8,1,128,0,1,float16,fp8,127,0.008266666904091835
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,8,1,128,0,1,float16,fp8,255,0.015637333194414776
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,8,1,128,0,1,float16,float16,255,0.030896000564098358
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,8,1,128,0,1,float16,float16,511,0.04824000100294749
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,8,1,128,0,1,float16,fp8,511,0.021898667017618816
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,8,1,128,0,1,float16,float16,1023,0.0628053347269694
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,8,1,128,0,1,float16,fp8,1023,0.027744000156720478
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,8,1,128,0,1,float16,fp8,2047,0.040261333187421165
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,8,1,128,0,1,float16,float16,2047,0.08565866947174072
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,8,1,128,0,1,float16,float16,4095,0.12223466237386067
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,8,2,128,0,1,float16,float16,1,0.025775998830795288
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,8,2,128,0,1,float16,fp8,1,0.010314666976531347
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,8,2,128,0,1,float16,float16,3,0.025034666061401367
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,8,1,128,0,1,float16,fp8,4095,0.05886933207511902
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,8,2,128,0,1,float16,fp8,3,0.010186666622757912
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,8,2,128,0,1,float16,float16,7,0.025562666356563568
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,8,2,128,0,1,float16,fp8,7,0.010128000130256018
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,8,2,128,0,1,float16,float16,15,0.02569066733121872
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,8,2,128,0,1,float16,fp8,15,0.009653333574533463
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,8,2,128,0,1,float16,float16,31,0.025557334224383037
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,8,2,128,0,1,float16,fp8,31,0.01022933361430963
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,8,2,128,0,1,float16,float16,63,0.025386666258176167
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,8,2,128,0,1,float16,fp8,63,0.010053333515922228
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,8,2,128,0,1,float16,float16,127,0.02571733295917511
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,8,2,128,0,1,float16,fp8,127,0.011402666568756104
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,8,2,128,0,1,float16,float16,255,0.05008000135421753
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,8,2,128,0,1,float16,fp8,255,0.020202666521072388
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,8,2,128,0,1,float16,float16,511,0.06550933420658112
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,8,2,128,0,1,float16,fp8,511,0.025589334468046825
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,8,2,128,0,1,float16,fp8,1023,0.037690666814645134
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,8,2,128,0,1,float16,float16,1023,0.08211199939250946
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,8,2,128,0,1,float16,float16,2047,0.12197867035865784
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,8,2,128,0,1,float16,fp8,2047,0.05680533250172933
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,8,2,128,0,1,float16,float16,4095,0.19425066312154135
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,8,4,128,0,1,float16,float16,1,0.044154668847719826
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,8,4,128,0,1,float16,float16,3,0.04457066456476847
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,8,4,128,0,1,float16,fp8,3,0.014101333916187286
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,8,2,128,0,1,float16,fp8,4095,0.09660800298055013
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,8,4,128,0,1,float16,fp8,7,0.014058666924635569
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,8,4,128,0,1,float16,fp8,1,0.014416000495354334
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,8,4,128,0,1,float16,float16,7,0.04446400205294291
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,8,4,128,0,1,float16,float16,15,0.044165333112080894
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,8,4,128,0,1,float16,fp8,15,0.014287999520699183
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,8,4,128,0,1,float16,float16,31,0.04432533184687296
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,8,4,128,0,1,float16,fp8,31,0.013994666437307993
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,8,4,128,0,1,float16,float16,63,0.04408533374468485
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,8,4,128,0,1,float16,fp8,63,0.014325333138306936
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,8,1,128,0,1,float16,fp8,15,0.007471999774376552
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,8,4,128,0,1,float16,float16,127,0.04450133442878723
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,8,4,128,0,1,float16,fp8,127,0.015824000040690105
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,8,4,128,0,1,float16,float16,255,0.053344001372655235
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,8,4,128,0,1,float16,fp8,255,0.02120000123977661
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,8,4,128,0,1,float16,float16,511,0.07045866549015045
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,8,4,128,0,1,float16,fp8,511,0.033173332611719765
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,8,4,128,0,1,float16,float16,1023,0.10539733370145161
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,8,4,128,0,1,float16,fp8,1023,0.05262400209903717
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,8,4,128,0,1,float16,fp8,2047,0.09245333075523376
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,8,4,128,0,1,float16,float16,2047,0.17850667238235474
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,8,1,128,0,1,float16,float16,1,0.02605866640806198
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,8,4,128,0,1,float16,fp8,4095,0.17237865924835205
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,8,4,128,0,1,float16,float16,4095,0.3153013388315837
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,8,1,128,0,1,float16,fp8,3,0.011264000087976456
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,8,1,128,0,1,float16,fp8,1,0.011120000233252844
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,8,1,128,0,1,float16,float16,3,0.026906666656335194
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,8,1,128,0,1,float16,float16,7,0.02603200078010559
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,8,1,128,0,1,float16,float16,15,0.02632533262173335
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,8,1,128,0,1,float16,fp8,7,0.01119999960064888
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,8,1,128,0,1,float16,fp8,15,0.011098666737476984
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,8,1,128,0,1,float16,float16,31,0.026416001220544178
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,8,1,128,0,1,float16,fp8,31,0.011136000355084738
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,8,1,128,0,1,float16,float16,63,0.02630399912595749
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,8,1,128,0,1,float16,float16,127,0.02646933247645696
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,8,1,128,0,1,float16,fp8,63,0.011338666081428528
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,8,1,128,0,1,float16,fp8,127,0.013023999830087027
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,8,1,128,0,1,float16,float16,255,0.051557332277297974
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,8,1,128,0,1,float16,fp8,255,0.021909333765506744
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,8,1,128,0,1,float16,float16,511,0.06588266789913177
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,8,1,128,0,1,float16,fp8,511,0.02809600035349528
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,8,1,128,0,1,float16,fp8,1023,0.04041066765785217
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,8,1,128,0,1,float16,float16,1023,0.08269866804281871
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,8,2,128,0,1,float16,float16,1,0.04508799811204275
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,8,1,128,0,1,float16,fp8,2047,0.059119999408721924
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,8,2,128,0,1,float16,fp8,1,0.014261333892742792
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,8,1,128,0,1,float16,float16,2047,0.12409599622090657
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,8,2,128,0,1,float16,float16,3,0.04488533238569895
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,8,2,128,0,1,float16,fp8,7,0.014389333625634512
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,8,2,128,0,1,float16,float16,7,0.04452266792456309
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,8,2,128,0,1,float16,fp8,3,0.014271999398867289
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,8,2,128,0,1,float16,fp8,15,0.01404800017674764
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,8,2,128,0,1,float16,float16,15,0.04502933224042257
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,8,2,128,0,1,float16,float16,31,0.045007998744646706
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,8,2,128,0,1,float16,fp8,31,0.014554666976133982
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,8,2,128,0,1,float16,float16,63,0.04438933233420054
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,8,2,128,0,1,float16,float16,127,0.045738667249679565
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,8,2,128,0,1,float16,fp8,63,0.014287999520699183
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,8,2,128,0,1,float16,fp8,127,0.01716800034046173
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,8,2,128,0,1,float16,float16,255,0.05389333268006643
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,8,2,128,0,1,float16,fp8,255,0.022229333718617756
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,8,2,128,0,1,float16,float16,511,0.07128533224264781
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,8,2,128,0,1,float16,float16,1023,0.10476799805959065
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,8,2,128,0,1,float16,fp8,511,0.034058667719364166
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,8,2,128,0,1,float16,fp8,1023,0.053541332483291626
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,8,4,128,0,1,float16,float16,1,0.08118933439254761
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,8,4,128,0,1,float16,fp8,1,0.024362665911515553
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,8,4,128,0,1,float16,float16,3,0.08087466657161713
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,8,2,128,0,1,float16,float16,2047,0.1776906649271647
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,8,2,128,0,1,float16,fp8,2047,0.09398933251698811
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,8,4,128,0,1,float16,float16,7,0.08138133088747661
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,8,4,128,0,1,float16,fp8,3,0.024122667809327442
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,8,4,128,0,1,float16,fp8,15,0.023562667270501454
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,8,4,128,0,1,float16,fp8,7,0.024069334069887798
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,8,4,128,0,1,float16,float16,15,0.08185066779454549
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,8,4,128,0,1,float16,float16,31,0.08150400221347809
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,8,4,128,0,1,float16,fp8,31,0.024314666787783306
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,8,4,128,0,1,float16,fp8,63,0.023530667026837666
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,8,4,128,0,1,float16,float16,63,0.0806826651096344
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,8,4,128,0,1,float16,float16,127,0.08226666847864787
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,8,4,128,0,1,float16,fp8,127,0.028138667345046997
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,8,4,128,0,1,float16,float16,255,0.09681600332260132
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,8,4,128,0,1,float16,fp8,511,0.05880000193913778
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,8,4,128,0,1,float16,fp8,255,0.04062933226426443
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,8,4,128,0,1,float16,float16,511,0.12921067078908285
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,8,4,128,0,1,float16,fp8,1023,0.09813867012659709
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,8,1,128,0,1,float16,float16,1,0.008698666468262672
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,8,4,128,0,1,float16,float16,1023,0.19700799385706583
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,8,1,128,0,1,float16,fp8,1,0.008234666660428047
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,8,1,128,0,1,float16,float16,3,0.009269333134094873
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,8,1,128,0,1,float16,fp8,3,0.0058613332609335584
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,8,1,128,0,1,float16,float16,7,0.008746666833758354
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,8,4,128,0,1,float16,float16,2047,0.33636267979939777
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,8,1,128,0,1,float16,fp8,7,0.007530666887760162
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,8,4,128,0,1,float16,fp8,2047,0.1781546672185262
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,8,1,128,0,1,float16,float16,15,0.009663999701539675
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,8,1,128,0,1,float16,fp8,63,0.0069386667261521024
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,8,1,128,0,1,float16,fp8,15,0.007125333572427432
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,8,1,128,0,1,float16,float16,31,0.009402666861812273
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,8,1,128,0,1,float16,fp8,31,0.006053333481152852
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,8,1,128,0,1,float16,float16,511,0.01642666632930438
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,8,1,128,0,1,float16,fp8,255,0.008362666393319765
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,8,1,128,0,1,float16,float16,63,0.011674666156371435
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,8,1,128,0,1,float16,fp8,127,0.008570666735370954
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,8,1,128,0,1,float16,float16,127,0.011989332735538483
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,8,1,128,0,1,float16,float16,255,0.011952000359694162
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,8,1,128,0,1,float16,fp8,511,0.007887999837597212
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,8,1,128,0,1,float16,float16,1023,0.013376000026861826
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,8,1,128,0,1,float16,fp8,1023,0.009045333291093508
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,8,1,128,0,1,float16,float16,2047,0.016976000120242436
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,8,1,128,0,1,float16,fp8,2047,0.00972800018886725
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,8,1,128,0,1,float16,float16,4095,0.018602666755517323
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,8,1,128,0,1,float16,fp8,4095,0.011663999408483505
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,8,1,128,0,1,float16,float16,8191,0.027530667682488758
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,8,1,128,0,1,float16,fp8,8191,0.014720000326633453
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,8,1,128,0,1,float16,float16,16383,0.03180266668399175
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,8,1,128,0,1,float16,fp8,16383,0.018288000176350277
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,8,1,128,0,1,float16,fp8,32767,0.025616000096003216
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,8,1,128,0,1,float16,float16,32767,0.051882664362589516
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,8,2,128,0,1,float16,float16,1,0.00871999996403853
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,8,2,128,0,1,float16,float16,3,0.008650666723648706
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,8,2,128,0,1,float16,fp8,1,0.00720000018676122
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,8,2,128,0,1,float16,fp8,3,0.007007999966541926
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,8,1,128,0,1,float16,float16,65535,0.07683200140794118
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,8,2,128,0,1,float16,float16,15,0.009242666885256767
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,8,2,128,0,1,float16,float16,7,0.008778666456540426
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,8,2,128,0,1,float16,fp8,7,0.005498666937152545
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,8,1,128,0,1,float16,fp8,65535,0.048138668139775596
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,8,2,128,0,1,float16,fp8,15,0.006853333363930385
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,8,2,128,0,1,float16,float16,31,0.01032533310353756
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,8,2,128,0,1,float16,fp8,31,0.00744000015159448
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,8,2,128,0,1,float16,float16,63,0.011141333729028702
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,8,2,128,0,1,float16,fp8,63,0.005669333040714264
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,8,1,128,0,1,float16,fp8,131071,0.06677333513895671
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,8,2,128,0,1,float16,float16,127,0.011887999872366587
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,8,2,128,0,1,float16,fp8,127,0.006981333096822103
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,8,2,128,0,1,float16,float16,255,0.012149333953857422
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,8,2,128,0,1,float16,fp8,255,0.00789866658548514
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,8,2,128,0,1,float16,float16,511,0.013082666943470636
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,8,2,128,0,1,float16,fp8,511,0.007674666742483775
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,8,1,128,0,1,float16,float16,131071,0.12878933548927307
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,8,2,128,0,1,float16,float16,1023,0.013023999830087027
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,8,2,128,0,1,float16,fp8,1023,0.009039999917149544
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,8,2,128,0,1,float16,float16,2047,0.021562665700912476
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,8,2,128,0,1,float16,fp8,2047,0.010928000013033548
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,8,2,128,0,1,float16,float16,4095,0.024773334463437397
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,8,2,128,0,1,float16,fp8,4095,0.011695999652147293
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,8,2,128,0,1,float16,float16,8191,0.03514666606982549
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,8,2,128,0,1,float16,fp8,8191,0.016858667135238647
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,8,2,128,0,1,float16,float16,16383,0.05228800078233083
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,8,2,128,0,1,float16,fp8,16383,0.028042666614055634
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,8,2,128,0,1,float16,float16,32767,0.08136533200740814
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,8,2,128,0,1,float16,fp8,32767,0.03969600051641464
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,8,4,128,0,1,float16,float16,1,0.008714666590094566
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,8,2,128,0,1,float16,fp8,65535,0.06443733473618825
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,8,4,128,0,1,float16,float16,3,0.009455999980370203
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,8,4,128,0,1,float16,fp8,3,0.005674666414658229
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,8,4,128,0,1,float16,fp8,1,0.006917333230376244
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,8,2,128,0,1,float16,float16,65535,0.12730133533477783
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,8,4,128,0,1,float16,float16,7,0.008709333216150602
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,8,4,128,0,1,float16,fp8,15,0.007173333317041397
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,8,4,128,0,1,float16,fp8,7,0.008357333640257517
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,8,4,128,0,1,float16,float16,15,0.00902399979531765
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,8,4,128,0,1,float16,float16,31,0.010064000263810158
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,8,4,128,0,1,float16,fp8,31,0.00596266674498717
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,8,4,128,0,1,float16,float16,63,0.011727999895811081
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,8,2,128,0,1,float16,float16,131071,0.20900267362594604
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,8,4,128,0,1,float16,fp8,63,0.008277333031098047
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,8,4,128,0,1,float16,float16,127,0.01128000020980835
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,8,4,128,0,1,float16,fp8,127,0.007386666412154834
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,8,4,128,0,1,float16,float16,255,0.012533333152532578
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,8,4,128,0,1,float16,fp8,255,0.007823999971151352
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,8,2,128,0,1,float16,fp8,131071,0.11079466342926025
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,8,4,128,0,1,float16,float16,511,0.012970666090647379
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,8,4,128,0,1,float16,fp8,511,0.008714666590094566
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,8,4,128,0,1,float16,float16,1023,0.019440000255902607
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,8,4,128,0,1,float16,fp8,1023,0.009594666461149851
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,8,4,128,0,1,float16,float16,2047,0.02810666710138321
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,8,4,128,0,1,float16,fp8,2047,0.01190399999419848
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,8,4,128,0,1,float16,fp8,4095,0.01782400036851565
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,8,4,128,0,1,float16,float16,4095,0.04065600037574768
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,8,4,128,0,1,float16,float16,8191,0.05932266513506571
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,8,4,128,0,1,float16,fp8,8191,0.023669332265853882
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,8,4,128,0,1,float16,float16,16383,0.08799999952316284
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,8,4,128,0,1,float16,fp8,16383,0.03942933430274328
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,8,4,128,0,1,float16,fp8,32767,0.06121066709359487
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,8,4,128,0,1,float16,float16,32767,0.12474133570988973
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,8,1,128,0,1,float16,float16,1,0.04554666578769684
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,8,1,128,0,1,float16,fp8,1,0.015087999403476715
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,8,4,128,0,1,float16,float16,65535,0.2013546625773112
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,8,4,128,0,1,float16,fp8,65535,0.10646933317184448
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,8,1,128,0,1,float16,float16,3,0.046282668908437095
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,8,1,128,0,1,float16,fp8,3,0.015018666783968607
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,8,1,128,0,1,float16,float16,7,0.04544533292452494
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,8,4,128,0,1,float16,float16,131071,0.3572426637013753
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,8,1,128,0,1,float16,fp8,31,0.01504533365368843
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,8,1,128,0,1,float16,fp8,7,0.01544533297419548
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,8,1,128,0,1,float16,float16,15,0.04686399797598521
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,8,1,128,0,1,float16,float16,63,0.04542933404445648
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,8,1,128,0,1,float16,fp8,15,0.015775999675194424
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,8,1,128,0,1,float16,float16,31,0.04655999938646952
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,8,4,128,0,1,float16,fp8,131071,0.18078400691350302
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,8,1,128,0,1,float16,fp8,63,0.01498666654030482
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,8,1,128,0,1,float16,float16,127,0.046282668908437095
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,8,1,128,0,1,float16,fp8,255,0.024773334463437397
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,8,1,128,0,1,float16,fp8,127,0.01877333347996076
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,8,1,128,0,1,float16,float16,255,0.056048000852266945
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,8,1,128,0,1,float16,float16,511,0.072543998559316
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,8,2,128,0,1,float16,float16,1,0.08120533327261607
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,8,2,128,0,1,float16,fp8,1,0.02383466561635335
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,8,1,128,0,1,float16,fp8,511,0.036864000062147774
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,8,1,128,0,1,float16,fp8,1023,0.05574933191140493
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,8,2,128,0,1,float16,float16,3,0.08154666423797607
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,8,1,128,0,1,float16,float16,1023,0.10726400216420491
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,8,2,128,0,1,float16,fp8,3,0.024453334510326385
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,8,2,128,0,1,float16,fp8,7,0.023775999744733173
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,8,2,128,0,1,float16,float16,7,0.08230400085449219
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,8,2,128,0,1,float16,float16,15,0.08131200075149536
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,8,2,128,0,1,float16,float16,31,0.08138133088747661
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,8,2,128,0,1,float16,fp8,15,0.02455466737349828
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,8,2,128,0,1,float16,fp8,31,0.02510400116443634
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,8,2,128,0,1,float16,float16,127,0.08330666522185008
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,8,2,128,0,1,float16,float16,63,0.08132799963156383
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,8,2,128,0,1,float16,fp8,63,0.024346667031447094
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,8,2,128,0,1,float16,float16,255,0.09618133306503296
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,8,2,128,0,1,float16,fp8,255,0.04102933406829834
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,8,2,128,0,1,float16,fp8,127,0.029792000850041706
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,8,2,128,0,1,float16,float16,1023,0.1969226598739624
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,8,4,128,0,1,float16,fp8,1,0.04264533519744873
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,8,2,128,0,1,float16,float16,511,0.13011733690897623
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,8,4,128,0,1,float16,float16,1,0.15221866965293884
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,8,2,128,0,1,float16,fp8,511,0.06089599927266439
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,8,4,128,0,1,float16,float16,3,0.15152000387509665
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,8,2,128,0,1,float16,fp8,1023,0.10020266969998677
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,8,4,128,0,1,float16,fp8,3,0.043653334180514015
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,8,4,128,0,1,float16,float16,7,0.1508746643861135
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,8,4,128,0,1,float16,fp8,7,0.04404266675313314
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,8,4,128,0,1,float16,fp8,15,0.043866669138272606
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,8,4,128,0,1,float16,fp8,31,0.043578664461771645
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,8,4,128,0,1,float16,float16,15,0.1521440049012502
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,8,4,128,0,1,float16,float16,31,0.15190399686495462
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,8,4,128,0,1,float16,float16,127,0.15370133519172668
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,8,4,128,0,1,float16,fp8,127,0.05351466437180837
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,8,4,128,0,1,float16,fp8,63,0.04374399781227112
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,8,4,128,0,1,float16,float16,63,0.15343999862670898
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,8,4,128,0,1,float16,fp8,255,0.07169599831104279
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,8,4,128,0,1,float16,float16,255,0.18060266971588135
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,8,4,128,0,1,float16,float16,1023,0.3760106563568115
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,8,4,128,0,1,float16,float16,511,0.24414400259653726
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,8,4,128,0,1,float16,fp8,511,0.10918933153152466
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,8,4,128,0,1,float16,fp8,1023,0.18452266852060953
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,8,1,128,0,1,float16,float16,3,0.08385599652926128
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,8,1,128,0,1,float16,float16,1,0.08451199531555176
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,8,1,128,0,1,float16,fp8,1,0.026586666703224182
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,8,1,128,0,1,float16,float16,7,0.08298133313655853
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,8,1,128,0,1,float16,fp8,3,0.026767998933792114
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,8,1,128,0,1,float16,fp8,15,0.0266239990790685
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,8,1,128,0,1,float16,fp8,7,0.026752000053723652
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,8,1,128,0,1,float16,float16,15,0.08409600456555684
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,8,1,128,0,1,float16,float16,63,0.08346133430798848
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,8,1,128,0,1,float16,float16,127,0.08658666412035625
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,8,1,128,0,1,float16,float16,31,0.0846720039844513
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,8,1,128,0,1,float16,fp8,31,0.02661866694688797
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,8,1,128,0,1,float16,fp8,63,0.02658133457104365
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,8,1,128,0,1,float16,fp8,255,0.04667733112970988
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,8,1,128,0,1,float16,fp8,127,0.03367999941110611
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,8,1,128,0,1,float16,float16,255,0.09959466258684795
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,8,1,128,0,1,float16,float16,511,0.13235732913017273
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,8,2,128,0,1,float16,float16,3,0.1518346667289734
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,8,2,128,0,1,float16,fp8,1,0.04740799963474274
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,8,2,128,0,1,float16,float16,1,0.15292800466219583
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,8,1,128,0,1,float16,fp8,511,0.0660159985224406
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,8,2,128,0,1,float16,fp8,3,0.046629334489504494
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,8,2,128,0,1,float16,fp8,7,0.04618666569391886
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,8,2,128,0,1,float16,float16,15,0.15201066931088766
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,8,2,128,0,1,float16,float16,7,0.15221866965293884
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,8,2,128,0,1,float16,fp8,15,0.047509332497914634
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,8,2,128,0,1,float16,fp8,63,0.045994664231936135
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,8,2,128,0,1,float16,float16,127,0.15648000439008078
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,8,2,128,0,1,float16,float16,31,0.1537866691748301
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,8,2,128,0,1,float16,fp8,31,0.04721599817276001
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,8,2,128,0,1,float16,float16,63,0.15615466237068176
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,8,2,128,0,1,float16,float16,255,0.18143999576568604
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,8,2,128,0,1,float16,fp8,127,0.05606399973233541
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,8,2,128,0,1,float16,float16,511,0.24560532967249551
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,8,2,128,0,1,float16,fp8,511,0.11130133271217346
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,8,4,128,0,1,float16,fp8,3,0.08879466851552327
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,8,2,128,0,1,float16,fp8,255,0.07530666887760162
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,8,4,128,0,1,float16,float16,1,0.2941546638806661
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,8,4,128,0,1,float16,fp8,1,0.08958933750788371
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,8,4,128,0,1,float16,float16,3,0.2943413257598877
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,8,4,128,0,1,float16,fp8,7,0.08918933073679607
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,8,4,128,0,1,float16,float16,7,0.29234133164087933
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,8,4,128,0,1,float16,float16,15,0.2948853373527527
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,8,4,128,0,1,float16,fp8,15,0.08861333131790161
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,8,4,128,0,1,float16,float16,31,0.30079466104507446
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,8,4,128,0,1,float16,fp8,31,0.08964266379674275
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,8,4,128,0,1,float16,float16,127,0.29923200607299805
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,8,4,128,0,1,float16,fp8,63,0.08939199646313985
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,8,4,128,0,1,float16,float16,63,0.3011626601219177
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,8,4,128,0,1,float16,float16,255,0.3473759889602661
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,8,1,128,0,1,float16,float16,1,0.00877333308259646
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,8,1,128,0,1,float16,float16,3,0.00961599995692571
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,8,4,128,0,1,float16,fp8,255,0.1350933313369751
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,8,1,128,0,1,float16,fp8,1,0.0069333333522081375
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,8,4,128,0,1,float16,fp8,127,0.10090133547782898
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,8,1,128,0,1,float16,fp8,3,0.005941333249211311
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,8,1,128,0,1,float16,float16,7,0.00890666681031386
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,8,1,128,0,1,float16,fp8,7,0.007157333195209503
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,8,1,128,0,1,float16,float16,15,0.009194666519761086
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,8,1,128,0,1,float16,fp8,15,0.0068800002336502075
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,8,1,128,0,1,float16,float16,31,0.010144000252087912
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,8,1,128,0,1,float16,fp8,31,0.006101333225766818
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,8,1,128,0,1,float16,float16,63,0.011312000453472137
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,8,1,128,0,1,float16,fp8,63,0.006965333595871925
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,8,1,128,0,1,float16,float16,127,0.011535999675591787
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,8,1,128,0,1,float16,fp8,127,0.006981333096822103
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,8,4,128,0,1,float16,float16,511,0.47486400604248047
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,8,4,128,0,1,float16,fp8,511,0.2104426622390747
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,8,1,128,0,1,float16,float16,255,0.012047999848922094
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,8,1,128,0,1,float16,fp8,255,0.00785600021481514
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,8,1,128,0,1,float16,float16,511,0.012602667013804117
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,8,1,128,0,1,float16,fp8,511,0.00890666681031386
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,8,1,128,0,1,float16,float16,1023,0.014688000082969666
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,8,1,128,0,1,float16,fp8,1023,0.00926399976015091
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,8,1,128,0,1,float16,float16,2047,0.022543999056021374
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,8,1,128,0,1,float16,fp8,8191,0.018101333330074947
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,8,1,128,0,1,float16,fp8,2047,0.011215999722480774
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,8,1,128,0,1,float16,float16,4095,0.02514133354028066
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,8,1,128,0,1,float16,fp8,4095,0.012768000364303589
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,8,1,128,0,1,float16,float16,8191,0.03578133384386698
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,8,1,128,0,1,float16,float16,16383,0.05286933481693268
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,8,1,128,0,1,float16,fp8,16383,0.029504001140594482
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,8,1,128,0,1,float16,float16,32767,0.08205866813659668
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,8,1,128,0,1,float16,fp8,32767,0.04164266586303711
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,8,2,128,0,1,float16,float16,1,0.00895999992887179
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,8,2,128,0,1,float16,fp8,1,0.006842666616042455
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,8,2,128,0,1,float16,float16,3,0.009632000078757605
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,8,1,128,0,1,float16,float16,65535,0.1288479963938395
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,8,2,128,0,1,float16,fp8,3,0.006831999868154526
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,8,2,128,0,1,float16,float16,7,0.008949333180983862
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,8,1,128,0,1,float16,fp8,65535,0.06897599995136261
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,8,2,128,0,1,float16,fp8,7,0.00726400005320708
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,8,2,128,0,1,float16,float16,15,0.009919999788204828
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,8,2,128,0,1,float16,fp8,15,0.006890666360656421
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,8,2,128,0,1,float16,float16,31,0.010533332824707031
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,8,2,128,0,1,float16,fp8,31,0.0058080001423756284
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,8,2,128,0,1,float16,float16,63,0.011952000359694162
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,8,2,128,0,1,float16,fp8,63,0.00726400005320708
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,8,2,128,0,1,float16,fp8,255,0.007407999907930692
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,8,1,128,0,1,float16,fp8,131071,0.11140799522399902
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,8,2,128,0,1,float16,float16,127,0.012383999923865
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,8,1,128,0,1,float16,float16,131071,0.209824005762736
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,8,2,128,0,1,float16,fp8,127,0.0069759997228781385
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,8,2,128,0,1,float16,float16,255,0.01314666618903478
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,8,2,128,0,1,float16,float16,511,0.013440000514189402
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,8,2,128,0,1,float16,fp8,2047,0.011584000041087469
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,8,2,128,0,1,float16,fp8,511,0.008474666625261307
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,8,2,128,0,1,float16,float16,1023,0.019359999646743137
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,8,2,128,0,1,float16,fp8,1023,0.009119999905427298
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,8,2,128,0,1,float16,float16,2047,0.0288426677385966
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,8,2,128,0,1,float16,float16,4095,0.04009066770474116
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,8,2,128,0,1,float16,fp8,4095,0.018911999960740406
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,8,2,128,0,1,float16,float16,8191,0.05851200222969055
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,8,2,128,0,1,float16,fp8,8191,0.02481066683928172
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,8,2,128,0,1,float16,float16,16383,0.08634666601816814
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,8,2,128,0,1,float16,fp8,16383,0.03979733337958654
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,8,2,128,0,1,float16,float16,32767,0.12426666418711345
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,8,2,128,0,1,float16,fp8,32767,0.06272000074386597
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,8,4,128,0,1,float16,float16,1,0.010069333637754122
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,8,4,128,0,1,float16,fp8,1,0.00772266648709774
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,8,4,128,0,1,float16,float16,3,0.01099733387430509
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,8,4,128,0,1,float16,fp8,3,0.005850666513045629
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,8,4,128,0,1,float16,float16,7,0.010821333775917688
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,8,2,128,0,1,float16,fp8,65535,0.10524800419807434
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,8,4,128,0,1,float16,fp8,7,0.006831999868154526
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,8,4,128,0,1,float16,float16,15,0.010015999898314476
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,8,2,128,0,1,float16,float16,65535,0.2021333376566569
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,8,4,128,0,1,float16,fp8,15,0.007007999966541926
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,8,4,128,0,1,float16,float16,31,0.010101333260536194
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,8,4,128,0,1,float16,fp8,31,0.007098666702707608
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,8,4,128,0,1,float16,float16,63,0.010368000095089277
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,8,4,128,0,1,float16,fp8,127,0.006234666953484218
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,8,4,128,0,1,float16,float16,255,0.014090667168299357
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,8,4,128,0,1,float16,float16,127,0.010746666540702185
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,8,4,128,0,1,float16,float16,511,0.019039999693632126
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,8,4,128,0,1,float16,fp8,63,0.0068800002336502075
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,8,4,128,0,1,float16,fp8,255,0.00766933336853981
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,8,2,128,0,1,float16,fp8,131071,0.18029866615931192
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,8,2,128,0,1,float16,float16,131071,0.35901331901550293
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,8,4,128,0,1,float16,fp8,2047,0.016890666137139004
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,8,4,128,0,1,float16,fp8,511,0.009173333023985228
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,8,4,128,0,1,float16,float16,1023,0.028624000648657482
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,8,4,128,0,1,float16,fp8,1023,0.01350933313369751
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,8,4,128,0,1,float16,float16,2047,0.043509334325790405
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,8,4,128,0,1,float16,float16,4095,0.06712000072002411
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,8,4,128,0,1,float16,fp8,4095,0.02554133286078771
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,8,4,128,0,1,float16,float16,8191,0.0869760016600291
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,8,4,128,0,1,float16,fp8,8191,0.03826666623353958
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,8,4,128,0,1,float16,float16,16383,0.1206826666990916
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,8,4,128,0,1,float16,fp8,16383,0.056559999783833824
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,8,4,128,0,1,float16,fp8,32767,0.09637332955996196
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,8,4,128,0,1,float16,float16,32767,0.1929759979248047
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,4,1,64,128,1,float16,float16,1,0.008367999767263731
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,4,1,64,0,1,float16,float16,1,0.007781333600481351
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,4,1,64,128,1,float16,fp8,1,0.005797333394487699
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,4,1,64,0,1,float16,fp8,1,0.006831999868154526
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,4,1,64,128,1,float16,float16,3,0.008303999900817871
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,8,4,128,0,1,float16,fp8,65535,0.17771732807159424
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,4,1,64,0,1,float16,float16,3,0.007770666852593422
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,4,1,64,128,1,float16,float16,7,0.008293333152929941
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,4,1,64,128,1,float16,fp8,3,0.005770666524767876
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,8,4,128,0,1,float16,float16,65535,0.3357119957605998
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,4,1,64,0,1,float16,fp8,3,0.00679466687142849
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,4,1,64,128,1,float16,fp8,7,0.0058026667684316635
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,4,1,64,0,1,float16,float16,7,0.007840000092983246
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,4,1,64,0,1,float16,fp8,7,0.0069440001000960665
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,4,1,64,128,1,float16,float16,15,0.007807999849319458
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,4,1,64,0,1,float16,float16,15,0.008469333251317343
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,4,1,64,0,1,float16,float16,31,0.008986666798591614
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,4,1,64,128,1,float16,fp8,15,0.008405333384871483
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,4,1,64,0,1,float16,fp8,15,0.00589866687854131
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,4,1,64,128,1,float16,float16,31,0.008112000301480293
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,4,1,64,128,1,float16,fp8,31,0.00578666664659977
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,8,4,128,0,1,float16,float16,131071,0.6203893423080444
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,4,1,64,0,1,float16,fp8,31,0.00706666645904382
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,4,1,64,128,1,float16,float16,127,0.008949333180983862
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,4,1,64,128,1,float16,float16,63,0.009242666885256767
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,4,1,64,0,1,float16,float16,63,0.009653333574533463
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,8,4,128,0,1,float16,fp8,131071,0.3385386864344279
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,4,1,64,128,1,float16,fp8,63,0.005957333371043205
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,4,1,64,0,1,float16,float16,255,0.012175999581813812
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,4,1,64,0,1,float16,fp8,63,0.005888000130653381
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,4,1,64,0,1,float16,float16,127,0.009632000078757605
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,4,1,64,128,1,float16,float16,511,0.012367999802033106
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,4,1,64,128,1,float16,fp8,127,0.007290666922926903
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,4,1,64,0,1,float16,fp8,127,0.005999999741713206
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,4,1,64,128,1,float16,float16,255,0.01173866664369901
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,4,1,64,128,1,float16,fp8,255,0.0069919998447100324
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,4,1,64,0,1,float16,fp8,255,0.007829333345095316
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,4,1,64,0,1,float16,float16,511,0.01173866664369901
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,4,1,64,128,1,float16,fp8,511,0.007471999774376552
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,4,1,64,0,1,float16,fp8,511,0.008746666833758354
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,4,1,64,128,1,float16,float16,1023,0.01664000004529953
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,4,1,64,0,1,float16,float16,1023,0.016757333030303318
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,4,1,64,128,1,float16,fp8,1023,0.007338666667540868
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,4,1,64,0,1,float16,fp8,1023,0.008650666723648706
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,4,1,64,128,1,float16,float16,2047,0.01998399943113327
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,4,1,64,0,1,float16,float16,2047,0.024373332659403484
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,4,1,64,128,1,float16,fp8,4095,0.007525333513816197
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,4,1,64,128,1,float16,fp8,2047,0.007861333588759104
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,4,1,64,0,1,float16,fp8,2047,0.011317333827416102
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,4,1,64,128,1,float16,float16,4095,0.024005333582560223
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,4,1,64,0,1,float16,float16,4095,0.032831999162832894
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,4,1,64,0,1,float16,fp8,4095,0.01351999988158544
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,4,1,64,128,1,float16,float16,8191,0.02850666642189026
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,4,1,64,0,1,float16,float16,8191,0.04491733511288961
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,4,1,64,128,1,float16,fp8,8191,0.007930666829148928
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,4,1,64,0,1,float16,fp8,8191,0.023002666731675465
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,4,1,64,0,1,float16,float16,16383,0.0680213322242101
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,4,1,64,128,1,float16,float16,16383,0.033999999364217125
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,4,1,64,128,1,float16,fp8,16383,0.00789866658548514
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,4,1,64,0,1,float16,fp8,16383,0.031221332649389904
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,4,1,64,128,1,float16,float16,32767,0.03453333427508672
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,4,1,64,0,1,float16,float16,32767,0.09220799803733826
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,4,1,64,0,1,float16,fp8,32767,0.05101866523424784
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,4,1,64,128,1,float16,fp8,32767,0.008757333581646284
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,4,1,64,0,1,float16,float16,65535,0.13910399874051413
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,4,1,64,0,1,float16,fp8,65535,0.08371200164159139
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,4,1,64,128,1,float16,float16,65535,0.0351946676770846
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,4,1,64,128,1,float16,fp8,65535,0.0084906667470932
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,4,2,64,128,1,float16,float16,1,0.00955200009047985
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,4,2,64,0,1,float16,float16,1,0.010175999874869982
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,4,2,64,128,1,float16,fp8,1,0.005695999910434087
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,4,2,64,0,1,float16,fp8,1,0.0069866664707660675
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,4,2,64,128,1,float16,float16,3,0.012533333152532578
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,4,2,64,0,1,float16,float16,3,0.00961599995692571
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,4,2,64,128,1,float16,fp8,3,0.00725333330531915
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,4,2,64,0,1,float16,fp8,3,0.007360000163316727
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,4,2,64,128,1,float16,float16,7,0.009743999689817429
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,4,2,64,0,1,float16,float16,7,0.010357333347201347
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,4,2,64,128,1,float16,fp8,7,0.007344000041484833
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,4,2,64,0,1,float16,fp8,7,0.005930666501323382
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,4,2,64,128,1,float16,float16,15,0.01032533310353756
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,4,1,64,0,1,float16,float16,131071,0.23561600844065347
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,4,2,64,0,1,float16,float16,15,0.010138666878143946
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,4,2,64,128,1,float16,fp8,15,0.0058666666348775225
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,4,2,64,0,1,float16,fp8,15,0.006981333096822103
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,4,1,64,0,1,float16,fp8,131071,0.14562132954597473
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,4,2,64,0,1,float16,fp8,31,0.0069226666043202085
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,4,2,64,128,1,float16,float16,31,0.010117333382368088
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,4,1,64,128,1,float16,float16,131071,0.03659733384847641
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,4,2,64,128,1,float16,fp8,31,0.005872000008821487
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,4,2,64,0,1,float16,float16,31,0.009973333527644476
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,4,2,64,128,1,float16,float16,63,0.010239999741315842
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,4,2,64,0,1,float16,float16,63,0.010048000141978264
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,4,2,64,128,1,float16,fp8,127,0.007093333328763644
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,4,2,64,0,1,float16,fp8,127,0.007194666812817256
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,4,1,64,128,1,float16,fp8,131071,0.008656000097592672
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,4,2,64,128,1,float16,fp8,63,0.007413333281874657
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,4,2,64,0,1,float16,fp8,63,0.005930666501323382
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,4,2,64,128,1,float16,float16,127,0.009930666536092758
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,4,2,64,0,1,float16,float16,127,0.011343999455372492
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,4,2,64,128,1,float16,float16,255,0.014042666802803675
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,4,2,64,0,1,float16,float16,255,0.012383999923865
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,4,2,64,128,1,float16,fp8,255,0.007626666376988093
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,4,2,64,0,1,float16,fp8,255,0.008026666939258575
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,4,2,64,0,1,float16,float16,511,0.017786666750907898
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,4,2,64,128,1,float16,float16,511,0.01653333380818367
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,4,2,64,128,1,float16,fp8,511,0.007178666690985362
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,4,2,64,0,1,float16,fp8,511,0.008069333309928576
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,4,2,64,128,1,float16,float16,1023,0.02178666740655899
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,4,2,64,0,1,float16,float16,1023,0.02497066557407379
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,4,2,64,128,1,float16,fp8,1023,0.007914666707317034
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,4,2,64,0,1,float16,fp8,1023,0.010575999816258749
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,4,2,64,128,1,float16,float16,2047,0.028037334481875103
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,4,2,64,0,1,float16,float16,2047,0.036159999668598175
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,4,2,64,128,1,float16,fp8,2047,0.007941333577036858
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,4,2,64,0,1,float16,fp8,2047,0.015909332782030106
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,4,2,64,128,1,float16,float16,4095,0.034703999757766724
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,4,2,64,0,1,float16,float16,4095,0.05287466446558634
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,4,2,64,128,1,float16,fp8,4095,0.008485333373149237
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,4,2,64,0,1,float16,fp8,4095,0.021210665504137676
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,4,2,64,128,1,float16,float16,8191,0.034671999514102936
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,4,2,64,0,1,float16,float16,8191,0.06730666756629944
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,4,2,64,128,1,float16,float16,16383,0.03479466587305069
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,4,2,64,0,1,float16,float16,16383,0.08950400352478027
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,4,2,64,128,1,float16,fp8,8191,0.008074666683872541
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,4,2,64,0,1,float16,fp8,8191,0.028927999238173168
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,4,2,64,128,1,float16,fp8,16383,0.008581333483258883
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,4,2,64,0,1,float16,fp8,16383,0.04717866579691569
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,4,2,64,0,1,float16,float16,32767,0.13134933511416116
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,4,2,64,128,1,float16,float16,32767,0.03485333422819773
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,4,2,64,128,1,float16,fp8,32767,0.008816000074148178
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,4,2,64,0,1,float16,fp8,32767,0.07956799864768982
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,4,2,64,128,1,float16,float16,65535,0.034245334565639496
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,4,2,64,0,1,float16,float16,65535,0.21937066316604614
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,4,1,64,128,1,float16,float16,1,0.007429333403706551
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,4,1,64,0,1,float16,float16,1,0.007402666533986728
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,4,1,64,128,1,float16,fp8,1,0.005519999812046687
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,4,2,64,128,1,float16,fp8,65535,0.008639999975760778
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,4,1,64,0,1,float16,fp8,1,0.00554666668176651
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,4,2,64,0,1,float16,fp8,65535,0.14447999993960062
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,4,1,64,128,1,float16,float16,3,0.007621333623925845
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,4,1,64,128,1,float16,fp8,3,0.007093333328763644
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,4,1,64,0,1,float16,float16,3,0.007370666911204656
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,4,1,64,0,1,float16,fp8,3,0.005626666670044263
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,4,1,64,128,1,float16,float16,7,0.007813333223263422
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,4,1,64,0,1,float16,float16,7,0.008069333309928576
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,4,1,64,128,1,float16,fp8,7,0.005648000165820122
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,4,1,64,0,1,float16,fp8,7,0.005589333052436511
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,4,1,64,128,1,float16,float16,15,0.007861333588759104
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,4,2,64,128,1,float16,float16,131071,0.03551466763019562
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,4,1,64,0,1,float16,float16,15,0.00814933329820633
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,4,1,64,128,1,float16,fp8,15,0.005669333040714264
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,4,1,64,0,1,float16,fp8,15,0.007104000076651573
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,4,1,64,128,1,float16,float16,31,0.008432000254591307
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,4,1,64,0,1,float16,float16,31,0.0075519997626543045
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,4,1,64,128,1,float16,fp8,31,0.005397333453098933
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,4,1,64,0,1,float16,fp8,31,0.007834666719039282
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,4,1,64,128,1,float16,float16,63,0.009098666409651438
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,4,1,64,0,1,float16,float16,63,0.008293333152929941
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,4,2,64,0,1,float16,float16,131071,0.39552001158396405
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,4,2,64,128,1,float16,fp8,131071,0.008736000085870424
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,4,2,64,0,1,float16,fp8,131071,0.2722666660944621
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,4,1,64,128,1,float16,fp8,63,0.005445333197712898
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,4,1,64,0,1,float16,fp8,63,0.007050666958093643
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,4,1,64,128,1,float16,float16,127,0.009343999748428663
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,4,1,64,0,1,float16,fp8,127,0.008293333152929941
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,4,1,64,128,1,float16,float16,255,0.009045333291093508
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,4,1,64,0,1,float16,float16,127,0.008400000010927519
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,4,1,64,128,1,float16,fp8,127,0.005722666780153911
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,4,1,64,0,1,float16,float16,255,0.009002666920423508
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,4,1,64,128,1,float16,fp8,255,0.007141333073377609
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,4,1,64,0,1,float16,fp8,255,0.007786666974425316
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,4,1,64,128,1,float16,float16,511,0.009535999968647957
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,4,1,64,0,1,float16,float16,511,0.010010666524370512
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,4,1,64,128,1,float16,fp8,511,0.006575999781489372
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,4,1,64,0,1,float16,fp8,511,0.007520000139872233
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,4,1,64,128,1,float16,float16,1023,0.013749333719412485
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,4,1,64,0,1,float16,float16,1023,0.015861333658297855
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,4,1,64,128,1,float16,fp8,1023,0.007429333403706551
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,4,1,64,0,1,float16,fp8,1023,0.0085333331177632
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,4,1,64,128,1,float16,float16,2047,0.013482666263977686
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,4,1,64,0,1,float16,float16,2047,0.01653333380818367
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,4,1,64,128,1,float16,fp8,2047,0.006506666541099548
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,4,1,64,0,1,float16,fp8,2047,0.00919999989370505
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,4,1,64,128,1,float16,float16,4095,0.013114667187134424
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,4,1,64,0,1,float16,float16,4095,0.01729600007335345
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,4,1,64,128,1,float16,fp8,4095,0.008112000301480293
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,4,1,64,0,1,float16,fp8,4095,0.010442666709423065
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,4,1,64,128,1,float16,float16,8191,0.012533333152532578
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,4,1,64,0,1,float16,float16,8191,0.014618666221698126
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,4,1,64,128,1,float16,fp8,8191,0.007184000064929326
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,4,1,64,0,1,float16,fp8,8191,0.012149333953857422
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,4,1,64,128,1,float16,float16,16383,0.013850666582584381
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,4,1,64,0,1,float16,float16,16383,0.01617066686352094
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,4,1,64,128,1,float16,fp8,16383,0.008405333384871483
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,4,1,64,0,1,float16,fp8,16383,0.014453332871198654
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,4,1,64,128,1,float16,float16,32767,0.014970666418472925
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,4,1,64,0,1,float16,float16,32767,0.01858666663368543
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,4,1,64,128,1,float16,fp8,32767,0.007157333195209503
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,4,1,64,0,1,float16,fp8,32767,0.01807466646035512
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,4,1,64,128,1,float16,float16,65535,0.016575999557971954
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,4,2,64,128,1,float16,float16,1,0.007327999919652939
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,4,1,64,0,1,float16,float16,65535,0.022709332406520844
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,4,1,64,128,1,float16,fp8,65535,0.008629333227872849
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,4,2,64,0,1,float16,float16,1,0.007327999919652939
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,4,1,64,0,1,float16,fp8,65535,0.022800001005331676
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,4,2,64,128,1,float16,fp8,1,0.006784000123540561
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,4,2,64,0,1,float16,fp8,1,0.009328000247478485
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,4,2,64,0,1,float16,float16,3,0.007887999837597212
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,4,2,64,128,1,float16,float16,3,0.007365333537260692
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,4,2,64,128,1,float16,fp8,3,0.00697066696981589
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,4,1,64,128,1,float16,float16,131071,0.020853333175182343
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,4,2,64,0,1,float16,fp8,3,0.008320000022649765
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,4,2,64,128,1,float16,float16,7,0.007871999715765318
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,4,1,64,0,1,float16,float16,131071,0.029103999336560566
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,4,2,64,128,1,float16,float16,15,0.008522666369875273
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,4,2,64,0,1,float16,float16,7,0.007322666545708974
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,4,2,64,128,1,float16,fp8,7,0.008314666648705801
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,4,2,64,0,1,float16,fp8,7,0.005770666524767876
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,4,1,64,128,1,float16,fp8,131071,0.007285333548982938
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,4,2,64,0,1,float16,float16,31,0.008383999889095625
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,4,2,64,0,1,float16,float16,15,0.007887999837597212
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,4,1,64,0,1,float16,fp8,131071,0.029669334491093952
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,4,2,64,128,1,float16,fp8,15,0.005525333185990651
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,4,2,64,0,1,float16,fp8,15,0.008405333384871483
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,4,2,64,128,1,float16,float16,31,0.007573333258430163
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,4,2,64,128,1,float16,fp8,31,0.006111999973654747
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,4,2,64,128,1,float16,float16,127,0.009114666531483332
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,4,2,64,0,1,float16,fp8,31,0.00697066696981589
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,4,2,64,128,1,float16,float16,63,0.009119999905427298
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,4,2,64,0,1,float16,float16,63,0.008762666955590248
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,4,2,64,128,1,float16,fp8,63,0.006927999978264173
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,4,2,64,0,1,float16,fp8,63,0.005472000067432721
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,4,2,64,0,1,float16,float16,127,0.008538666491707167
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,4,2,64,128,1,float16,fp8,127,0.006234666953484218
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,4,2,64,0,1,float16,fp8,127,0.008298666526873907
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,4,2,64,128,1,float16,float16,255,0.008922666932145754
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,4,2,64,0,1,float16,float16,255,0.009296000003814697
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,4,2,64,128,1,float16,fp8,255,0.007376000285148621
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,4,2,64,0,1,float16,fp8,255,0.00744000015159448
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,4,2,64,128,1,float16,float16,511,0.008463999877373377
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,4,2,64,0,1,float16,float16,511,0.009583999713261923
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,4,2,64,128,1,float16,fp8,511,0.006506666541099548
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,4,2,64,0,1,float16,fp8,511,0.008522666369875273
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,4,2,64,128,1,float16,float16,1023,0.013855999956528345
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,4,2,64,0,1,float16,float16,1023,0.01595199977358182
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,4,2,64,128,1,float16,fp8,1023,0.007658666620651881
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,4,2,64,0,1,float16,fp8,1023,0.008266666904091835
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,4,2,64,128,1,float16,float16,2047,0.012725333372751871
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,4,2,64,128,1,float16,fp8,4095,0.00761600024998188
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,4,2,64,0,1,float16,float16,2047,0.016656000167131424
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,4,2,64,128,1,float16,fp8,2047,0.0069973332186539965
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,4,2,64,0,1,float16,fp8,2047,0.00933333362142245
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,4,2,64,128,1,float16,float16,4095,0.012960000584522883
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,4,2,64,0,1,float16,float16,4095,0.013178666432698568
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,4,2,64,0,1,float16,fp8,4095,0.010010666524370512
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,4,2,64,128,1,float16,float16,8191,0.013178666432698568
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,4,2,64,0,1,float16,float16,8191,0.015135999768972397
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,4,2,64,128,1,float16,fp8,8191,0.006720000257094701
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,4,2,64,0,1,float16,fp8,8191,0.01259200026591619
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,4,2,64,128,1,float16,float16,16383,0.014831999937693277
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,4,2,64,128,1,float16,float16,32767,0.015290666371583939
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,4,2,64,0,1,float16,float16,16383,0.01661866654952367
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,4,2,64,0,1,float16,float16,32767,0.019066666563351948
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,4,2,64,128,1,float16,fp8,16383,0.006586666529377301
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,4,2,64,0,1,float16,fp8,16383,0.014666666587193808
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,4,2,64,128,1,float16,fp8,32767,0.009503999724984169
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,4,2,64,0,1,float16,fp8,32767,0.018522666146357853
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,4,2,64,128,1,float16,float16,65535,0.017077332983414333
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,4,2,64,0,1,float16,float16,65535,0.023130667706330616
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,4,2,64,128,1,float16,fp8,65535,0.007104000076651573
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,4,2,64,0,1,float16,fp8,65535,0.022570667167504627
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,4,1,64,128,1,float16,float16,1,0.008143999924262365
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,4,1,64,0,1,float16,float16,1,0.007600000128149986
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,4,1,64,128,1,float16,fp8,1,0.005408000200986862
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,4,1,64,0,1,float16,fp8,1,0.0069226666043202085
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,4,1,64,128,1,float16,float16,3,0.007530666887760162
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,4,1,64,0,1,float16,float16,3,0.007386666412154834
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,4,1,64,128,1,float16,fp8,3,0.00696000022192796
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,4,2,64,128,1,float16,float16,131071,0.021722666919231415
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,4,2,64,0,1,float16,fp8,131071,0.032629333436489105
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,4,1,64,0,1,float16,fp8,3,0.005658666913708051
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,4,2,64,0,1,float16,float16,131071,0.04043733328580856
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,4,1,64,128,1,float16,float16,7,0.007360000163316727
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,4,2,64,128,1,float16,fp8,131071,0.0074453335255384445
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,4,1,64,0,1,float16,float16,7,0.008053333188096682
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,4,1,64,128,1,float16,fp8,7,0.007045333584149678
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,4,1,64,0,1,float16,fp8,7,0.005541333307822545
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,4,1,64,128,1,float16,float16,15,0.0074506668994824094
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,4,1,64,0,1,float16,float16,15,0.009061333412925402
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,4,1,64,128,1,float16,fp8,15,0.005578666925430298
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,4,1,64,0,1,float16,fp8,15,0.00821333316465219
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,4,1,64,128,1,float16,float16,31,0.008080000057816505
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,4,1,64,0,1,float16,float16,31,0.008218666538596153
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,4,1,64,128,1,float16,fp8,31,0.007157333195209503
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,4,1,64,0,1,float16,fp8,31,0.005509333064158757
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,4,1,64,128,1,float16,float16,63,0.0085333331177632
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,4,1,64,0,1,float16,float16,63,0.009312000125646591
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,4,1,64,128,1,float16,fp8,63,0.00535999983549118
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,4,1,64,0,1,float16,fp8,63,0.006831999868154526
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,4,1,64,0,1,float16,float16,255,0.009338666374484697
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,4,1,64,128,1,float16,float16,127,0.008912000184257826
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,4,1,64,0,1,float16,float16,127,0.009173333023985228
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,4,1,64,128,1,float16,fp8,127,0.007114666824539502
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,4,1,64,0,1,float16,fp8,127,0.006181333214044571
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,4,1,64,128,1,float16,float16,255,0.008565333361426989
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,4,1,64,128,1,float16,fp8,255,0.006501333167155583
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,4,1,64,0,1,float16,fp8,255,0.007386666412154834
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,4,1,64,128,1,float16,float16,511,0.009503999724984169
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,4,1,64,0,1,float16,float16,511,0.010090666512648264
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,4,1,64,128,1,float16,fp8,511,0.007237333183487256
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,4,1,64,0,1,float16,fp8,511,0.007754666730761528
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,4,1,64,128,1,float16,float16,1023,0.013557333499193192
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,4,1,64,0,1,float16,float16,1023,0.015541333705186844
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,4,1,64,128,1,float16,fp8,1023,0.006864000111818314
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,4,1,64,0,1,float16,fp8,1023,0.008293333152929941
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,4,1,64,128,1,float16,float16,2047,0.013712000101804733
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,4,1,64,0,1,float16,float16,2047,0.016895999511082966
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,4,1,64,128,1,float16,fp8,2047,0.007413333281874657
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,4,1,64,0,1,float16,fp8,2047,0.009370666618148485
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,4,1,64,128,1,float16,float16,4095,0.012725333372751871
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,4,1,64,0,1,float16,float16,4095,0.013301332791646322
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,4,1,64,128,1,float16,fp8,4095,0.006768000001708667
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,4,1,64,0,1,float16,fp8,4095,0.010506667196750641
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,4,1,64,128,1,float16,float16,16383,0.014186666657527288
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,4,1,64,128,1,float16,float16,8191,0.014325333138306936
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,4,1,64,0,1,float16,float16,8191,0.016058667252461117
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,4,1,64,128,1,float16,fp8,8191,0.007280000175038974
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,4,1,64,0,1,float16,fp8,8191,0.01240533341964086
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,4,1,64,0,1,float16,float16,16383,0.01655999943614006
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,4,1,64,128,1,float16,fp8,16383,0.006618666773041089
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,4,1,64,0,1,float16,float16,32767,0.020400000115235645
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,4,1,64,0,1,float16,fp8,16383,0.014570667097965876
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,4,1,64,128,1,float16,float16,32767,0.015344000111023584
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,4,1,64,128,1,float16,fp8,32767,0.007194666812817256
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,4,1,64,0,1,float16,fp8,32767,0.018437333405017853
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,4,1,64,128,1,float16,float16,65535,0.01739199956258138
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,4,1,64,0,1,float16,float16,65535,0.0225600004196167
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,4,1,64,128,1,float16,fp8,65535,0.006720000257094701
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,4,2,64,128,1,float16,float16,1,0.00795199970404307
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,4,1,64,0,1,float16,fp8,65535,0.02325333406527837
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,4,2,64,0,1,float16,float16,1,0.007424000029762586
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,4,2,64,128,1,float16,fp8,1,0.005552000055710475
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,4,2,64,0,1,float16,fp8,1,0.0074346667776505155
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,4,2,64,128,1,float16,float16,3,0.008762666955590248
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,4,2,64,128,1,float16,fp8,3,0.005685333162546158
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,4,2,64,0,1,float16,float16,3,0.00749333327015241
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,4,2,64,0,1,float16,fp8,3,0.007114666824539502
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,4,1,64,128,1,float16,float16,131071,0.022175999979178112
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,4,2,64,128,1,float16,float16,7,0.0086666668454806
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,4,2,64,0,1,float16,float16,7,0.007471999774376552
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,4,2,64,0,1,float16,fp8,7,0.007157333195209503
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,4,1,64,0,1,float16,float16,131071,0.039621333281199135
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,4,2,64,128,1,float16,fp8,7,0.005642666791876157
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,4,1,64,128,1,float16,fp8,131071,0.007407999907930692
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,4,1,64,0,1,float16,fp8,131071,0.03311999887228012
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,4,2,64,128,1,float16,float16,15,0.008672000219424566
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,4,2,64,0,1,float16,float16,31,0.008192000289758047
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,4,2,64,0,1,float16,float16,15,0.007530666887760162
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,4,2,64,128,1,float16,fp8,15,0.0068800002336502075
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,4,2,64,0,1,float16,fp8,15,0.00549333356320858
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,4,2,64,128,1,float16,float16,31,0.00772266648709774
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,4,2,64,128,1,float16,fp8,63,0.007114666824539502
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,4,2,64,128,1,float16,fp8,31,0.0069333333522081375
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,4,2,64,0,1,float16,fp8,31,0.008378666515151659
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,4,2,64,128,1,float16,float16,63,0.008976000050703684
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,4,2,64,0,1,float16,float16,63,0.008416000132759413
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,4,2,64,0,1,float16,fp8,127,0.008416000132759413
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,4,2,64,0,1,float16,fp8,63,0.005664000287652016
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,4,2,64,128,1,float16,float16,127,0.008799999952316284
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,4,2,64,0,1,float16,float16,127,0.009216000015536943
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,4,2,64,128,1,float16,fp8,127,0.007055999711155891
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,4,2,64,128,1,float16,float16,511,0.008602666358153025
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,4,2,64,128,1,float16,float16,255,0.009141333401203156
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,4,2,64,0,1,float16,float16,255,0.008576000109314919
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,4,2,64,128,1,float16,fp8,255,0.00850133349498113
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,4,2,64,0,1,float16,fp8,255,0.00706666645904382
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,4,2,64,0,1,float16,float16,511,0.010224000240365664
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,4,2,64,128,1,float16,fp8,511,0.007514666765928268
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,4,2,64,0,1,float16,fp8,511,0.007781333600481351
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,4,2,64,128,1,float16,float16,1023,0.013893333574136099
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,4,2,64,0,1,float16,float16,1023,0.015365333606799444
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,4,2,64,128,1,float16,fp8,1023,0.008314666648705801
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,4,2,64,0,1,float16,fp8,1023,0.007936000203092894
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,4,2,64,128,1,float16,float16,2047,0.012885333349307379
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,4,2,64,0,1,float16,float16,2047,0.013455999394257864
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,4,2,64,128,1,float16,fp8,2047,0.006751999879876773
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,4,2,64,0,1,float16,fp8,2047,0.009354666496316591
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,4,2,64,128,1,float16,float16,4095,0.012975999464591345
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,4,2,64,0,1,float16,float16,4095,0.01402666668097178
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,4,2,64,128,1,float16,fp8,4095,0.00696000022192796
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,4,2,64,0,1,float16,fp8,4095,0.010474666953086853
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,4,2,64,128,1,float16,float16,8191,0.014485333114862442
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,4,2,64,0,1,float16,float16,8191,0.01594666639963786
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,4,2,64,128,1,float16,fp8,8191,0.007349333415428798
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,4,2,64,0,1,float16,fp8,8191,0.012069333344697952
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,4,2,64,128,1,float16,float16,16383,0.015216000378131866
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,4,2,64,0,1,float16,float16,16383,0.017237332959969837
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,4,2,64,128,1,float16,fp8,16383,0.007536000261704127
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,4,2,64,0,1,float16,fp8,16383,0.015034666905800501
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,4,2,64,0,1,float16,float16,32767,0.026975999275843304
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,4,2,64,128,1,float16,float16,32767,0.018330667167901993
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,4,2,64,128,1,float16,fp8,32767,0.007338666667540868
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,4,2,64,0,1,float16,fp8,32767,0.019754666835069656
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,4,2,64,0,1,float16,float16,65535,0.032405334214369454
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,4,2,64,128,1,float16,float16,65535,0.0205226664741834
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,4,1,64,128,1,float16,float16,1,0.00996800015370051
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,4,2,64,128,1,float16,fp8,65535,0.007685333490371704
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,4,1,64,0,1,float16,float16,1,0.0103946669648091
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,4,2,64,0,1,float16,fp8,65535,0.025546667476495106
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,4,1,64,128,1,float16,fp8,1,0.005877333382765452
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,4,1,64,0,1,float16,fp8,1,0.007221333061655362
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,4,1,64,128,1,float16,float16,3,0.010474666953086853
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,4,1,64,0,1,float16,float16,3,0.010565333068370819
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,4,1,64,128,1,float16,fp8,3,0.005834666391213735
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,4,2,64,128,1,float16,float16,131071,0.026538667579491932
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,4,1,64,0,1,float16,fp8,3,0.006831999868154526
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,4,2,64,0,1,float16,fp8,131071,0.036992001036802925
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,4,1,64,128,1,float16,float16,7,0.010464000205198923
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,4,1,64,0,1,float16,float16,7,0.010581333190202713
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,4,2,64,0,1,float16,float16,131071,0.05542399982611338
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,4,1,64,128,1,float16,fp8,7,0.0058133335163195925
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,4,1,64,0,1,float16,fp8,7,0.005685333162546158
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,4,2,64,128,1,float16,fp8,131071,0.007386666412154834
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,4,1,64,128,1,float16,float16,15,0.010026666646202406
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,4,1,64,0,1,float16,float16,15,0.009930666536092758
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,4,1,64,128,1,float16,fp8,15,0.010938666760921478
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,4,1,64,0,1,float16,fp8,15,0.007135999699433644
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,4,1,64,128,1,float16,float16,31,0.010351999973257383
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,4,1,64,0,1,float16,float16,31,0.009914666414260864
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,4,1,64,128,1,float16,fp8,31,0.005701333284378052
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,4,1,64,0,1,float16,fp8,31,0.006874666859706243
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,4,1,64,128,1,float16,float16,63,0.009984000275532404
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,4,1,64,0,1,float16,float16,63,0.00983466642598311
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,4,1,64,128,1,float16,fp8,63,0.008261333530147871
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,4,1,64,0,1,float16,fp8,63,0.006954666847983996
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,4,1,64,128,1,float16,float16,127,0.010330666477481524
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,4,1,64,0,1,float16,float16,127,0.009866666669646898
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,4,1,64,128,1,float16,fp8,127,0.006122666721542676
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,4,1,64,0,1,float16,fp8,127,0.006784000123540561
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,4,1,64,128,1,float16,float16,255,0.012309333930412928
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,4,1,64,0,1,float16,float16,255,0.011626667032639185
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,4,1,64,128,1,float16,fp8,255,0.008661333471536636
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,4,1,64,0,1,float16,fp8,255,0.008143999924262365
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,4,1,64,128,1,float16,float16,511,0.01674666628241539
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,4,1,64,0,1,float16,float16,511,0.016783999900023144
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,4,1,64,128,1,float16,fp8,511,0.007216000308593114
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,4,1,64,0,1,float16,fp8,511,0.008432000254591307
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,4,1,64,128,1,float16,float16,1023,0.02164799968401591
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,4,1,64,0,1,float16,float16,1023,0.024106666445732117
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,4,1,64,128,1,float16,fp8,1023,0.008762666955590248
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,4,1,64,0,1,float16,fp8,1023,0.010672000547250112
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,4,1,64,0,1,float16,fp8,2047,0.016250666230916977
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,4,1,64,128,1,float16,float16,2047,0.027776000400384266
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,4,1,64,0,1,float16,float16,2047,0.035760000348091125
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,4,1,64,128,1,float16,fp8,2047,0.007914666707317034
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,4,1,64,128,1,float16,float16,4095,0.033573334415753685
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,4,1,64,0,1,float16,float16,4095,0.05351466437180837
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,4,1,64,128,1,float16,fp8,4095,0.0086666668454806
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,4,1,64,0,1,float16,fp8,4095,0.021125334004561108
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,4,1,64,0,1,float16,float16,8191,0.06699733436107635
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,4,1,64,128,1,float16,float16,8191,0.034314667185147606
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,4,1,64,128,1,float16,fp8,8191,0.0081386665503184
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,4,1,64,0,1,float16,fp8,8191,0.02848000079393387
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,4,1,64,128,1,float16,float16,16383,0.03503466645876566
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,4,1,64,0,1,float16,float16,16383,0.0899839997291565
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,4,1,64,0,1,float16,fp8,16383,0.04604800045490265
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,4,1,64,128,1,float16,fp8,16383,0.008485333373149237
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,4,1,64,128,1,float16,fp8,32767,0.00821333316465219
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,4,1,64,0,1,float16,float16,32767,0.1328000028928121
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,4,1,64,128,1,float16,float16,32767,0.03440000116825104
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,4,2,64,128,1,float16,float16,1,0.010117333382368088
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,4,1,64,0,1,float16,fp8,32767,0.07973866661389668
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,4,2,64,0,1,float16,float16,1,0.010368000095089277
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,4,2,64,128,1,float16,fp8,1,0.006058666855096817
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,4,2,64,0,1,float16,fp8,1,0.007247999931375186
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,4,2,64,128,1,float16,float16,3,0.010149333626031876
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,4,2,64,0,1,float16,float16,3,0.011029332876205444
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,4,2,64,128,1,float16,fp8,3,0.0074560002734263735
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,4,2,64,0,1,float16,fp8,3,0.005984000240763028
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,4,2,64,128,1,float16,float16,7,0.010853332777818045
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,4,1,64,0,1,float16,float16,65535,0.21953600645065308
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,4,2,64,0,1,float16,float16,7,0.010373333469033241
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,4,2,64,128,1,float16,fp8,7,0.00590933362642924
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,4,2,64,0,1,float16,fp8,7,0.008400000010927519
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,4,2,64,128,1,float16,float16,15,0.010293333480755487
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,4,1,64,0,1,float16,fp8,65535,0.14297599593798319
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,4,2,64,0,1,float16,float16,15,0.0102613332370917
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,4,2,64,128,1,float16,fp8,15,0.005914666379491488
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,4,1,64,128,1,float16,float16,65535,0.03479466587305069
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,4,2,64,0,1,float16,fp8,15,0.008474666625261307
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,4,2,64,128,1,float16,float16,31,0.0102613332370917
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,4,2,64,0,1,float16,float16,31,0.010559999694426855
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,4,1,64,128,1,float16,fp8,65535,0.008565333361426989
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,4,2,64,128,1,float16,fp8,63,0.007130666946371396
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,4,2,64,128,1,float16,fp8,31,0.006048000107208888
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,4,2,64,0,1,float16,fp8,31,0.005914666379491488
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,4,2,64,128,1,float16,float16,63,0.00980266680320104
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,4,2,64,0,1,float16,float16,63,0.010863999525705973
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,4,2,64,0,1,float16,fp8,63,0.007034666836261749
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,4,2,64,128,1,float16,float16,127,0.010597333312034607
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,4,2,64,0,1,float16,float16,127,0.010288000106811523
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,4,2,64,0,1,float16,fp8,255,0.008394666636983553
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,4,2,64,128,1,float16,fp8,127,0.006389333556095759
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,4,2,64,0,1,float16,fp8,127,0.006282666698098183
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,4,2,64,128,1,float16,float16,255,0.018506667266289394
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,4,2,64,0,1,float16,float16,255,0.017690667261679966
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,4,2,64,128,1,float16,fp8,255,0.008752000207702318
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,4,2,64,128,1,float16,float16,511,0.02327999969323476
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,4,2,64,0,1,float16,float16,511,0.025173333783944447
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,4,2,64,128,1,float16,fp8,511,0.008576000109314919
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,4,2,64,0,1,float16,fp8,511,0.009541333342591921
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,4,2,64,128,1,float16,float16,1023,0.032618666688601174
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,4,2,64,0,1,float16,float16,1023,0.04008000095685323
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,4,2,64,128,1,float16,fp8,1023,0.008453333129485449
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,4,2,64,0,1,float16,fp8,1023,0.016751999656359356
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,4,2,64,128,1,float16,float16,2047,0.03586666782697042
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,4,2,64,0,1,float16,float16,2047,0.05087466537952423
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,4,2,64,128,1,float16,fp8,2047,0.009279999881982803
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,4,2,64,0,1,float16,fp8,2047,0.019904000063737232
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,4,2,64,128,1,float16,float16,4095,0.03505599995454153
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,4,2,64,0,1,float16,float16,4095,0.06695466736952464
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,4,2,64,128,1,float16,fp8,4095,0.008367999767263731
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,4,2,64,0,1,float16,fp8,4095,0.02845333268245061
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,4,2,64,128,1,float16,float16,8191,0.035631999373435974
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,4,2,64,0,1,float16,float16,8191,0.08828266461690266
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,4,2,64,128,1,float16,fp8,8191,0.009039999917149544
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,4,2,64,0,1,float16,fp8,8191,0.045312002301216125
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,4,2,64,0,1,float16,float16,16383,0.1325386663277944
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,4,2,64,128,1,float16,float16,16383,0.035301332672437034
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,4,2,64,128,1,float16,fp8,16383,0.008458666503429413
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,4,2,64,0,1,float16,fp8,16383,0.07838400204976399
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,4,2,64,128,1,float16,float16,32767,0.03571200122435888
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,4,2,64,128,1,float16,fp8,32767,0.009253333633144697
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,4,2,64,0,1,float16,fp8,32767,0.14267733693122864
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,4,2,64,0,1,float16,float16,32767,0.21913599967956543
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,4,1,64,128,1,float16,float16,1,0.007962666451931
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,4,1,64,0,1,float16,float16,1,0.008090666805704435
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,4,1,64,128,1,float16,fp8,1,0.005664000287652016
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,4,1,64,0,1,float16,fp8,1,0.007167999943097432
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,4,1,64,128,1,float16,float16,3,0.008053333188096682
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,4,1,64,0,1,float16,float16,3,0.008047999814152718
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,4,1,64,128,1,float16,fp8,3,0.005541333307822545
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,4,1,64,0,1,float16,fp8,3,0.007173333317041397
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,4,1,64,128,1,float16,float16,7,0.007994666695594788
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,4,2,64,0,1,float16,float16,65535,0.38948265711466473
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,4,1,64,0,1,float16,float16,7,0.008176000167926153
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,4,2,64,128,1,float16,fp8,65535,0.008469333251317343
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,4,2,64,128,1,float16,float16,65535,0.03585600107908249
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,4,1,64,128,1,float16,fp8,7,0.005568000177542369
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,4,1,64,0,1,float16,fp8,7,0.007125333572427432
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,4,1,64,128,1,float16,float16,15,0.007957333077987036
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,4,1,64,0,1,float16,float16,15,0.008234666660428047
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,4,1,64,128,1,float16,fp8,15,0.005562666803598404
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,4,2,64,0,1,float16,fp8,65535,0.2723360061645508
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,4,1,64,0,1,float16,fp8,15,0.005568000177542369
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,4,1,64,128,1,float16,float16,31,0.008192000289758047
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,4,1,64,0,1,float16,float16,31,0.008320000022649765
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,4,1,64,128,1,float16,fp8,31,0.0069440001000960665
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,4,1,64,0,1,float16,fp8,31,0.00721066693464915
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,4,1,64,128,1,float16,float16,63,0.008463999877373377
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,4,1,64,0,1,float16,float16,63,0.008618666479984919
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,4,1,64,128,1,float16,fp8,63,0.008656000097592672
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,4,1,64,0,1,float16,fp8,63,0.008463999877373377
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,4,1,64,128,1,float16,float16,127,0.009173333023985228
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,4,1,64,0,1,float16,float16,127,0.009125333279371262
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,4,1,64,128,1,float16,fp8,127,0.007194666812817256
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,4,1,64,0,1,float16,fp8,127,0.005989333614706993
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,4,1,64,128,1,float16,float16,255,0.008703999842206636
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,4,1,64,0,1,float16,float16,255,0.009253333633144697
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,4,1,64,128,1,float16,fp8,255,0.0084906667470932
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,4,1,64,0,1,float16,fp8,255,0.007584000006318092
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,4,1,64,128,1,float16,float16,511,0.009119999905427298
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,4,1,64,0,1,float16,float16,511,0.01027199998497963
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,4,1,64,128,1,float16,fp8,1023,0.007397333160042763
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,4,1,64,128,1,float16,fp8,511,0.006821333120266597
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,4,1,64,0,1,float16,fp8,511,0.007413333281874657
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,4,1,64,128,1,float16,float16,1023,0.014202666779359182
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,4,1,64,0,1,float16,float16,1023,0.016143999993801117
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,4,1,64,0,1,float16,fp8,1023,0.008389333263039589
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,4,1,64,128,1,float16,float16,2047,0.014208000153303146
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,4,1,64,0,1,float16,float16,2047,0.014293332894643148
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,4,1,64,128,1,float16,fp8,2047,0.006698666761318843
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,4,1,64,0,1,float16,fp8,2047,0.008725333337982496
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,4,1,64,128,1,float16,float16,4095,0.014432000617186228
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,4,1,64,0,1,float16,float16,4095,0.015018666783968607
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,4,1,64,128,1,float16,fp8,4095,0.007178666690985362
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,4,1,64,0,1,float16,fp8,4095,0.01055466632048289
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,4,1,64,128,1,float16,float16,8191,0.014432000617186228
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,4,1,64,0,1,float16,float16,8191,0.017130666722853977
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,4,1,64,128,1,float16,fp8,16383,0.00725333330531915
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,4,1,64,128,1,float16,fp8,8191,0.006735999758044879
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,4,1,64,0,1,float16,fp8,8191,0.012026666353146235
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,4,1,64,128,1,float16,float16,32767,0.018330667167901993
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,4,1,64,128,1,float16,float16,16383,0.016000000139077503
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,4,1,64,0,1,float16,float16,16383,0.018757333358128864
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,4,1,64,0,1,float16,fp8,16383,0.015109332899252573
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,4,1,64,0,1,float16,float16,32767,0.02810666710138321
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,4,1,64,128,1,float16,fp8,32767,0.007327999919652939
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,4,1,64,0,1,float16,fp8,32767,0.019039999693632126
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,4,1,64,128,1,float16,float16,65535,0.020400000115235645
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,4,1,64,0,1,float16,float16,65535,0.03287466615438461
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,4,1,64,128,1,float16,fp8,65535,0.0075573331365982694
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,4,2,64,128,1,float16,float16,1,0.0074879998962084455
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,4,1,64,0,1,float16,fp8,65535,0.0249439999461174
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,4,2,64,0,1,float16,float16,1,0.007514666765928268
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,4,2,64,128,1,float16,fp8,1,0.007184000064929326
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,4,2,64,0,1,float16,fp8,1,0.005461333319544792
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,4,2,64,128,1,float16,float16,3,0.008469333251317343
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,4,2,64,128,1,float16,fp8,3,0.007418666655818622
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,4,2,64,0,1,float16,float16,3,0.0074986666440963745
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,4,2,64,0,1,float16,fp8,3,0.005615999922156334
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,4,2,64,128,1,float16,float16,7,0.008709333216150602
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,4,2,64,0,1,float16,float16,7,0.008021333565314611
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,4,1,64,0,1,float16,fp8,131071,0.03691199918588003
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,4,1,64,128,1,float16,float16,131071,0.027888000011444092
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,4,2,64,128,1,float16,fp8,7,0.00554666668176651
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,4,1,64,0,1,float16,float16,131071,0.05754666527112325
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,4,2,64,0,1,float16,fp8,7,0.005615999922156334
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,4,1,64,128,1,float16,fp8,131071,0.007370666911204656
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,4,2,64,128,1,float16,float16,15,0.008661333471536636
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,4,2,64,0,1,float16,float16,15,0.008031999692320824
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,4,2,64,128,1,float16,fp8,15,0.005488000189264615
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,4,2,64,0,1,float16,fp8,15,0.007007999966541926
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,4,2,64,128,1,float16,float16,31,0.007760000104705493
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,4,2,64,0,1,float16,float16,31,0.008469333251317343
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,4,2,64,128,1,float16,fp8,31,0.005775999898711841
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,4,2,64,0,1,float16,fp8,31,0.007061333085099856
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,4,2,64,128,1,float16,float16,63,0.009226666763424873
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,4,2,64,0,1,float16,float16,63,0.009109333157539368
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,4,2,64,128,1,float16,fp8,63,0.005658666913708051
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,4,2,64,0,1,float16,fp8,63,0.006853333363930385
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,4,2,64,128,1,float16,float16,127,0.008597333605090777
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,4,2,64,0,1,float16,float16,127,0.009216000015536943
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,4,2,64,128,1,float16,fp8,127,0.006640000268816948
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,4,2,64,0,1,float16,fp8,127,0.007109333450595538
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,4,2,64,0,1,float16,float16,511,0.010245333115259806
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,4,2,64,128,1,float16,float16,255,0.009568000212311745
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,4,2,64,0,1,float16,float16,255,0.009232000137368837
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,4,2,64,128,1,float16,fp8,255,0.0068800002336502075
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,4,2,64,0,1,float16,float16,1023,0.012527999778588613
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,4,2,64,0,1,float16,fp8,255,0.007786666974425316
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,4,2,64,128,1,float16,float16,511,0.008799999952316284
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,4,2,64,128,1,float16,fp8,511,0.00697066696981589
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,4,2,64,0,1,float16,fp8,511,0.007946666950980822
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,4,2,64,128,1,float16,float16,1023,0.012293333808581034
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,4,2,64,128,1,float16,fp8,1023,0.007610666876037915
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,4,2,64,0,1,float16,fp8,1023,0.007797333101431529
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,4,2,64,128,1,float16,float16,2047,0.014485333114862442
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,4,2,64,0,1,float16,float16,2047,0.014959999670584997
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,4,2,64,128,1,float16,fp8,2047,0.006874666859706243
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,4,2,64,0,1,float16,fp8,2047,0.009594666461149851
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,4,2,64,128,1,float16,float16,4095,0.014106666048367819
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,4,2,64,0,1,float16,float16,4095,0.015392000476519266
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,4,2,64,128,1,float16,fp8,4095,0.00766933336853981
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,4,2,64,0,1,float16,fp8,4095,0.010874666273593903
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,4,2,64,128,1,float16,float16,8191,0.01747200017174085
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,4,2,64,0,1,float16,float16,8191,0.021402666966120403
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,4,2,64,128,1,float16,fp8,8191,0.00754666638871034
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,4,2,64,128,1,float16,fp8,16383,0.008746666833758354
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,4,2,64,0,1,float16,fp8,8191,0.01341333364446958
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,4,2,64,128,1,float16,float16,16383,0.018533332894245785
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,4,2,64,128,1,float16,float16,32767,0.021744000415007275
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,4,2,64,0,1,float16,float16,16383,0.025285333395004272
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,4,2,64,0,1,float16,fp8,16383,0.016517333686351776
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,4,2,64,0,1,float16,float16,32767,0.03658666710058848
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,4,2,64,128,1,float16,fp8,32767,0.0084906667470932
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,4,2,64,0,1,float16,fp8,32767,0.022661333282788593
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,4,2,64,0,1,float16,float16,65535,0.05513066550095876
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,4,2,64,128,1,float16,float16,65535,0.026005332668622334
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,4,2,64,128,1,float16,fp8,65535,0.007520000139872233
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,4,2,64,0,1,float16,fp8,65535,0.03387733300526937
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,4,1,64,128,1,float16,float16,1,0.01071999967098236
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,4,1,64,0,1,float16,float16,1,0.010474666953086853
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,4,1,64,128,1,float16,fp8,1,0.00707733320693175
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,4,1,64,0,1,float16,fp8,1,0.007626666376988093
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,4,1,64,128,1,float16,float16,3,0.010437333335479101
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,4,1,64,0,1,float16,float16,3,0.010543999572594961
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,4,2,64,128,1,float16,float16,131071,0.03364266703526179
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,4,1,64,128,1,float16,fp8,3,0.00721066693464915
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,4,1,64,0,1,float16,fp8,3,0.007327999919652939
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,4,2,64,0,1,float16,float16,131071,0.09128000338872273
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,4,2,64,128,1,float16,fp8,131071,0.007626666376988093
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,4,2,64,0,1,float16,fp8,131071,0.06276800235112508
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,4,1,64,128,1,float16,float16,7,0.010602666685978571
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,4,1,64,128,1,float16,float16,15,0.01090666651725769
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,4,1,64,0,1,float16,float16,7,0.010640000303586325
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,4,1,64,0,1,float16,fp8,15,0.007135999699433644
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,4,1,64,128,1,float16,fp8,7,0.007029333462317784
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,4,1,64,0,1,float16,float16,31,0.01080000028014183
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,4,1,64,0,1,float16,fp8,7,0.007093333328763644
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,4,1,64,0,1,float16,float16,15,0.0103946669648091
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,4,1,64,128,1,float16,fp8,15,0.005999999741713206
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,4,1,64,128,1,float16,float16,31,0.01003200002014637
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,4,1,64,128,1,float16,fp8,31,0.007333333293596904
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,4,1,64,0,1,float16,fp8,31,0.007082666580875714
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,4,1,64,128,1,float16,float16,63,0.010410666465759277
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,4,1,64,128,1,float16,fp8,127,0.007280000175038974
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,4,1,64,0,1,float16,float16,63,0.01090666651725769
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,4,1,64,128,1,float16,fp8,63,0.007162666569153468
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,4,1,64,0,1,float16,fp8,63,0.007034666836261749
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,4,1,64,128,1,float16,float16,127,0.010197333370645842
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,4,1,64,0,1,float16,fp8,255,0.008346666892369589
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,4,1,64,0,1,float16,float16,127,0.010133333504199982
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,4,1,64,0,1,float16,fp8,127,0.007258666679263115
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,4,1,64,128,1,float16,float16,255,0.018277333428462345
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,4,1,64,0,1,float16,float16,255,0.0184906671444575
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,4,1,64,128,1,float16,fp8,255,0.00850133349498113
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,4,1,64,128,1,float16,float16,511,0.0229066660006841
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,4,1,64,0,1,float16,float16,511,0.0249439999461174
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,4,1,64,128,1,float16,fp8,511,0.008517333616813024
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,4,1,64,0,1,float16,fp8,1023,0.016901332885026932
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,4,1,64,0,1,float16,fp8,511,0.009482666850090027
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,4,1,64,128,1,float16,float16,2047,0.035317334036032356
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,4,1,64,128,1,float16,float16,1023,0.03314133236805598
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,4,1,64,0,1,float16,float16,1023,0.03994133323431015
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,4,1,64,128,1,float16,fp8,1023,0.009029333169261614
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,4,1,64,0,1,float16,float16,2047,0.051498666405677795
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,4,1,64,128,1,float16,fp8,2047,0.009072000160813332
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,4,1,64,0,1,float16,fp8,2047,0.020527999848127365
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,4,1,64,128,1,float16,float16,4095,0.035274667044480644
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,4,1,64,0,1,float16,float16,4095,0.0668213317791621
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,4,1,64,128,1,float16,fp8,4095,0.009141333401203156
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,4,1,64,0,1,float16,float16,8191,0.08936533331871033
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,4,1,64,128,1,float16,float16,8191,0.03569599986076355
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,4,1,64,0,1,float16,fp8,8191,0.04614933331807455
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,4,1,64,0,1,float16,fp8,4095,0.028229333460330963
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,4,1,64,128,1,float16,fp8,8191,0.009098666409651438
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,4,1,64,128,1,float16,float16,16383,0.03509866694609324
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,4,1,64,0,1,float16,fp8,16383,0.07778133451938629
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,4,1,64,0,1,float16,float16,16383,0.13300266861915588
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,4,2,64,128,1,float16,float16,1,0.01431999976436297
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,4,1,64,128,1,float16,fp8,16383,0.009242666885256767
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,4,2,64,0,1,float16,float16,1,0.014021333307027817
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,4,2,64,128,1,float16,fp8,1,0.006144000217318535
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,4,2,64,0,1,float16,fp8,1,0.007040000210205714
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,4,2,64,128,1,float16,float16,3,0.014074667046467463
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,4,2,64,0,1,float16,float16,3,0.015087999403476715
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,4,2,64,128,1,float16,fp8,3,0.007541333635648091
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,4,2,64,0,1,float16,fp8,3,0.007034666836261749
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,4,2,64,128,1,float16,float16,7,0.014058666924635569
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,4,2,64,0,1,float16,float16,7,0.015013333410024643
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,4,2,64,128,1,float16,fp8,7,0.007285333548982938
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,4,2,64,128,1,float16,float16,15,0.013957332819700241
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,4,2,64,0,1,float16,fp8,7,0.006853333363930385
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,4,1,64,128,1,float16,float16,32767,0.035760000348091125
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,4,2,64,0,1,float16,float16,15,0.015109332899252573
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,4,2,64,128,1,float16,fp8,15,0.007402666533986728
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,4,1,64,0,1,float16,float16,32767,0.22130133708318075
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,4,2,64,0,1,float16,fp8,15,0.006053333481152852
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,4,1,64,128,1,float16,fp8,32767,0.009088000282645226
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,4,2,64,128,1,float16,float16,31,0.014709333578745524
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,4,2,64,0,1,float16,float16,31,0.015146666516860327
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,4,1,64,0,1,float16,fp8,32767,0.14309866229693094
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,4,2,64,128,1,float16,fp8,31,0.007536000261704127
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,4,2,64,0,1,float16,fp8,31,0.006021333237489064
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,4,2,64,128,1,float16,float16,63,0.014618666221698126
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,4,2,64,0,1,float16,float16,63,0.014805333067973455
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,4,2,64,128,1,float16,fp8,63,0.007141333073377609
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,4,2,64,128,1,float16,fp8,127,0.007402666533986728
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,4,2,64,0,1,float16,fp8,63,0.00707733320693175
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,4,2,64,128,1,float16,float16,127,0.014655999839305878
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,4,2,64,0,1,float16,float16,127,0.01394133393963178
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,4,2,64,0,1,float16,fp8,127,0.007301333049933116
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,4,2,64,128,1,float16,float16,255,0.027093333502610523
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,4,2,64,0,1,float16,float16,255,0.026954665780067444
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,4,2,64,128,1,float16,fp8,255,0.00874133345981439
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,4,2,64,0,1,float16,fp8,255,0.009685333197315535
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,4,2,64,128,1,float16,float16,511,0.03616533428430557
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,4,2,64,0,1,float16,float16,511,0.04029866556326548
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,4,2,64,128,1,float16,fp8,511,0.010234666367371878
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,4,2,64,0,1,float16,fp8,511,0.016000000139077503
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,4,2,64,128,1,float16,float16,1023,0.0366239994764328
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,4,2,64,0,1,float16,float16,1023,0.049973333875338234
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,4,2,64,128,1,float16,fp8,1023,0.010362666721145311
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,4,2,64,0,1,float16,fp8,1023,0.019882666567961376
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,4,2,64,128,1,float16,float16,2047,0.03834133346875509
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,4,2,64,0,1,float16,float16,2047,0.06764266888300578
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,4,2,64,128,1,float16,fp8,2047,0.010741333166758219
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,4,2,64,0,1,float16,fp8,2047,0.027952000498771667
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,4,2,64,0,1,float16,float16,4095,0.08990933497746785
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,4,2,64,128,1,float16,float16,4095,0.039066667358080544
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,4,2,64,0,1,float16,fp8,4095,0.044879997769991554
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,4,2,64,128,1,float16,fp8,4095,0.010250666489203772
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,4,2,64,128,1,float16,float16,8191,0.03875733415285746
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,4,2,64,0,1,float16,float16,8191,0.1337279975414276
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,4,2,64,128,1,float16,fp8,8191,0.010314666976531347
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,4,2,64,0,1,float16,fp8,8191,0.07792533437410991
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,4,2,64,0,1,float16,float16,16383,0.21991467475891113
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,4,2,64,128,1,float16,float16,16383,0.039162665605545044
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,4,2,64,128,1,float16,fp8,16383,0.010197333370645842
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,4,2,64,0,1,float16,fp8,16383,0.14246933658917746
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,4,1,64,128,1,float16,float16,1,0.014981333166360855
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,4,1,64,0,1,float16,float16,1,0.014720000326633453
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,4,1,64,128,1,float16,fp8,1,0.007290666922926903
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,4,1,64,0,1,float16,fp8,1,0.007050666958093643
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,4,1,64,128,1,float16,float16,3,0.014650666465361914
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,4,1,64,0,1,float16,float16,3,0.014762666076421738
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,4,1,64,128,1,float16,fp8,3,0.007167999943097432
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,4,2,64,128,1,float16,float16,32767,0.03923200070858002
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,4,2,64,128,1,float16,fp8,32767,0.010661333799362183
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,4,1,64,0,1,float16,fp8,3,0.007087999954819679
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,4,1,64,128,1,float16,float16,7,0.014773332824309668
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,4,2,64,0,1,float16,fp8,32767,0.27139200766881305
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,4,1,64,0,1,float16,float16,7,0.014666666587193808
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,4,1,64,128,1,float16,fp8,7,0.007285333548982938
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,4,2,64,0,1,float16,float16,32767,0.3945866823196411
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,4,1,64,0,1,float16,fp8,7,0.0069333333522081375
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,4,1,64,128,1,float16,float16,15,0.015194666882356008
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,4,1,64,0,1,float16,float16,15,0.014682666709025701
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,4,1,64,128,1,float16,fp8,15,0.007205333560705185
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,4,1,64,0,1,float16,fp8,15,0.007461333026488622
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,4,1,64,0,1,float16,float16,31,0.01440000037352244
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,4,1,64,128,1,float16,float16,31,0.01498666654030482
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,4,1,64,128,1,float16,fp8,31,0.007040000210205714
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,4,1,64,0,1,float16,fp8,31,0.008389333263039589
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,4,1,64,0,1,float16,float16,63,0.014639999717473984
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,4,1,64,128,1,float16,float16,63,0.01505600040157636
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,4,1,64,128,1,float16,fp8,63,0.007173333317041397
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,4,1,64,0,1,float16,fp8,63,0.008181333541870117
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,4,1,64,128,1,float16,float16,127,0.014357333381970724
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,4,1,64,0,1,float16,float16,255,0.02699733277161916
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,4,1,64,0,1,float16,float16,127,0.014773332824309668
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,4,1,64,128,1,float16,fp8,127,0.007061333085099856
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,4,1,64,0,1,float16,fp8,127,0.008394666636983553
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,4,1,64,128,1,float16,float16,255,0.026394667724768322
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,4,1,64,128,1,float16,fp8,255,0.008832000195980072
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,4,1,64,0,1,float16,fp8,255,0.009626666704813639
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,4,1,64,128,1,float16,float16,511,0.03585600107908249
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,4,1,64,0,1,float16,float16,511,0.041109333435694374
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,4,1,64,128,1,float16,fp8,511,0.010469333579142889
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,4,1,64,0,1,float16,fp8,511,0.015850666910409927
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,4,1,64,128,1,float16,float16,1023,0.03659199923276901
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,4,1,64,0,1,float16,float16,1023,0.049365331729253135
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,4,1,64,128,1,float16,fp8,1023,0.010384000216921171
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,4,1,64,0,1,float16,fp8,1023,0.019717333217461903
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,4,1,64,128,1,float16,float16,2047,0.038405333956082664
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,4,1,64,0,1,float16,float16,2047,0.06879466772079468
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,4,1,64,128,1,float16,fp8,2047,0.010464000205198923
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,4,1,64,0,1,float16,fp8,2047,0.027829334139823914
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,4,1,64,0,1,float16,float16,4095,0.08931733171145122
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,4,1,64,128,1,float16,float16,4095,0.03898133337497711
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,4,1,64,128,1,float16,fp8,4095,0.010693332801262537
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,4,1,64,0,1,float16,fp8,4095,0.04513599971930186
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,4,1,64,128,1,float16,float16,8191,0.038319999972979225
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,4,2,64,128,1,float16,float16,1,0.022677332162857056
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,4,2,64,0,1,float16,float16,1,0.023541333774725597
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,4,2,64,128,1,float16,fp8,1,0.007424000029762586
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,4,2,64,0,1,float16,fp8,1,0.007477333148320516
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,4,2,64,128,1,float16,float16,3,0.022757334013779957
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,4,2,64,0,1,float16,float16,3,0.022805333137512207
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,4,1,64,0,1,float16,float16,8191,0.134250670671463
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,4,1,64,128,1,float16,fp8,8191,0.010474666953086853
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,4,2,64,128,1,float16,fp8,3,0.008320000022649765
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,4,1,64,0,1,float16,fp8,8191,0.07745600243409474
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,4,2,64,0,1,float16,fp8,3,0.007418666655818622
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,4,2,64,128,1,float16,float16,7,0.022458667556444805
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,4,2,64,0,1,float16,float16,7,0.02276266614596049
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,4,2,64,128,1,float16,fp8,7,0.007541333635648091
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,4,2,64,0,1,float16,fp8,7,0.008341333518425623
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,4,2,64,128,1,float16,float16,15,0.023013333479563396
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,4,2,64,0,1,float16,float16,15,0.022874665757020313
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,4,2,64,128,1,float16,fp8,15,0.007360000163316727
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,4,2,64,0,1,float16,fp8,15,0.0068853336075941724
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,4,2,64,128,1,float16,float16,31,0.022757334013779957
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,4,2,64,0,1,float16,float16,31,0.023455999791622162
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,4,2,64,128,1,float16,fp8,31,0.00816000004609426
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,4,2,64,0,1,float16,fp8,31,0.007296000296870868
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,4,2,64,128,1,float16,float16,63,0.02266666789849599
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,4,2,64,0,1,float16,float16,63,0.022965334355831146
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,4,2,64,128,1,float16,fp8,63,0.006714666883150737
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,4,2,64,0,1,float16,fp8,63,0.006864000111818314
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,4,2,64,0,1,float16,fp8,127,0.008352000266313553
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,4,2,64,128,1,float16,float16,127,0.023344000180562336
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,4,2,64,0,1,float16,float16,127,0.023290666441122692
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,4,2,64,128,1,float16,fp8,127,0.008320000022649765
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,4,2,64,0,1,float16,float16,255,0.042805333932240806
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,4,2,64,128,1,float16,float16,255,0.043552001317342125
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,4,2,64,128,1,float16,fp8,255,0.013306666165590286
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,4,2,64,0,1,float16,fp8,255,0.01552533358335495
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,4,2,64,128,1,float16,float16,511,0.043893332282702126
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,4,2,64,0,1,float16,float16,511,0.05262400209903717
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,4,2,64,128,1,float16,fp8,511,0.013797332843144735
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,4,2,64,0,1,float16,fp8,511,0.019343999524911244
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,4,2,64,0,1,float16,float16,1023,0.06526933113733928
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,4,2,64,0,1,float16,fp8,1023,0.028101332485675812
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,4,2,64,128,1,float16,float16,1023,0.043680002291997276
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,4,2,64,128,1,float16,fp8,1023,0.013914667069911957
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,4,2,64,128,1,float16,float16,2047,0.04642133414745331
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,4,2,64,128,1,float16,fp8,2047,0.013173333058754602
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,4,2,64,0,1,float16,float16,2047,0.09179199735323589
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,4,2,64,0,1,float16,fp8,2047,0.04474133253097534
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,4,2,64,128,1,float16,float16,4095,0.04667733112970988
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,4,2,64,128,1,float16,fp8,4095,0.013701333353916803
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,4,2,64,0,1,float16,float16,4095,0.13513599832852682
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,4,2,64,0,1,float16,fp8,4095,0.07758933305740356
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,4,2,64,0,1,float16,float16,8191,0.22103999058405557
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,4,1,64,128,1,float16,float16,1,0.02342933416366577
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,4,2,64,0,1,float16,fp8,8191,0.1414240002632141
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,4,1,64,0,1,float16,float16,1,0.023391999304294586
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,4,2,64,128,1,float16,float16,8191,0.046426668763160706
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,4,1,64,128,1,float16,fp8,1,0.007525333513816197
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,4,2,64,128,1,float16,fp8,8191,0.013898666948080063
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,4,1,64,0,1,float16,fp8,1,0.007541333635648091
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,4,1,64,128,1,float16,float16,3,0.022757334013779957
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,4,1,64,0,1,float16,float16,3,0.023541333774725597
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,4,1,64,128,1,float16,fp8,3,0.006911999856432279
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,4,1,64,0,1,float16,fp8,3,0.007344000041484833
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,4,1,64,0,1,float16,fp8,7,0.0074346667776505155
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,4,1,64,128,1,float16,float16,7,0.023520000278949738
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,4,1,64,0,1,float16,float16,15,0.02359466751416524
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,4,1,64,0,1,float16,float16,7,0.0236160010099411
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,4,1,64,128,1,float16,float16,15,0.022661333282788593
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,4,1,64,128,1,float16,fp8,7,0.0075040000180403394
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,4,1,64,128,1,float16,fp8,15,0.006815999746322632
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,4,1,64,0,1,float16,fp8,15,0.00725333330531915
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,4,1,64,128,1,float16,float16,31,0.023386667172114056
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,4,1,64,0,1,float16,float16,31,0.02348800003528595
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,4,1,64,128,1,float16,fp8,31,0.007424000029762586
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,4,1,64,128,1,float16,float16,63,0.022533332308133442
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,4,1,64,0,1,float16,fp8,31,0.0085333331177632
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,4,1,64,0,1,float16,float16,63,0.023285334308942158
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,4,1,64,128,1,float16,fp8,63,0.007573333258430163
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,4,1,64,0,1,float16,float16,127,0.02330133318901062
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,4,1,64,0,1,float16,fp8,63,0.007205333560705185
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,4,1,64,128,1,float16,float16,127,0.023344000180562336
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,4,1,64,128,1,float16,fp8,127,0.008613333106040955
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,4,1,64,0,1,float16,fp8,127,0.0084906667470932
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,4,1,64,128,1,float16,float16,255,0.04388799766699473
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,4,1,64,0,1,float16,float16,255,0.043231998880704246
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,4,1,64,128,1,float16,fp8,255,0.013669333110253016
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,4,1,64,0,1,float16,fp8,255,0.015754666179418564
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,4,1,64,128,1,float16,float16,511,0.04318400224049886
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,4,1,64,0,1,float16,float16,511,0.05208533505598704
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,4,1,64,128,1,float16,fp8,511,0.014074667046467463
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,4,1,64,0,1,float16,fp8,511,0.019962667177120846
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,4,1,64,128,1,float16,float16,1023,0.04386133452256521
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,4,1,64,0,1,float16,float16,1023,0.0659093310435613
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,4,1,64,128,1,float16,fp8,1023,0.013733333597580591
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,4,1,64,0,1,float16,fp8,1023,0.028234665592511494
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,4,1,64,128,1,float16,float16,2047,0.04577599962552389
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,4,1,64,0,1,float16,fp8,2047,0.04520000020662943
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,4,1,64,0,1,float16,float16,2047,0.09232532978057861
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,4,1,64,128,1,float16,fp8,2047,0.01402666668097178
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,4,2,64,0,1,float16,float16,1,0.039274667700131737
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,4,2,64,128,1,float16,float16,1,0.03932266682386398
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,4,1,64,128,1,float16,float16,4095,0.046896000703175865
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,4,2,64,128,1,float16,fp8,1,0.011285333583752314
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,4,1,64,0,1,float16,float16,4095,0.13518399993578592
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,4,2,64,128,1,float16,float16,3,0.039333333571751915
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,4,2,64,0,1,float16,fp8,1,0.011445333560307821
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,4,1,64,128,1,float16,fp8,4095,0.013765333841244379
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,4,1,64,0,1,float16,fp8,4095,0.07810133198897044
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,4,2,64,0,1,float16,float16,3,0.039290666580200195
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,4,2,64,128,1,float16,fp8,3,0.011274666835864386
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,4,2,64,128,1,float16,float16,7,0.03932799895604452
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,4,2,64,0,1,float16,fp8,3,0.011381333072980246
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,4,2,64,0,1,float16,fp8,7,0.011221333096424738
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,4,2,64,128,1,float16,float16,15,0.03962666789690653
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,4,2,64,0,1,float16,float16,7,0.040336000422636666
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,4,2,64,0,1,float16,fp8,15,0.011482667177915573
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,4,2,64,128,1,float16,fp8,7,0.011429333438475927
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,4,2,64,0,1,float16,float16,15,0.039520000418027244
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,4,2,64,128,1,float16,fp8,15,0.010847999403874079
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,4,2,64,128,1,float16,float16,31,0.03965333352486292
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,4,2,64,0,1,float16,float16,31,0.039706667264302574
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,4,2,64,128,1,float16,fp8,31,0.011626667032639185
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,4,2,64,0,1,float16,fp8,31,0.011418666690587997
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,4,2,64,128,1,float16,float16,63,0.0391839991013209
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,4,2,64,128,1,float16,fp8,63,0.010874666273593903
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,4,2,64,0,1,float16,float16,63,0.03915199885765711
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,4,2,64,0,1,float16,fp8,63,0.011178666104873022
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,4,2,64,128,1,float16,float16,127,0.0391893337170283
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,4,2,64,0,1,float16,float16,127,0.04004266609748205
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,4,2,64,128,1,float16,fp8,127,0.013114667187134424
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,4,2,64,128,1,float16,float16,255,0.040394666294256844
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,4,2,64,0,1,float16,fp8,127,0.013151999562978745
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,4,2,64,0,1,float16,float16,255,0.04051200052102407
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,4,2,64,128,1,float16,fp8,255,0.01303999995191892
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,4,2,64,0,1,float16,fp8,255,0.01711999997496605
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,4,2,64,128,1,float16,float16,511,0.04140799989302953
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,4,2,64,128,1,float16,fp8,511,0.012634667257467905
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,4,2,64,0,1,float16,float16,511,0.05384000142415365
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,4,2,64,0,1,float16,fp8,511,0.025402667621771496
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,4,2,64,128,1,float16,float16,1023,0.04102933406829834
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,4,2,64,0,1,float16,float16,1023,0.07654933134714763
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,4,2,64,128,1,float16,fp8,1023,0.01313599944114685
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,4,2,64,0,1,float16,fp8,1023,0.0432586669921875
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,4,2,64,128,1,float16,float16,2047,0.0444213350613912
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,4,1,64,128,1,float16,float16,1,0.008634666601816813
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,4,2,64,128,1,float16,fp8,2047,0.013173333058754602
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,4,2,64,0,1,float16,float16,2047,0.12286399801572163
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,4,2,64,0,1,float16,fp8,2047,0.07526400188604991
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,4,1,64,0,1,float16,float16,1,0.008853333070874214
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,4,1,64,128,1,float16,fp8,1,0.0074560002734263735
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,4,1,64,0,1,float16,fp8,1,0.007365333537260692
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,4,2,64,0,1,float16,fp8,4095,0.1395786702632904
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,4,2,64,128,1,float16,float16,4095,0.04460800190766653
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,4,1,64,0,1,float16,float16,3,0.00808533343176047
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,4,1,64,128,1,float16,float16,3,0.009370666618148485
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,4,2,64,128,1,float16,fp8,4095,0.013359999905029932
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,4,2,64,0,1,float16,float16,4095,0.2099306583404541
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,4,1,64,128,1,float16,fp8,3,0.007861333588759104
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,4,1,64,0,1,float16,fp8,3,0.009877333417534828
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,4,1,64,128,1,float16,float16,15,0.008282666405042013
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,4,1,64,128,1,float16,float16,7,0.007514666765928268
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,4,1,64,128,1,float16,fp8,7,0.006965333595871925
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,4,1,64,0,1,float16,float16,7,0.0081386665503184
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,4,1,64,0,1,float16,fp8,7,0.008453333129485449
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,4,1,64,0,1,float16,float16,15,0.008197333042820295
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,4,1,64,0,1,float16,fp8,31,0.007178666690985362
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,4,1,64,128,1,float16,fp8,15,0.006773333375652631
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,4,1,64,0,1,float16,fp8,15,0.007520000139872233
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,4,1,64,128,1,float16,float16,31,0.008463999877373377
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,4,1,64,0,1,float16,float16,31,0.007818666597207388
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,4,1,64,128,1,float16,fp8,31,0.007151999821265538
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,4,1,64,128,1,float16,float16,63,0.009343999748428663
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,4,1,64,0,1,float16,float16,63,0.00898133342464765
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,4,1,64,128,1,float16,fp8,63,0.007189333438873291
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,4,1,64,0,1,float16,fp8,63,0.0074506668994824094
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,4,1,64,128,1,float16,float16,127,0.009381333366036415
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,4,1,64,128,1,float16,fp8,255,0.007413333281874657
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,4,1,64,0,1,float16,float16,127,0.00933333362142245
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,4,1,64,128,1,float16,fp8,127,0.006090666477878888
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,4,1,64,0,1,float16,fp8,127,0.007104000076651573
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,4,1,64,128,1,float16,float16,255,0.009573333586255709
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,4,1,64,0,1,float16,float16,255,0.009557333464423815
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,4,1,64,0,1,float16,fp8,255,0.007594666754206021
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,4,1,64,128,1,float16,float16,511,0.009397333487868309
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,4,1,64,0,1,float16,float16,511,0.010357333347201347
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,4,1,64,128,1,float16,fp8,511,0.00754666638871034
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,4,1,64,0,1,float16,fp8,511,0.007407999907930692
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,4,1,64,128,1,float16,float16,1023,0.01227733368674914
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,4,1,64,0,1,float16,float16,1023,0.012416000167528788
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,4,1,64,128,1,float16,fp8,1023,0.007850666840871176
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,4,1,64,0,1,float16,fp8,1023,0.00843733362853527
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,4,1,64,0,1,float16,float16,4095,0.015557333827018738
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,4,1,64,128,1,float16,float16,2047,0.014325333138306936
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,4,1,64,0,1,float16,float16,2047,0.014581333845853806
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,4,1,64,128,1,float16,fp8,2047,0.007706666365265846
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,4,1,64,0,1,float16,fp8,2047,0.008933333059151968
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,4,1,64,128,1,float16,float16,4095,0.0145066666106383
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,4,1,64,128,1,float16,fp8,4095,0.008447999755541483
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,4,1,64,0,1,float16,fp8,4095,0.0106133334338665
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,4,1,64,128,1,float16,float16,8191,0.017583999782800674
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,4,1,64,0,1,float16,float16,8191,0.022015998760859173
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,4,1,64,128,1,float16,fp8,8191,0.00821333316465219
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,4,1,64,0,1,float16,fp8,8191,0.013770667215188345
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,4,1,64,128,1,float16,float16,16383,0.017877332866191864
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,4,1,64,0,1,float16,float16,16383,0.025370667378107708
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,4,1,64,128,1,float16,fp8,16383,0.008346666892369589
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,4,1,64,0,1,float16,fp8,16383,0.016384000579516094
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,4,1,64,128,1,float16,float16,32767,0.022319999833901722
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,4,1,64,0,1,float16,float16,32767,0.036559998989105225
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,4,1,64,128,1,float16,fp8,32767,0.00795199970404307
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,4,1,64,0,1,float16,fp8,32767,0.02221333235502243
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,4,1,64,128,1,float16,float16,65535,0.026026666164398193
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,4,1,64,0,1,float16,float16,65535,0.05619200070699056
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,4,1,64,128,1,float16,fp8,65535,0.007738666608929634
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,4,1,64,0,1,float16,fp8,65535,0.03352533280849457
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,4,2,64,128,1,float16,float16,1,0.008245333408315977
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,4,2,64,0,1,float16,float16,1,0.00816000004609426
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,4,2,64,128,1,float16,fp8,1,0.008549333239595095
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,4,2,64,0,1,float16,fp8,1,0.00789866658548514
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,4,2,64,128,1,float16,float16,3,0.008154666672150293
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,4,2,64,0,1,float16,float16,3,0.00895999992887179
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,4,2,64,128,1,float16,fp8,3,0.007797333101431529
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,4,2,64,0,1,float16,fp8,3,0.006751999879876773
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,4,2,64,128,1,float16,float16,7,0.00898133342464765
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,4,2,64,0,1,float16,float16,7,0.008314666648705801
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,4,2,64,128,1,float16,fp8,7,0.007167999943097432
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,4,1,64,128,1,float16,fp8,131071,0.007327999919652939
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,4,1,64,128,1,float16,float16,131071,0.03352533280849457
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,4,2,64,0,1,float16,fp8,7,0.008442666381597519
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,4,1,64,0,1,float16,float16,131071,0.09161600470542908
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,4,2,64,128,1,float16,float16,15,0.008256000156203905
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,4,2,64,0,1,float16,float16,15,0.00808533343176047
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,4,2,64,128,1,float16,fp8,15,0.005834666391213735
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,4,2,64,0,1,float16,fp8,15,0.007269333427151044
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,4,2,64,128,1,float16,float16,31,0.00884799969693025
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,4,1,64,0,1,float16,fp8,131071,0.06190933287143707
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,4,2,64,0,1,float16,float16,63,0.00922133338948091
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,4,2,64,0,1,float16,float16,31,0.008506666868925095
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,4,2,64,128,1,float16,fp8,31,0.007311999797821045
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,4,2,64,0,1,float16,fp8,31,0.006789333497484525
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,4,2,64,128,1,float16,float16,63,0.008650666723648706
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,4,2,64,128,1,float16,fp8,63,0.008976000050703684
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,4,2,64,0,1,float16,fp8,63,0.006741333131988843
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,4,2,64,128,1,float16,float16,127,0.009397333487868309
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,4,2,64,0,1,float16,float16,255,0.009546666716535887
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,4,2,64,0,1,float16,float16,127,0.009242666885256767
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,4,2,64,128,1,float16,fp8,127,0.007461333026488622
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,4,2,64,0,1,float16,fp8,127,0.006954666847983996
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,4,2,64,128,1,float16,float16,255,0.008832000195980072
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,4,2,64,128,1,float16,fp8,255,0.007189333438873291
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,4,2,64,128,1,float16,float16,1023,0.01201066623131434
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,4,2,64,0,1,float16,fp8,255,0.007871999715765318
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,4,2,64,128,1,float16,float16,511,0.012293333808581034
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,4,2,64,0,1,float16,float16,511,0.011674666156371435
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,4,2,64,128,1,float16,float16,2047,0.016704000532627106
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,4,2,64,0,1,float16,fp8,511,0.008362666393319765
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,4,2,64,128,1,float16,fp8,511,0.007525333513816197
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,4,2,64,0,1,float16,float16,1023,0.01190399999419848
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,4,2,64,128,1,float16,fp8,1023,0.007258666679263115
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,4,2,64,0,1,float16,fp8,1023,0.00860799973209699
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,4,2,64,0,1,float16,float16,2047,0.01923199991385142
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,4,2,64,128,1,float16,fp8,2047,0.007882666463653246
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,4,2,64,0,1,float16,fp8,2047,0.010378666842977205
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,4,2,64,128,1,float16,float16,4095,0.01721599946419398
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,4,2,64,0,1,float16,float16,4095,0.020901332298914593
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,4,2,64,128,1,float16,fp8,4095,0.007141333073377609
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,4,2,64,0,1,float16,fp8,4095,0.011541333049535751
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,4,2,64,128,1,float16,float16,8191,0.020207999895016353
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,4,2,64,0,1,float16,float16,8191,0.02908266584078471
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,4,2,64,128,1,float16,fp8,16383,0.007135999699433644
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,4,2,64,128,1,float16,fp8,8191,0.008021333565314611
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,4,2,64,0,1,float16,fp8,8191,0.015184000134468079
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,4,2,64,128,1,float16,float16,16383,0.023562667270501454
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,4,2,64,0,1,float16,float16,16383,0.038389332592487335
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,4,2,64,0,1,float16,fp8,16383,0.020389333367347717
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,4,2,64,128,1,float16,float16,32767,0.028965334097544353
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,4,2,64,0,1,float16,float16,32767,0.06239999830722809
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,4,2,64,128,1,float16,fp8,32767,0.0074453335255384445
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,4,2,64,0,1,float16,fp8,32767,0.035189333061377205
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,4,2,64,128,1,float16,float16,65535,0.0359253336985906
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,4,2,64,0,1,float16,float16,65535,0.09338133533795674
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,4,2,64,128,1,float16,fp8,65535,0.008207999790708223
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,4,2,64,0,1,float16,fp8,65535,0.05297600229581197
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,4,1,64,128,1,float16,float16,1,0.040362666050593056
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,4,1,64,0,1,float16,float16,1,0.04095466683308283
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,4,1,64,128,1,float16,fp8,1,0.011546666423479715
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,4,1,64,0,1,float16,fp8,1,0.011391999820868174
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,4,2,64,128,1,float16,float16,131071,0.037776000797748566
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,4,1,64,128,1,float16,float16,3,0.04035199930270513
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,4,2,64,0,1,float16,float16,131071,0.1471680005391439
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,4,2,64,128,1,float16,fp8,131071,0.008341333518425623
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,4,1,64,0,1,float16,float16,3,0.04121600091457367
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,4,1,64,128,1,float16,fp8,3,0.011514666179815928
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,4,2,64,0,1,float16,fp8,131071,0.09241066376368205
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,4,1,64,0,1,float16,fp8,3,0.01128000020980835
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,4,1,64,128,1,float16,float16,7,0.040133332212766014
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,4,1,64,0,1,float16,fp8,7,0.011066666493813196
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,4,1,64,128,1,float16,fp8,7,0.01180800050497055
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,4,1,64,0,1,float16,float16,7,0.040250666439533234
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,4,1,64,128,1,float16,float16,15,0.041146665811538696
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,4,1,64,0,1,float16,float16,15,0.04029333343108495
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,4,1,64,0,1,float16,float16,31,0.039647998909155525
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,4,1,64,128,1,float16,fp8,15,0.011461333682139715
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,4,1,64,0,1,float16,fp8,15,0.011237333218256632
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,4,1,64,128,1,float16,float16,31,0.04013866682847341
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,4,1,64,128,1,float16,fp8,31,0.011685332904259363
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,4,1,64,128,1,float16,fp8,63,0.010901333143313726
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,4,1,64,0,1,float16,fp8,31,0.011509332805871964
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,4,1,64,0,1,float16,float16,63,0.039962666730086006
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,4,1,64,128,1,float16,float16,63,0.04068266600370407
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,4,1,64,0,1,float16,fp8,63,0.01129066695769628
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,4,1,64,128,1,float16,fp8,127,0.012719999998807907
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,4,1,64,128,1,float16,float16,127,0.03975466638803482
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,4,1,64,0,1,float16,float16,127,0.0402399996916453
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,4,1,64,0,1,float16,fp8,127,0.013269333789745966
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,4,1,64,128,1,float16,float16,255,0.04131199916203817
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,4,1,64,0,1,float16,fp8,255,0.016778666526079178
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,4,1,64,0,1,float16,float16,255,0.04170133173465729
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,4,1,64,128,1,float16,fp8,255,0.013167999684810638
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,4,1,64,128,1,float16,float16,511,0.041573333243529
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,4,1,64,0,1,float16,float16,511,0.054474666714668274
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,4,1,64,128,1,float16,fp8,511,0.013354666531085968
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,4,1,64,128,1,float16,float16,1023,0.04085333396991094
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,4,1,64,0,1,float16,fp8,511,0.025536000728607178
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,4,1,64,0,1,float16,float16,1023,0.07701333363850911
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,4,1,64,128,1,float16,fp8,1023,0.013194666554530462
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,4,1,64,0,1,float16,fp8,1023,0.04295999805132548
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,4,1,64,128,1,float16,fp8,2047,0.012741333494583765
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,4,1,64,128,1,float16,float16,2047,0.04507733384768168
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,4,1,64,0,1,float16,float16,2047,0.12341866890589397
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,4,2,64,128,1,float16,float16,1,0.07257600128650665
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,4,2,64,0,1,float16,float16,1,0.07176533341407776
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,4,1,64,0,1,float16,fp8,2047,0.07532266775767009
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,4,2,64,128,1,float16,fp8,1,0.01893866683046023
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,4,2,64,128,1,float16,float16,3,0.07217599948247273
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,4,2,64,0,1,float16,float16,3,0.07191999753316243
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,4,2,64,0,1,float16,fp8,1,0.01844800015290578
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,4,2,64,128,1,float16,fp8,3,0.01905599981546402
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,4,2,64,0,1,float16,fp8,3,0.018901333212852478
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,4,2,64,0,1,float16,float16,7,0.07203199962774913
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,4,2,64,128,1,float16,fp8,7,0.018885333091020584
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,4,2,64,128,1,float16,float16,7,0.07205333312352498
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,4,2,64,0,1,float16,fp8,7,0.019199999670187633
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,4,2,64,0,1,float16,float16,15,0.07158400118350983
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,4,2,64,128,1,float16,float16,15,0.07195733487606049
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,4,2,64,0,1,float16,fp8,15,0.018895999838908512
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,4,2,64,128,1,float16,fp8,15,0.019088000059127808
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,4,2,64,128,1,float16,float16,31,0.07237333556016286
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,4,2,64,0,1,float16,fp8,31,0.019120000302791595
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,4,2,64,0,1,float16,float16,31,0.07187200089295705
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,4,2,64,128,1,float16,fp8,31,0.01893866683046023
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,4,2,64,128,1,float16,float16,63,0.0710399995247523
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,4,2,64,0,1,float16,float16,63,0.07106666763623555
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,4,2,64,128,1,float16,fp8,63,0.018992000569899876
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,4,2,64,128,1,float16,float16,127,0.0718399981657664
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,4,2,64,0,1,float16,fp8,63,0.019007999449968338
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,4,2,64,0,1,float16,float16,127,0.07134933272997539
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,4,2,64,128,1,float16,float16,255,0.07747733096281688
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,4,2,64,128,1,float16,fp8,127,0.021909333765506744
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,4,2,64,0,1,float16,fp8,127,0.022111999491850536
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,4,2,64,0,1,float16,float16,255,0.07690666615962982
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,4,2,64,128,1,float16,fp8,255,0.02219199885924657
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,4,2,64,0,1,float16,fp8,255,0.03038399914900462
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,4,2,64,128,1,float16,fp8,511,0.021664001047611237
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,4,2,64,128,1,float16,float16,511,0.07764266431331635
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,4,2,64,0,1,float16,float16,511,0.0981760025024414
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,4,2,64,0,1,float16,fp8,511,0.04730666677157084
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,4,2,64,128,1,float16,float16,1023,0.07724266747633617
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,4,2,64,0,1,float16,float16,1023,0.14194132884343466
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,4,2,64,128,1,float16,fp8,1023,0.02216000109910965
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,4,2,64,128,1,float16,float16,2047,0.08080000181992848
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,4,2,64,0,1,float16,fp8,1023,0.07940799991289775
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,4,2,64,0,1,float16,float16,2047,0.23293334245681763
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,4,2,64,128,1,float16,fp8,2047,0.022405333817005157
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,4,1,64,0,1,float16,fp8,1,0.018650667121013004
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,4,2,64,0,1,float16,fp8,2047,0.14166399836540222
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,4,1,64,128,1,float16,float16,1,0.07315200070540111
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,4,1,64,0,1,float16,float16,1,0.0729973316192627
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,4,1,64,128,1,float16,fp8,1,0.01894933357834816
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,4,1,64,128,1,float16,float16,3,0.07336000104745229
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,4,1,64,128,1,float16,fp8,3,0.018602666755517323
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,4,1,64,0,1,float16,float16,3,0.07344533503055573
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,4,1,64,0,1,float16,fp8,3,0.01905599981546402
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,4,1,64,128,1,float16,float16,7,0.07306133210659027
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,4,1,64,0,1,float16,float16,7,0.07261333366235097
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,4,1,64,128,1,float16,fp8,7,0.019109333554903667
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,4,1,64,0,1,float16,float16,15,0.07285866638024648
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,4,1,64,0,1,float16,fp8,7,0.019007999449968338
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,4,1,64,128,1,float16,float16,15,0.07364266614119212
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,4,1,64,128,1,float16,fp8,15,0.019173332800467808
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,4,1,64,0,1,float16,fp8,31,0.01852799952030182
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,4,1,64,0,1,float16,fp8,15,0.019359999646743137
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,4,1,64,128,1,float16,float16,31,0.07297066847483318
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,4,1,64,0,1,float16,float16,31,0.07303999861081441
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,4,1,64,128,1,float16,fp8,31,0.019152000546455383
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,4,1,64,128,1,float16,float16,63,0.07205866773923238
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,4,1,64,0,1,float16,float16,63,0.07239466905593872
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,4,1,64,0,1,float16,fp8,63,0.018543999642133713
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,4,1,64,128,1,float16,fp8,63,0.018981333822011948
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,4,1,64,128,1,float16,float16,127,0.07268799841403961
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,4,1,64,0,1,float16,float16,127,0.07211733361085255
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,4,1,64,128,1,float16,fp8,127,0.022245332598686218
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,4,1,64,0,1,float16,fp8,127,0.02231466770172119
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,4,1,64,128,1,float16,fp8,255,0.02183466653029124
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,4,1,64,128,1,float16,float16,255,0.07809066772460938
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,4,1,64,0,1,float16,float16,255,0.07816533247629802
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,4,1,64,0,1,float16,fp8,255,0.03052799900372823
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,4,1,64,0,1,float16,fp8,511,0.047322665651639305
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,4,1,64,128,1,float16,float16,511,0.07855466504891713
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,4,1,64,0,1,float16,float16,511,0.09870400031407674
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,4,1,64,128,1,float16,fp8,511,0.022255999346574146
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,4,1,64,128,1,float16,float16,1023,0.07760533193747203
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,4,1,64,0,1,float16,float16,1023,0.14306666453679404
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,4,1,64,0,1,float16,fp8,1023,0.07896533111731212
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,4,1,64,128,1,float16,fp8,1023,0.02239999920129776
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,4,2,64,128,1,float16,float16,1,0.13500266273816428
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,4,2,64,0,1,float16,fp8,1,0.03258133431275686
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,4,2,64,0,1,float16,float16,1,0.13433067003885904
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,4,2,64,128,1,float16,fp8,1,0.03298133363326391
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,4,2,64,0,1,float16,float16,3,0.13398399949073792
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,4,2,64,128,1,float16,float16,3,0.13457600275675455
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,4,2,64,128,1,float16,float16,7,0.13354133566220602
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,4,2,64,128,1,float16,fp8,3,0.033285332222779594
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,4,2,64,0,1,float16,fp8,3,0.03341866781314214
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,4,2,64,0,1,float16,fp8,7,0.0325546662012736
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,4,2,64,0,1,float16,float16,7,0.1344213287035624
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,4,2,64,128,1,float16,fp8,7,0.03299200038115183
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,4,2,64,0,1,float16,float16,15,0.13344533244768778
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,4,2,64,128,1,float16,float16,15,0.13424533605575562
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,4,2,64,128,1,float16,fp8,15,0.03332799921433131
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,4,2,64,0,1,float16,float16,31,0.13391466935475668
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,4,2,64,0,1,float16,fp8,15,0.03311999887228012
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,4,2,64,128,1,float16,float16,31,0.1350879967212677
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,4,2,64,128,1,float16,float16,63,0.13201600313186646
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,4,2,64,128,1,float16,fp8,31,0.03319466610749563
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,4,2,64,0,1,float16,fp8,31,0.03310933212439219
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,4,2,64,0,1,float16,float16,63,0.13327466448148093
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,4,2,64,0,1,float16,fp8,63,0.03238933285077413
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,4,2,64,128,1,float16,fp8,63,0.03321066747109095
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,4,2,64,128,1,float16,fp8,127,0.039066667358080544
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,4,2,64,128,1,float16,float16,127,0.13593600193659464
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,4,2,64,0,1,float16,float16,127,0.13639466961224875
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,4,2,64,0,1,float16,fp8,127,0.03993066648642222
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,4,2,64,128,1,float16,float16,255,0.14468266566594443
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,4,2,64,0,1,float16,float16,255,0.14500799775123596
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,4,2,64,128,1,float16,float16,511,0.14495999614397684
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,4,2,64,0,1,float16,fp8,255,0.05776533484458923
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,4,2,64,0,1,float16,float16,511,0.18410134315490723
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,4,2,64,128,1,float16,fp8,511,0.04092800120512644
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,4,1,64,128,1,float16,float16,1,0.008250666782259941
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,4,2,64,128,1,float16,fp8,1023,0.03989866624275843
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,4,1,64,0,1,float16,float16,1,0.008293333152929941
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,4,1,64,128,1,float16,fp8,1,0.007002666592597961
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,4,2,64,0,1,float16,fp8,511,0.08805867036183675
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,4,1,64,0,1,float16,fp8,1,0.008357333640257517
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,4,1,64,128,1,float16,float16,3,0.007765333478649457
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,4,1,64,0,1,float16,float16,3,0.008186666915814081
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,4,1,64,0,1,float16,fp8,3,0.007402666533986728
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,4,1,64,128,1,float16,fp8,3,0.012527999778588613
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,4,2,64,0,1,float16,fp8,1023,0.15016000469525656
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,4,2,64,128,1,float16,float16,1023,0.14524799585342407
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,4,1,64,128,1,float16,float16,7,0.00772266648709774
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,4,1,64,0,1,float16,float16,7,0.008218666538596153
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,4,2,64,0,1,float16,float16,1023,0.27342400948206586
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,4,1,64,128,1,float16,fp8,7,0.007344000041484833
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,4,1,64,0,1,float16,fp8,7,0.008576000109314919
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,4,1,64,128,1,float16,float16,15,0.008293333152929941
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,4,1,64,0,1,float16,float16,15,0.007685333490371704
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,4,1,64,128,1,float16,fp8,15,0.007162666569153468
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,4,1,64,0,1,float16,fp8,31,0.0069333333522081375
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,4,1,64,0,1,float16,fp8,15,0.007621333623925845
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,4,1,64,128,1,float16,float16,31,0.008453333129485449
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,4,1,64,0,1,float16,float16,31,0.008383999889095625
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,4,1,64,128,1,float16,fp8,31,0.008352000266313553
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,4,1,64,128,1,float16,float16,63,0.008698666468262672
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,4,2,64,128,1,float16,fp8,255,0.03991466760635376
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,4,1,64,0,1,float16,float16,63,0.009093333035707474
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,4,1,64,128,1,float16,fp8,63,0.007397333160042763
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,4,1,64,128,1,float16,float16,255,0.00878399983048439
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,4,1,64,0,1,float16,fp8,63,0.007381333038210869
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,4,1,64,128,1,float16,float16,127,0.00933333362142245
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,4,1,64,0,1,float16,fp8,255,0.007360000163316727
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,4,1,64,0,1,float16,float16,127,0.009317333499590555
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,4,1,64,128,1,float16,fp8,127,0.00871999996403853
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,4,1,64,0,1,float16,fp8,127,0.006111999973654747
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,4,1,64,0,1,float16,float16,255,0.009253333633144697
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,4,1,64,128,1,float16,fp8,255,0.007626666376988093
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,4,1,64,128,1,float16,float16,511,0.011957333733638128
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,4,1,64,0,1,float16,float16,511,0.012058666596810022
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,4,1,64,128,1,float16,fp8,511,0.006821333120266597
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,4,1,64,0,1,float16,fp8,511,0.008000000069538752
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,4,1,64,128,1,float16,float16,1023,0.012191999703645706
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,4,1,64,0,1,float16,float16,1023,0.012154666086037954
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,4,1,64,128,1,float16,fp8,1023,0.007466666400432587
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,4,1,64,128,1,float16,float16,4095,0.017573333034912746
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,4,1,64,0,1,float16,fp8,1023,0.00879466657837232
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,4,1,64,128,1,float16,float16,2047,0.016421332955360413
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,4,1,64,0,1,float16,fp8,4095,0.011882666498422623
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,4,1,64,0,1,float16,float16,2047,0.019317333896954853
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,4,1,64,128,1,float16,fp8,2047,0.007087999954819679
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,4,1,64,0,1,float16,fp8,2047,0.010064000263810158
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,4,1,64,0,1,float16,float16,4095,0.02082666630546252
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,4,1,64,128,1,float16,float16,16383,0.022863999009132385
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,4,1,64,128,1,float16,fp8,4095,0.0074506668994824094
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,4,1,64,128,1,float16,float16,8191,0.020576000213623047
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,4,1,64,0,1,float16,float16,8191,0.028602667152881622
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,4,1,64,128,1,float16,fp8,8191,0.008709333216150602
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,4,1,64,0,1,float16,fp8,16383,0.020202666521072388
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,4,1,64,0,1,float16,fp8,8191,0.015168000012636185
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,4,1,64,0,1,float16,float16,16383,0.03886933376391729
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,4,1,64,128,1,float16,fp8,16383,0.007525333513816197
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,4,1,64,128,1,float16,float16,32767,0.029152000943819683
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,4,1,64,0,1,float16,float16,32767,0.06160533428192139
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,4,1,64,128,1,float16,fp8,32767,0.00874133345981439
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,4,1,64,0,1,float16,fp8,32767,0.0347680002450943
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,4,1,64,0,1,float16,float16,65535,0.09322667121887207
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,4,1,64,128,1,float16,float16,65535,0.03552533437808355
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,4,1,64,128,1,float16,fp8,65535,0.008186666915814081
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,4,1,64,0,1,float16,fp8,65535,0.05266666909058889
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,4,2,64,128,1,float16,float16,1,0.008143999924262365
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,4,2,64,0,1,float16,float16,1,0.008303999900817871
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,4,2,64,128,1,float16,fp8,1,0.00707733320693175
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,4,2,64,0,1,float16,fp8,1,0.007354666789372762
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,4,2,64,128,1,float16,float16,3,0.008330666770537695
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,4,2,64,0,1,float16,float16,3,0.008234666660428047
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,4,2,64,128,1,float16,fp8,3,0.007247999931375186
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,4,2,64,0,1,float16,fp8,3,0.0069386667261521024
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,4,2,64,128,1,float16,float16,7,0.008154666672150293
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,4,2,64,0,1,float16,float16,7,0.008031999692320824
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,4,2,64,0,1,float16,fp8,7,0.007055999711155891
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,4,1,64,128,1,float16,float16,131071,0.0377866675456365
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,4,2,64,128,1,float16,fp8,7,0.0069919998447100324
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,4,1,64,0,1,float16,float16,131071,0.14661332964897156
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,4,1,64,128,1,float16,fp8,131071,0.008256000156203905
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,4,2,64,128,1,float16,float16,15,0.008074666683872541
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,4,2,64,0,1,float16,float16,31,0.008752000207702318
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,4,1,64,0,1,float16,fp8,131071,0.0918239951133728
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,4,2,64,0,1,float16,float16,15,0.008133333176374435
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,4,2,64,128,1,float16,fp8,15,0.0069866664707660675
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,4,2,64,0,1,float16,fp8,15,0.007584000006318092
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,4,2,64,128,1,float16,float16,31,0.008010666817426682
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,4,2,64,128,1,float16,fp8,31,0.007216000308593114
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,4,2,64,0,1,float16,fp8,31,0.007344000041484833
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,4,2,64,128,1,float16,float16,63,0.009503999724984169
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,4,2,64,0,1,float16,float16,63,0.009173333023985228
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,4,2,64,128,1,float16,fp8,63,0.007285333548982938
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,4,2,64,128,1,float16,float16,255,0.012293333808581034
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,4,2,64,0,1,float16,fp8,63,0.007050666958093643
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,4,2,64,128,1,float16,float16,127,0.00927466650803884
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,4,2,64,0,1,float16,float16,127,0.00879466657837232
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,4,2,64,128,1,float16,fp8,127,0.007589333380262057
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,4,2,64,0,1,float16,fp8,127,0.007237333183487256
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,4,2,64,0,1,float16,float16,255,0.011936000237862269
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,4,2,64,128,1,float16,fp8,255,0.007466666400432587
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,4,2,64,0,1,float16,fp8,255,0.008336000144481659
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,4,2,64,128,1,float16,float16,511,0.012037333101034164
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,4,2,64,0,1,float16,float16,511,0.01231466606259346
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,4,2,64,128,1,float16,fp8,511,0.007173333317041397
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,4,2,64,0,1,float16,fp8,511,0.008383999889095625
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,4,2,64,128,1,float16,float16,1023,0.01639466608564059
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,4,2,64,0,1,float16,float16,1023,0.017525333911180496
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,4,2,64,128,1,float16,fp8,1023,0.007466666400432587
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,4,2,64,0,1,float16,fp8,1023,0.009423999736706415
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,4,2,64,128,1,float16,float16,2047,0.019621333728233974
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,4,2,64,0,1,float16,float16,2047,0.02473066747188568
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,4,2,64,128,1,float16,fp8,2047,0.007322666545708974
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,4,2,64,0,1,float16,fp8,2047,0.01110400011142095
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,4,2,64,128,1,float16,float16,4095,0.023711999257405598
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,4,2,64,0,1,float16,float16,4095,0.03299733251333237
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,4,2,64,128,1,float16,fp8,4095,0.0074879998962084455
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,4,2,64,0,1,float16,fp8,4095,0.014938666174809137
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,4,2,64,128,1,float16,float16,8191,0.028031999866167705
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,4,2,64,0,1,float16,float16,8191,0.04465599854787191
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,4,2,64,128,1,float16,fp8,16383,0.008240000034372011
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,4,2,64,128,1,float16,fp8,8191,0.007626666376988093
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,4,2,64,0,1,float16,fp8,8191,0.023333333432674408
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,4,2,64,128,1,float16,float16,16383,0.0341333324710528
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,4,2,64,0,1,float16,float16,16383,0.0684799998998642
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,4,2,64,0,1,float16,fp8,16383,0.03145066648721695
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,4,2,64,128,1,float16,float16,32767,0.035173334181308746
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,4,2,64,0,1,float16,float16,32767,0.0921013355255127
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,4,2,64,0,1,float16,fp8,32767,0.05096533397833506
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,4,2,64,128,1,float16,fp8,32767,0.008303999900817871
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,4,2,64,128,1,float16,float16,65535,0.0354666660229365
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,4,2,64,0,1,float16,float16,65535,0.13980799913406372
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,4,2,64,128,1,float16,fp8,65535,0.008416000132759413
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,4,1,128,0,1,float16,float16,1,0.009674666449427605
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,4,2,64,0,1,float16,fp8,65535,0.08585600058237712
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,4,1,128,0,1,float16,fp8,1,0.007125333572427432
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,4,1,128,0,1,float16,fp8,3,0.007173333317041397
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,4,1,128,0,1,float16,float16,3,0.009535999968647957
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,4,1,128,0,1,float16,float16,7,0.009786666681369146
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,4,1,128,0,1,float16,fp8,7,0.007360000163316727
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,4,1,128,0,1,float16,float16,15,0.009866666669646898
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,4,1,128,0,1,float16,fp8,15,0.007167999943097432
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,4,1,128,0,1,float16,fp8,31,0.00731733317176501
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,4,1,128,0,1,float16,float16,31,0.010431999961535135
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,4,2,64,0,1,float16,fp8,131071,0.14686399698257446
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,4,2,64,128,1,float16,fp8,131071,0.007791999727487564
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,4,1,128,0,1,float16,float16,63,0.012096000214417776
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,4,2,64,128,1,float16,float16,131071,0.036917333801587425
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,4,1,128,0,1,float16,fp8,63,0.0069866664707660675
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,4,1,128,0,1,float16,float16,127,0.012063999970753988
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,4,2,64,0,1,float16,float16,131071,0.23542932669321695
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,4,1,128,0,1,float16,float16,1023,0.019989332805077236
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,4,1,128,0,1,float16,fp8,127,0.008447999755541483
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,4,1,128,0,1,float16,float16,255,0.012896000097195307
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,4,1,128,0,1,float16,float16,511,0.013183999806642532
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,4,1,128,0,1,float16,fp8,2047,0.01239466667175293
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,4,1,128,0,1,float16,fp8,255,0.008058666562040647
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,4,1,128,0,1,float16,fp8,511,0.008565333361426989
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,4,1,128,0,1,float16,fp8,1023,0.012890666723251343
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,4,1,128,0,1,float16,float16,2047,0.029045333464940388
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,4,1,128,0,1,float16,float16,4095,0.04075733323891958
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,4,1,128,0,1,float16,fp8,4095,0.01825599993268649
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,4,1,128,0,1,float16,float16,8191,0.06011733412742615
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,4,1,128,0,1,float16,fp8,8191,0.02462933212518692
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,4,1,128,0,1,float16,float16,16383,0.08844799796740214
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,4,1,128,0,1,float16,fp8,16383,0.04060266663630804
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,4,1,128,0,1,float16,float16,32767,0.1253706713517507
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,4,2,128,0,1,float16,float16,1,0.010960000256697336
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,4,1,128,0,1,float16,fp8,32767,0.06205866734186808
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,4,2,128,0,1,float16,fp8,1,0.007418666655818622
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,4,1,128,0,1,float16,float16,65535,0.20180267095565796
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,4,2,128,0,1,float16,float16,3,0.010837333897749582
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,4,2,128,0,1,float16,float16,7,0.010597333312034607
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,4,2,128,0,1,float16,fp8,3,0.0068853336075941724
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,4,1,128,0,1,float16,fp8,65535,0.10667733351389568
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,4,2,128,0,1,float16,fp8,7,0.005685333162546158
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,4,2,128,0,1,float16,float16,15,0.010703999549150467
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,4,2,128,0,1,float16,fp8,15,0.00744000015159448
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,4,2,128,0,1,float16,fp8,31,0.008250666782259941
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,4,2,128,0,1,float16,float16,31,0.010703999549150467
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,4,2,128,0,1,float16,float16,63,0.009962666779756546
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,4,2,128,0,1,float16,fp8,63,0.007424000029762586
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,4,2,128,0,1,float16,float16,127,0.010586666564146677
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,4,2,128,0,1,float16,fp8,127,0.00842666688064734
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,4,2,128,0,1,float16,float16,511,0.01998399943113327
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,4,2,128,0,1,float16,float16,255,0.014831999937693277
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,4,1,128,0,1,float16,float16,131071,0.3593440055847168
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,4,2,128,0,1,float16,float16,1023,0.028938665986061096
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,4,2,128,0,1,float16,fp8,255,0.007621333623925845
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,4,2,128,0,1,float16,fp8,511,0.00901333304742972
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,4,2,128,0,1,float16,fp8,1023,0.014192000031471252
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,4,1,128,0,1,float16,fp8,131071,0.18005865812301636
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,4,2,128,0,1,float16,float16,2047,0.043354665239652
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,4,2,128,0,1,float16,fp8,8191,0.03787733366092046
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,4,2,128,0,1,float16,fp8,2047,0.01766933376590411
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,4,2,128,0,1,float16,float16,4095,0.06774400174617767
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,4,2,128,0,1,float16,fp8,4095,0.025589334468046825
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,4,2,128,0,1,float16,float16,8191,0.08549867073694865
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,4,2,128,0,1,float16,float16,16383,0.12131200234095256
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,4,2,128,0,1,float16,fp8,16383,0.058634668588638306
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,4,2,128,0,1,float16,float16,32767,0.19247466325759888
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,4,2,128,0,1,float16,fp8,32767,0.09716266393661499
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,4,1,128,0,1,float16,float16,1,0.009743999689817429
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,4,1,128,0,1,float16,fp8,1,0.0069973332186539965
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,4,1,128,0,1,float16,float16,3,0.00892800030608972
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,4,1,128,0,1,float16,fp8,3,0.00702400008837382
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,4,1,128,0,1,float16,float16,7,0.009194666519761086
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,4,2,128,0,1,float16,float16,65535,0.33640531698862713
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,4,1,128,0,1,float16,float16,31,0.010037333394090334
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,4,1,128,0,1,float16,fp8,7,0.009072000160813332
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,4,2,128,0,1,float16,fp8,65535,0.17803200085957846
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,4,1,128,0,1,float16,float16,15,0.009205333267649015
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,4,1,128,0,1,float16,fp8,15,0.006906666482488315
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,4,2,128,0,1,float16,float16,131071,0.62062935034434
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,4,1,128,0,1,float16,fp8,31,0.00914666677514712
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,4,1,128,0,1,float16,float16,63,0.011711999773979187
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,4,1,128,0,1,float16,fp8,63,0.007162666569153468
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,4,1,128,0,1,float16,float16,127,0.011461333682139715
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,4,1,128,0,1,float16,fp8,127,0.007184000064929326
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,4,1,128,0,1,float16,float16,255,0.011685332904259363
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,4,1,128,0,1,float16,fp8,255,0.008047999814152718
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,4,1,128,0,1,float16,float16,511,0.015040000279744467
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,4,1,128,0,1,float16,fp8,511,0.008672000219424566
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,4,1,128,0,1,float16,float16,1023,0.01664000004529953
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,4,1,128,0,1,float16,fp8,1023,0.008698666468262672
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,4,2,128,0,1,float16,fp8,131071,0.33691199620564777
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,4,1,128,0,1,float16,float16,2047,0.01670933390657107
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,4,1,128,0,1,float16,fp8,2047,0.009770666559537252
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,4,1,128,0,1,float16,float16,4095,0.017077332983414333
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,4,1,128,0,1,float16,fp8,4095,0.010133333504199982
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,4,1,128,0,1,float16,float16,8191,0.01848000039656957
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,4,1,128,0,1,float16,fp8,8191,0.012703999876976013
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,4,1,128,0,1,float16,float16,32767,0.024661332368850708
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,4,1,128,0,1,float16,float16,16383,0.019914666811625164
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,4,1,128,0,1,float16,fp8,16383,0.015029333531856537
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,4,1,128,0,1,float16,fp8,32767,0.018826667219400406
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,4,2,128,0,1,float16,float16,1,0.009008000294367472
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,4,2,128,0,1,float16,fp8,1,0.00790933333337307
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,4,1,128,0,1,float16,fp8,65535,0.02298133323589961
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,4,2,128,0,1,float16,float16,3,0.008933333059151968
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,4,2,128,0,1,float16,fp8,7,0.007567999884486198
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,4,1,128,0,1,float16,float16,65535,0.029274667302767437
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,4,2,128,0,1,float16,fp8,3,0.007391999786098798
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,4,2,128,0,1,float16,float16,7,0.008965333302815756
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,4,2,128,0,1,float16,float16,15,0.0086666668454806
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,4,2,128,0,1,float16,fp8,15,0.007338666667540868
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,4,2,128,0,1,float16,float16,31,0.009808000177145004
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,4,2,128,0,1,float16,fp8,31,0.00725333330531915
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,4,2,128,0,1,float16,float16,63,0.01145600030819575
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,4,2,128,0,1,float16,fp8,63,0.006864000111818314
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,4,2,128,0,1,float16,float16,127,0.01099733387430509
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,4,2,128,0,1,float16,fp8,127,0.007114666824539502
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,4,1,128,0,1,float16,fp8,131071,0.030853333572546642
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,4,2,128,0,1,float16,fp8,511,0.007477333148320516
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,4,1,128,0,1,float16,float16,131071,0.03920533259709676
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,4,2,128,0,1,float16,float16,255,0.011567999919255575
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,4,2,128,0,1,float16,fp8,255,0.007925333455204964
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,4,2,128,0,1,float16,float16,511,0.015664000064134598
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,4,2,128,0,1,float16,float16,1023,0.016741332908471424
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,4,2,128,0,1,float16,float16,2047,0.01629866659641266
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,4,2,128,0,1,float16,fp8,1023,0.008586666857202848
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,4,2,128,0,1,float16,fp8,2047,0.009797333429257074
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,4,2,128,0,1,float16,float16,4095,0.01580799991885821
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,4,2,128,0,1,float16,fp8,4095,0.010853332777818045
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,4,2,128,0,1,float16,float16,8191,0.01903466631968816
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,4,2,128,0,1,float16,fp8,8191,0.012778667112191519
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,4,2,128,0,1,float16,float16,16383,0.01995733380317688
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,4,2,128,0,1,float16,fp8,16383,0.015173333386580149
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,4,2,128,0,1,float16,float16,32767,0.02573866645495097
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,4,2,128,0,1,float16,fp8,32767,0.01932266727089882
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,4,1,128,0,1,float16,float16,1,0.008821333448092142
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,4,1,128,0,1,float16,fp8,1,0.00898133342464765
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,4,2,128,0,1,float16,float16,65535,0.03129599988460541
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,4,1,128,0,1,float16,float16,3,0.009002666920423508
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,4,2,128,0,1,float16,fp8,65535,0.024005333582560223
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,4,1,128,0,1,float16,fp8,3,0.0069866664707660675
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,4,1,128,0,1,float16,float16,7,0.009258666386206945
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,4,1,128,0,1,float16,fp8,7,0.006895999734600385
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,4,1,128,0,1,float16,float16,15,0.009109333157539368
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,4,1,128,0,1,float16,fp8,15,0.007120000198483467
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,4,1,128,0,1,float16,float16,31,0.009637333452701569
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,4,1,128,0,1,float16,fp8,31,0.008597333605090777
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,4,1,128,0,1,float16,float16,63,0.011434666812419891
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,4,2,128,0,1,float16,fp8,131071,0.034272000193595886
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,4,1,128,0,1,float16,fp8,63,0.0075040000180403394
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,4,1,128,0,1,float16,float16,127,0.011535999675591787
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,4,2,128,0,1,float16,float16,131071,0.05716800192991892
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,4,1,128,0,1,float16,fp8,127,0.0068693334857622785
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,4,1,128,0,1,float16,float16,255,0.01191466674208641
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,4,1,128,0,1,float16,fp8,255,0.008005333443482717
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,4,1,128,0,1,float16,float16,511,0.015301333119471868
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,4,1,128,0,1,float16,fp8,511,0.0081386665503184
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,4,1,128,0,1,float16,float16,1023,0.01695466662446658
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,4,1,128,0,1,float16,fp8,4095,0.010159999753038088
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,4,1,128,0,1,float16,fp8,1023,0.008767999708652496
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,4,1,128,0,1,float16,float16,2047,0.016693333784739178
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,4,1,128,0,1,float16,fp8,2047,0.009658666948477427
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,4,1,128,0,1,float16,float16,4095,0.01609066625436147
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,4,1,128,0,1,float16,float16,8191,0.01918399954835574
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,4,1,128,0,1,float16,fp8,8191,0.012752000242471695
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,4,1,128,0,1,float16,float16,16383,0.02024000013868014
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,4,1,128,0,1,float16,fp8,16383,0.014943999548753103
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,4,1,128,0,1,float16,float16,32767,0.024746666351954143
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,4,1,128,0,1,float16,fp8,32767,0.01912533367673556
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,4,2,128,0,1,float16,float16,1,0.00847999999920527
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,4,2,128,0,1,float16,fp8,1,0.007231999809543292
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,4,2,128,0,1,float16,float16,3,0.008746666833758354
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,4,2,128,0,1,float16,fp8,3,0.008053333188096682
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,4,1,128,0,1,float16,fp8,65535,0.02365333338578542
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,4,1,128,0,1,float16,float16,65535,0.030832000076770782
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,4,2,128,0,1,float16,fp8,15,0.008506666868925095
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,4,2,128,0,1,float16,float16,7,0.008453333129485449
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,4,2,128,0,1,float16,fp8,7,0.007055999711155891
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,4,2,128,0,1,float16,float16,63,0.011600000162919363
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,4,2,128,0,1,float16,fp8,63,0.005605333174268405
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,4,2,128,0,1,float16,fp8,31,0.007216000308593114
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,4,2,128,0,1,float16,float16,15,0.009152000149091085
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,4,2,128,0,1,float16,float16,31,0.009839999799927076
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,4,2,128,0,1,float16,float16,127,0.011413333316644033
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,4,2,128,0,1,float16,fp8,127,0.008389333263039589
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,4,1,128,0,1,float16,float16,131071,0.05852800110975901
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,4,2,128,0,1,float16,float16,255,0.011039999624093374
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,4,1,128,0,1,float16,fp8,131071,0.035029334326585136
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,4,2,128,0,1,float16,fp8,255,0.007738666608929634
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,4,2,128,0,1,float16,float16,511,0.01562133307258288
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,4,2,128,0,1,float16,fp8,511,0.007877333089709282
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,4,2,128,0,1,float16,float16,1023,0.016613333175579708
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,4,2,128,0,1,float16,fp8,1023,0.008992000172535578
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,4,2,128,0,1,float16,float16,2047,0.01591466615597407
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,4,2,128,0,1,float16,fp8,2047,0.008943999807039896
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,4,2,128,0,1,float16,float16,4095,0.016490666816631954
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,4,2,128,0,1,float16,fp8,4095,0.010885333021481832
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,4,2,128,0,1,float16,float16,8191,0.01970133309563001
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,4,2,128,0,1,float16,fp8,8191,0.012821332861979803
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,4,2,128,0,1,float16,float16,16383,0.0220320001244545
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,4,2,128,0,1,float16,fp8,16383,0.015669333438078564
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,4,2,128,0,1,float16,fp8,32767,0.021141332884629566
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,4,2,128,0,1,float16,float16,32767,0.0353973334034284
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,4,1,128,0,1,float16,float16,1,0.010570666442314783
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,4,1,128,0,1,float16,fp8,1,0.008549333239595095
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,4,2,128,0,1,float16,float16,65535,0.048613334695498146
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,4,1,128,0,1,float16,float16,3,0.010117333382368088
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,4,2,128,0,1,float16,fp8,65535,0.026746665438016255
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,4,1,128,0,1,float16,fp8,3,0.007365333537260692
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,4,1,128,0,1,float16,float16,7,0.010533332824707031
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,4,1,128,0,1,float16,fp8,7,0.008394666636983553
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,4,1,128,0,1,float16,fp8,31,0.007418666655818622
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,4,1,128,0,1,float16,float16,15,0.010768000036478043
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,4,1,128,0,1,float16,fp8,15,0.005797333394487699
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,4,1,128,0,1,float16,float16,31,0.010901333143313726
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,4,2,128,0,1,float16,float16,131071,0.07869866490364075
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,4,1,128,0,1,float16,float16,63,0.0106133334338665
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,4,1,128,0,1,float16,fp8,63,0.00879466657837232
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,4,1,128,0,1,float16,float16,127,0.010944000134865442
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,4,2,128,0,1,float16,fp8,131071,0.04190400242805481
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,4,1,128,0,1,float16,fp8,127,0.00720000018676122
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,4,1,128,0,1,float16,float16,255,0.013264000415802002
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,4,1,128,0,1,float16,fp8,255,0.008143999924262365
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,4,1,128,0,1,float16,float16,511,0.01971199984351794
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,4,1,128,0,1,float16,fp8,511,0.009695999945203463
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,4,1,128,0,1,float16,float16,1023,0.0284853329261144
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,4,1,128,0,1,float16,fp8,1023,0.014511999984582266
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,4,1,128,0,1,float16,float16,2047,0.043050666650136314
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,4,1,128,0,1,float16,fp8,2047,0.01722666621208191
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,4,1,128,0,1,float16,float16,4095,0.06804800033569336
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,4,1,128,0,1,float16,fp8,4095,0.02622933437426885
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,4,1,128,0,1,float16,float16,8191,0.0867146650950114
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,4,1,128,0,1,float16,fp8,8191,0.03847466657559077
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,4,1,128,0,1,float16,fp8,16383,0.05764266848564148
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,4,1,128,0,1,float16,float16,16383,0.12167466680208842
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,4,2,128,0,1,float16,float16,1,0.010863999525705973
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,4,2,128,0,1,float16,fp8,1,0.0069440001000960665
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,4,1,128,0,1,float16,float16,32767,0.19404800732930502
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,4,2,128,0,1,float16,float16,3,0.010768000036478043
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,4,2,128,0,1,float16,fp8,3,0.007983999947706858
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,4,2,128,0,1,float16,float16,7,0.010890666395425797
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,4,1,128,0,1,float16,fp8,32767,0.09831999739011128
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,4,2,128,0,1,float16,float16,15,0.010496000448862711
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,4,2,128,0,1,float16,fp8,7,0.007477333148320516
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,4,2,128,0,1,float16,fp8,15,0.007205333560705185
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,4,2,128,0,1,float16,float16,31,0.010944000134865442
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,4,2,128,0,1,float16,fp8,31,0.0058666666348775225
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,4,2,128,0,1,float16,float16,63,0.011136000355084738
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,4,2,128,0,1,float16,fp8,63,0.0074986666440963745
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,4,2,128,0,1,float16,float16,127,0.010970667004585266
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,4,2,128,0,1,float16,fp8,127,0.007034666836261749
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,4,2,128,0,1,float16,float16,255,0.020810666183630627
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,4,2,128,0,1,float16,fp8,255,0.008762666955590248
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,4,2,128,0,1,float16,float16,511,0.029093332588672638
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,4,2,128,0,1,float16,fp8,511,0.013482666263977686
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,4,2,128,0,1,float16,float16,1023,0.04651733239491781
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,4,2,128,0,1,float16,fp8,1023,0.01942933350801468
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,4,1,128,0,1,float16,fp8,65535,0.17883199453353882
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,4,1,128,0,1,float16,float16,65535,0.3400213321050008
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,4,2,128,0,1,float16,float16,2047,0.0663679987192154
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,4,2,128,0,1,float16,fp8,2047,0.02535466601451238
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,4,2,128,0,1,float16,float16,4095,0.08387733499209087
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,4,2,128,0,1,float16,fp8,4095,0.03808533400297165
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,4,2,128,0,1,float16,float16,8191,0.12013333042462666
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,4,2,128,0,1,float16,fp8,8191,0.057018667459487915
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,4,1,128,0,1,float16,float16,1,0.008602666358153025
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,4,2,128,0,1,float16,float16,16383,0.19299733638763428
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,4,2,128,0,1,float16,fp8,16383,0.09714133540789287
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,4,1,128,0,1,float16,fp8,1,0.007285333548982938
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,4,1,128,0,1,float16,float16,3,0.008527999743819237
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,4,1,128,0,1,float16,fp8,3,0.006773333375652631
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,4,1,128,0,1,float16,float16,7,0.009088000282645226
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,4,1,128,0,1,float16,float16,15,0.008752000207702318
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,4,1,128,0,1,float16,fp8,7,0.007146666447321574
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,4,1,128,0,1,float16,fp8,15,0.0069386667261521024
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,4,2,128,0,1,float16,fp8,32767,0.1773279905319214
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,4,1,128,0,1,float16,fp8,31,0.005690666536490123
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,4,1,128,0,1,float16,float16,31,0.010069333637754122
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,4,2,128,0,1,float16,float16,32767,0.33350932598114014
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,4,1,128,0,1,float16,float16,63,0.011407999942700068
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,4,1,128,0,1,float16,fp8,63,0.0069866664707660675
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,4,1,128,0,1,float16,float16,127,0.011247999966144562
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,4,1,128,0,1,float16,fp8,127,0.007594666754206021
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,4,1,128,0,1,float16,float16,255,0.01190399999419848
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,4,1,128,0,1,float16,fp8,255,0.008063999935984612
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,4,1,128,0,1,float16,float16,511,0.015935999651749928
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,4,1,128,0,1,float16,fp8,511,0.008181333541870117
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,4,1,128,0,1,float16,float16,1023,0.016250666230916977
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,4,1,128,0,1,float16,fp8,1023,0.008816000074148178
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,4,1,128,0,1,float16,float16,2047,0.01699200024207433
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,4,1,128,0,1,float16,fp8,2047,0.00985599992175897
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,4,1,128,0,1,float16,float16,4095,0.017231999586025875
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,4,2,128,0,1,float16,float16,65535,0.616650660832723
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,4,1,128,0,1,float16,float16,16383,0.02311466634273529
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,4,2,128,0,1,float16,fp8,65535,0.33527998129526776
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,4,1,128,0,1,float16,fp8,4095,0.010863999525705973
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,4,1,128,0,1,float16,float16,8191,0.020554666717847187
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,4,1,128,0,1,float16,fp8,8191,0.01310933381319046
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,4,1,128,0,1,float16,fp8,16383,0.015562667200962702
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,4,1,128,0,1,float16,float16,32767,0.03646933287382126
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,4,2,128,0,1,float16,float16,1,0.009002666920423508
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,4,1,128,0,1,float16,fp8,32767,0.021386665602525074
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,4,2,128,0,1,float16,fp8,1,0.007413333281874657
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,4,2,128,0,1,float16,float16,3,0.008992000172535578
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,4,2,128,0,1,float16,fp8,3,0.007173333317041397
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,4,2,128,0,1,float16,float16,7,0.009152000149091085
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,4,1,128,0,1,float16,fp8,65535,0.02644266684850057
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,4,2,128,0,1,float16,fp8,7,0.007093333328763644
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,4,1,128,0,1,float16,float16,65535,0.04922133187452952
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,4,2,128,0,1,float16,float16,15,0.009178666397929192
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,4,2,128,0,1,float16,float16,63,0.0116799995303154
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,4,2,128,0,1,float16,fp8,15,0.007285333548982938
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,4,2,128,0,1,float16,float16,31,0.010053333515922228
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,4,2,128,0,1,float16,fp8,31,0.007221333061655362
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,4,2,128,0,1,float16,float16,255,0.011754666765530905
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,4,1,128,0,1,float16,float16,131071,0.07832000156243642
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,4,2,128,0,1,float16,fp8,63,0.007290666922926903
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,4,2,128,0,1,float16,float16,127,0.011589333415031433
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,4,2,128,0,1,float16,fp8,511,0.007706666365265846
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,4,2,128,0,1,float16,fp8,127,0.007226666435599327
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,4,2,128,0,1,float16,fp8,255,0.008912000184257826
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,4,1,128,0,1,float16,fp8,131071,0.04139200101296107
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,4,2,128,0,1,float16,float16,511,0.01599466676513354
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,4,2,128,0,1,float16,float16,1023,0.013674666484196981
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,4,2,128,0,1,float16,fp8,1023,0.00855466661353906
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,4,2,128,0,1,float16,float16,2047,0.016522667060295742
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,4,2,128,0,1,float16,fp8,2047,0.00984533317387104
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,4,2,128,0,1,float16,float16,4095,0.017978666971127193
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,4,2,128,0,1,float16,fp8,4095,0.011157333850860596
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,4,2,128,0,1,float16,float16,8191,0.027215999861558277
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,4,2,128,0,1,float16,fp8,8191,0.014245333770910898
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,4,2,128,0,1,float16,float16,16383,0.03215466688076655
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,4,2,128,0,1,float16,fp8,16383,0.016271999726692837
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,4,2,128,0,1,float16,float16,32767,0.05193600058555603
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,4,2,128,0,1,float16,fp8,32767,0.02473066747188568
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,4,1,128,0,1,float16,float16,1,0.010565333068370819
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,4,1,128,0,1,float16,fp8,1,0.007146666447321574
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,4,2,128,0,1,float16,float16,65535,0.07645866771539052
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,4,1,128,0,1,float16,float16,3,0.011237333218256632
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,4,1,128,0,1,float16,fp8,3,0.0058613332609335584
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,4,2,128,0,1,float16,fp8,65535,0.04602666695912679
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,4,1,128,0,1,float16,float16,7,0.011488000551859537
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,4,1,128,0,1,float16,fp8,7,0.006906666482488315
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,4,1,128,0,1,float16,float16,15,0.01157333329319954
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,4,1,128,0,1,float16,float16,31,0.011450666934251785
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,4,1,128,0,1,float16,fp8,15,0.00578666664659977
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,4,1,128,0,1,float16,fp8,31,0.007290666922926903
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,4,1,128,0,1,float16,float16,63,0.011231999844312668
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,4,1,128,0,1,float16,fp8,63,0.008357333640257517
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,4,2,128,0,1,float16,fp8,131071,0.066021333138148
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,4,2,128,0,1,float16,float16,131071,0.12728533148765564
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,4,1,128,0,1,float16,float16,127,0.011359999577204386
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,4,1,128,0,1,float16,fp8,127,0.006474666918317477
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,4,1,128,0,1,float16,fp8,511,0.013893333574136099
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,4,1,128,0,1,float16,float16,255,0.02186133215824763
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,4,1,128,0,1,float16,fp8,255,0.009066666786869368
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,4,1,128,0,1,float16,float16,511,0.029317334294319153
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,4,1,128,0,1,float16,float16,1023,0.04629333317279816
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,4,1,128,0,1,float16,fp8,1023,0.020400000115235645
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,4,1,128,0,1,float16,float16,2047,0.06553066770235698
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,4,1,128,0,1,float16,fp8,2047,0.025989333788553875
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,4,1,128,0,1,float16,fp8,4095,0.03851199895143509
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,4,1,128,0,1,float16,float16,4095,0.08569600184758504
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,4,1,128,0,1,float16,float16,8191,0.12238400181134541
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,4,1,128,0,1,float16,fp8,8191,0.05734399954477946
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,4,2,128,0,1,float16,float16,1,0.016042667130629223
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,4,2,128,0,1,float16,fp8,1,0.0074453335255384445
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,4,2,128,0,1,float16,float16,3,0.01598400001724561
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,4,2,128,0,1,float16,fp8,3,0.007231999809543292
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,4,2,128,0,1,float16,float16,7,0.016255999604860943
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,4,1,128,0,1,float16,float16,16383,0.19458667437235513
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,4,2,128,0,1,float16,fp8,7,0.007184000064929326
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,4,2,128,0,1,float16,float16,15,0.01589866727590561
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,4,2,128,0,1,float16,fp8,15,0.009722666814923286
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,4,1,128,0,1,float16,fp8,16383,0.09678399562835693
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,4,2,128,0,1,float16,float16,63,0.01581866666674614
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,4,2,128,0,1,float16,float16,31,0.016282666474580765
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,4,2,128,0,1,float16,float16,127,0.01579733317097028
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,4,2,128,0,1,float16,fp8,31,0.00721066693464915
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,4,2,128,0,1,float16,fp8,63,0.007386666412154834
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,4,1,128,0,1,float16,float16,32767,0.339194655418396
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,4,2,128,0,1,float16,fp8,127,0.007743999982873599
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,4,2,128,0,1,float16,float16,255,0.03068266560633977
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,4,2,128,0,1,float16,float16,1023,0.06260266900062561
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,4,2,128,0,1,float16,fp8,255,0.013946666071812311
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,4,2,128,0,1,float16,float16,511,0.04715733230113983
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,4,2,128,0,1,float16,fp8,511,0.01951466624935468
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,4,1,128,0,1,float16,fp8,32767,0.17792532841364542
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,4,2,128,0,1,float16,fp8,1023,0.02502399931351344
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,4,2,128,0,1,float16,fp8,2047,0.036831999818483986
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,4,2,128,0,1,float16,float16,2047,0.084197332461675
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,4,2,128,0,1,float16,float16,4095,0.12107200423876445
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,4,2,128,0,1,float16,fp8,4095,0.05670933425426483
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,4,2,128,0,1,float16,float16,8191,0.1932213306427002
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,4,2,128,0,1,float16,fp8,8191,0.09685333569844563
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,4,1,128,0,1,float16,float16,1,0.01571200042963028
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,4,1,128,0,1,float16,fp8,1,0.008181333541870117
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,4,2,128,0,1,float16,fp8,16383,0.1761173407236735
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,4,1,128,0,1,float16,float16,3,0.01584533353646596
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,4,1,128,0,1,float16,fp8,3,0.007354666789372762
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,4,1,128,0,1,float16,float16,7,0.01579733317097028
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,4,1,128,0,1,float16,fp8,7,0.008383999889095625
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,4,1,128,0,1,float16,fp8,15,0.00721066693464915
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,4,2,128,0,1,float16,float16,16383,0.33661333719889325
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,4,1,128,0,1,float16,float16,15,0.015466666469971338
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,4,1,128,0,1,float16,float16,31,0.016122666498025257
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,4,1,128,0,1,float16,fp8,31,0.007141333073377609
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,4,1,128,0,1,float16,float16,63,0.016314666718244553
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,4,1,128,0,1,float16,fp8,63,0.00696000022192796
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,4,1,128,0,1,float16,float16,255,0.031210665901501972
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,4,1,128,0,1,float16,fp8,127,0.00786666696270307
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,4,1,128,0,1,float16,float16,127,0.01562133307258288
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,4,1,128,0,1,float16,float16,511,0.04764799773693085
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,4,2,128,0,1,float16,float16,32767,0.6246986786524454
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,4,1,128,0,1,float16,fp8,255,0.01393066719174385
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,4,2,128,0,1,float16,fp8,32767,0.3346879879633586
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,4,1,128,0,1,float16,fp8,511,0.019999999552965164
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,4,1,128,0,1,float16,float16,1023,0.06351466476917267
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,4,1,128,0,1,float16,fp8,1023,0.025674665967623394
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,4,1,128,0,1,float16,float16,2047,0.0846666693687439
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,4,1,128,0,1,float16,fp8,2047,0.03834133346875509
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,4,1,128,0,1,float16,float16,4095,0.12165866295496623
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,4,2,128,0,1,float16,float16,1,0.02588266630967458
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,4,2,128,0,1,float16,fp8,1,0.009674666449427605
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,4,1,128,0,1,float16,fp8,4095,0.05663466453552246
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,4,2,128,0,1,float16,float16,3,0.025797332326571148
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,4,2,128,0,1,float16,float16,7,0.025759999950726826
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,4,2,128,0,1,float16,fp8,3,0.010106666634480158
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,4,2,128,0,1,float16,fp8,7,0.009648000200589498
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,4,1,128,0,1,float16,float16,8191,0.1962933341662089
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,4,1,128,0,1,float16,fp8,8191,0.09686932961146037
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,4,2,128,0,1,float16,float16,15,0.025466665625572205
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,4,2,128,0,1,float16,fp8,15,0.010405333091815313
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,4,2,128,0,1,float16,float16,31,0.025663999219735462
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,4,2,128,0,1,float16,fp8,31,0.010106666634480158
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,4,2,128,0,1,float16,float16,63,0.024656000236670177
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,4,2,128,0,1,float16,fp8,63,0.010293333480755487
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,4,2,128,0,1,float16,float16,127,0.02568000058333079
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,4,2,128,0,1,float16,fp8,127,0.011519999553759893
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,4,2,128,0,1,float16,float16,255,0.05064000189304352
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,4,2,128,0,1,float16,fp8,255,0.019519999623298645
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,4,2,128,0,1,float16,float16,511,0.06558933357397716
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,4,2,128,0,1,float16,fp8,511,0.024512000381946564
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,4,2,128,0,1,float16,float16,1023,0.08288000027338664
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,4,2,128,0,1,float16,fp8,1023,0.03733866661787033
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,4,2,128,0,1,float16,float16,2047,0.12150399883588155
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,4,2,128,0,1,float16,fp8,2047,0.05657066901524862
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,4,2,128,0,1,float16,float16,4095,0.19394665956497192
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,4,2,128,0,1,float16,fp8,4095,0.0965119997660319
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,4,1,128,0,1,float16,float16,1,0.025829332570234936
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,4,1,128,0,1,float16,float16,3,0.025487999121348064
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,4,2,128,0,1,float16,float16,8191,0.3375306526819865
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,4,1,128,0,1,float16,fp8,1,0.010490667074918747
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,4,1,128,0,1,float16,fp8,3,0.010170666500926018
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,4,1,128,0,1,float16,float16,15,0.0259253333012263
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,4,1,128,0,1,float16,float16,7,0.02584533393383026
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,4,2,128,0,1,float16,fp8,8191,0.1771519978841146
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,4,1,128,0,1,float16,fp8,7,0.01032533310353756
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,4,1,128,0,1,float16,fp8,15,0.00966933307548364
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,4,1,128,0,1,float16,float16,31,0.02593066543340683
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,4,1,128,0,1,float16,float16,63,0.025407999753952026
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,4,1,128,0,1,float16,fp8,31,0.010266666611035665
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,4,1,128,0,1,float16,fp8,63,0.010288000106811523
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,4,1,128,0,1,float16,float16,255,0.04982399940490723
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,4,1,128,0,1,float16,fp8,127,0.012047999848922094
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,4,1,128,0,1,float16,float16,127,0.026565333207448322
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,4,1,128,0,1,float16,float16,511,0.06545066833496094
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,4,1,128,0,1,float16,fp8,255,0.020389333367347717
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,4,1,128,0,1,float16,fp8,511,0.025770666698614757
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,4,1,128,0,1,float16,float16,1023,0.0817333310842514
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,4,1,128,0,1,float16,fp8,1023,0.038373333712418876
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,4,1,128,0,1,float16,float16,2047,0.12220266461372375
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,4,2,128,0,1,float16,float16,1,0.04484266539414724
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,4,1,128,0,1,float16,fp8,2047,0.056789333621660866
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,4,2,128,0,1,float16,fp8,1,0.0144213338692983
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,4,1,128,0,1,float16,fp8,4095,0.09697066744168599
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,4,2,128,0,1,float16,float16,3,0.044490665197372437
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,4,2,128,0,1,float16,fp8,3,0.014426667243242264
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,4,2,128,0,1,float16,float16,7,0.04390400151411692
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,4,1,128,0,1,float16,float16,4095,0.19691733519236246
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,4,2,128,0,1,float16,fp8,7,0.014469332993030548
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,4,2,128,0,1,float16,float16,15,0.044495999813079834
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,4,2,128,0,1,float16,fp8,15,0.014325333138306936
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,4,2,128,0,1,float16,float16,31,0.044826666514078774
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,4,2,128,0,1,float16,fp8,31,0.014325333138306936
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,4,2,128,0,1,float16,fp8,63,0.013882666826248169
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,4,2,128,0,1,float16,float16,63,0.04437866806983948
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,4,2,128,0,1,float16,float16,127,0.04480533301830292
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,4,2,128,0,1,float16,fp8,127,0.01632533346613248
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,4,2,128,0,1,float16,float16,255,0.053488001227378845
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,4,2,128,0,1,float16,fp8,255,0.021344001094500225
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,4,2,128,0,1,float16,float16,511,0.07072533170382182
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,4,2,128,0,1,float16,float16,1023,0.10414933164914449
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,4,2,128,0,1,float16,fp8,511,0.033029332756996155
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,4,2,128,0,1,float16,fp8,1023,0.05318933228651682
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,4,2,128,0,1,float16,float16,2047,0.17714667320251465
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,4,1,128,0,1,float16,float16,1,0.00898133342464765
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,4,1,128,0,1,float16,fp8,1,0.00559999980032444
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,4,2,128,0,1,float16,fp8,2047,0.09370666742324829
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,4,1,128,0,1,float16,float16,3,0.009178666397929192
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,4,1,128,0,1,float16,fp8,3,0.007018666714429855
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,4,1,128,0,1,float16,float16,7,0.00926399976015091
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,4,1,128,0,1,float16,fp8,7,0.006517333288987477
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,4,1,128,0,1,float16,float16,15,0.008901333436369896
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,4,2,128,0,1,float16,float16,4095,0.3144586682319641
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,4,1,128,0,1,float16,fp8,15,0.00725333330531915
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,4,2,128,0,1,float16,fp8,4095,0.17361066738764444
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,4,1,128,0,1,float16,float16,31,0.010053333515922228
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,4,1,128,0,1,float16,fp8,31,0.007194666812817256
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,4,1,128,0,1,float16,float16,63,0.01173866664369901
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,4,1,128,0,1,float16,fp8,255,0.007983999947706858
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,4,1,128,0,1,float16,float16,511,0.01581866666674614
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,4,1,128,0,1,float16,fp8,63,0.006042666733264923
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,4,1,128,0,1,float16,fp8,1023,0.008816000074148178
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,4,1,128,0,1,float16,float16,127,0.011221333096424738
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,4,1,128,0,1,float16,fp8,127,0.007344000041484833
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,4,1,128,0,1,float16,float16,255,0.011877333124478659
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,4,1,128,0,1,float16,fp8,511,0.008240000034372011
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,4,1,128,0,1,float16,float16,1023,0.013904000322024027
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,4,1,128,0,1,float16,float16,2047,0.016970666746298473
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,4,1,128,0,1,float16,fp8,2047,0.009546666716535887
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,4,1,128,0,1,float16,float16,4095,0.018079999834299088
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,4,1,128,0,1,float16,fp8,4095,0.01126933346192042
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,4,1,128,0,1,float16,float16,8191,0.027050666511058807
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,4,1,128,0,1,float16,fp8,8191,0.014479999740918478
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,4,1,128,0,1,float16,float16,16383,0.03200533241033554
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,4,1,128,0,1,float16,fp8,16383,0.017162666966517765
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,4,1,128,0,1,float16,float16,32767,0.0514933317899704
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,4,1,128,0,1,float16,fp8,32767,0.025061334172884624
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,4,2,128,0,1,float16,float16,1,0.00898133342464765
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,4,2,128,0,1,float16,fp8,1,0.005973333492875099
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,4,2,128,0,1,float16,float16,3,0.009152000149091085
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,4,2,128,0,1,float16,fp8,3,0.0069226666043202085
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,4,1,128,0,1,float16,float16,65535,0.07652799785137177
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,4,2,128,0,1,float16,float16,7,0.009002666920423508
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,4,2,128,0,1,float16,float16,15,0.010591999938090643
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,4,2,128,0,1,float16,fp8,7,0.0058133335163195925
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,4,1,128,0,1,float16,fp8,65535,0.04533333579699198
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,4,2,128,0,1,float16,fp8,15,0.007360000163316727
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,4,2,128,0,1,float16,float16,31,0.009872000043590864
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,4,2,128,0,1,float16,fp8,63,0.0069919998447100324
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,4,2,128,0,1,float16,fp8,31,0.006826666494210561
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,4,2,128,0,1,float16,fp8,127,0.006448000048597653
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,4,2,128,0,1,float16,float16,63,0.01116266722480456
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,4,2,128,0,1,float16,float16,127,0.011781333635250727
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,4,2,128,0,1,float16,float16,255,0.011989332735538483
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,4,2,128,0,1,float16,fp8,255,0.007818666597207388
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,4,2,128,0,1,float16,float16,511,0.012938667088747025
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,4,1,128,0,1,float16,float16,131071,0.12716266512870789
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,4,2,128,0,1,float16,fp8,511,0.00766933336853981
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,4,1,128,0,1,float16,fp8,131071,0.0658079981803894
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,4,2,128,0,1,float16,float16,1023,0.014069333672523499
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,4,2,128,0,1,float16,fp8,1023,0.009008000294367472
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,4,2,128,0,1,float16,float16,2047,0.02143466720978419
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,4,2,128,0,1,float16,fp8,2047,0.0102613332370917
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,4,2,128,0,1,float16,float16,8191,0.03640000025431315
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,4,2,128,0,1,float16,fp8,4095,0.011621333658695221
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,4,2,128,0,1,float16,float16,4095,0.02483733246723811
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,4,2,128,0,1,float16,fp8,8191,0.01637866720557213
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,4,2,128,0,1,float16,float16,16383,0.053818667928377785
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,4,2,128,0,1,float16,fp8,16383,0.027562665442625683
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,4,2,128,0,1,float16,float16,32767,0.08146666487058003
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,4,2,128,0,1,float16,fp8,32767,0.03852266569932302
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,4,2,128,0,1,float16,fp8,65535,0.06421866516272227
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,4,2,128,0,1,float16,float16,65535,0.12677866220474243
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,4,1,128,0,1,float16,fp8,1,0.01381333296497663
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,4,1,128,0,1,float16,float16,1,0.044826666514078774
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,4,1,128,0,1,float16,fp8,3,0.014042666802803675
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,4,1,128,0,1,float16,float16,3,0.04527466495831808
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,4,1,128,0,1,float16,float16,7,0.0444213350613912
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,4,1,128,0,1,float16,float16,15,0.044394666949907936
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,4,1,128,0,1,float16,float16,31,0.0450186679760615
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,4,1,128,0,1,float16,fp8,7,0.014469332993030548
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,4,1,128,0,1,float16,fp8,15,0.014416000495354334
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,4,1,128,0,1,float16,float16,63,0.0440533310174942
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,4,2,128,0,1,float16,float16,131071,0.21114667256673178
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,4,1,128,0,1,float16,fp8,63,0.013647999614477158
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,4,2,128,0,1,float16,fp8,131071,0.11044266819953918
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,4,1,128,0,1,float16,fp8,31,0.014362666755914688
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,4,1,128,0,1,float16,float16,127,0.04572266836961111
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,4,1,128,0,1,float16,fp8,127,0.01716800034046173
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,4,1,128,0,1,float16,fp8,255,0.021984001000722248
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,4,1,128,0,1,float16,float16,255,0.05406933526198069
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,4,1,128,0,1,float16,fp8,511,0.03355200091997782
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,4,1,128,0,1,float16,float16,511,0.07099199791749318
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,4,2,128,0,1,float16,float16,1,0.0806879997253418
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,4,2,128,0,1,float16,float16,3,0.08118933439254761
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,4,1,128,0,1,float16,float16,1023,0.10529067118962605
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,4,1,128,0,1,float16,fp8,1023,0.05373333394527435
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,4,1,128,0,1,float16,float16,2047,0.1777013341585795
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,4,2,128,0,1,float16,fp8,1,0.024271999796231587
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,4,2,128,0,1,float16,float16,7,0.08036266764005025
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,4,2,128,0,1,float16,fp8,3,0.024154665569464367
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,4,1,128,0,1,float16,fp8,2047,0.09435733159383138
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,4,2,128,0,1,float16,fp8,7,0.02380799998839696
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,4,2,128,0,1,float16,float16,15,0.08116800089677174
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,4,2,128,0,1,float16,fp8,15,0.024314666787783306
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,4,2,128,0,1,float16,float16,31,0.08102933565775554
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,4,2,128,0,1,float16,float16,63,0.07963733375072479
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,4,2,128,0,1,float16,fp8,31,0.02421333392461141
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,4,2,128,0,1,float16,fp8,63,0.02405333270629247
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,4,2,128,0,1,float16,float16,127,0.08345599969228108
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,4,2,128,0,1,float16,fp8,127,0.028234665592511494
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,4,2,128,0,1,float16,float16,255,0.09597866733868916
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,4,2,128,0,1,float16,float16,511,0.12880000472068787
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,4,2,128,0,1,float16,fp8,255,0.04041066765785217
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,4,2,128,0,1,float16,fp8,511,0.05890133480230967
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,4,2,128,0,1,float16,float16,1023,0.19682133197784424
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,4,2,128,0,1,float16,fp8,1023,0.09905067086219788
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,4,1,128,0,1,float16,fp8,1,0.023898666103680927
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,4,2,128,0,1,float16,float16,2047,0.3364106814066569
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,4,1,128,0,1,float16,fp8,3,0.023775999744733173
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,4,1,128,0,1,float16,float16,1,0.08157866696516673
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,4,2,128,0,1,float16,fp8,2047,0.1783733367919922
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,4,1,128,0,1,float16,float16,3,0.08250666658083598
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,4,1,128,0,1,float16,float16,15,0.08192533254623413
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,4,1,128,0,1,float16,fp8,15,0.02370133250951767
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,4,1,128,0,1,float16,float16,31,0.08141866823037465
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,4,1,128,0,1,float16,float16,7,0.0817386656999588
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,4,1,128,0,1,float16,fp8,7,0.02454400062561035
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,4,1,128,0,1,float16,fp8,63,0.024160000185171764
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,4,1,128,0,1,float16,fp8,31,0.02422400067249934
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,4,1,128,0,1,float16,float16,127,0.08340799808502197
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,4,1,128,0,1,float16,float16,63,0.08238933483759563
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,4,1,128,0,1,float16,fp8,127,0.02914133419593175
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,4,1,128,0,1,float16,float16,511,0.12998400131861368
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,4,1,128,0,1,float16,float16,255,0.09667733311653137
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,4,1,128,0,1,float16,fp8,255,0.041573333243529
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,4,2,128,0,1,float16,float16,1,0.15100266536076865
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,4,1,128,0,1,float16,float16,1023,0.19722666343053183
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,4,1,128,0,1,float16,fp8,511,0.06071466704209646
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,4,2,128,0,1,float16,float16,7,0.15145066380500793
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,4,1,128,0,1,float16,fp8,1023,0.10044800241788228
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,4,2,128,0,1,float16,fp8,7,0.04359466830889384
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,4,2,128,0,1,float16,float16,3,0.15077867110570273
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,4,2,128,0,1,float16,fp8,3,0.044218664367993675
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,4,2,128,0,1,float16,fp8,15,0.04384533564249674
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,4,2,128,0,1,float16,float16,15,0.1514026621977488
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,4,2,128,0,1,float16,float16,31,0.1518933375676473
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,4,2,128,0,1,float16,float16,127,0.15600533286730447
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,4,2,128,0,1,float16,fp8,31,0.044138665000597634
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,4,2,128,0,1,float16,float16,63,0.15351466337839761
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,4,2,128,0,1,float16,fp8,63,0.04363733530044556
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,4,2,128,0,1,float16,fp8,127,0.05446400245030721
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,4,2,128,0,1,float16,float16,255,0.18142932653427124
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,4,1,128,0,1,float16,float16,1,0.009328000247478485
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,4,2,128,0,1,float16,float16,511,0.24436799685160318
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,4,1,128,0,1,float16,float16,3,0.009205333267649015
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,4,1,128,0,1,float16,fp8,1,0.00761600024998188
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,4,1,128,0,1,float16,fp8,3,0.005642666791876157
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,4,1,128,0,1,float16,float16,7,0.009056000038981438
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,4,1,128,0,1,float16,fp8,7,0.007130666946371396
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,4,1,128,0,1,float16,float16,15,0.00897066667675972
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,4,1,128,0,1,float16,fp8,15,0.006911999856432279
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,4,1,128,0,1,float16,float16,31,0.011706666400035223
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,4,2,128,0,1,float16,fp8,511,0.11036800344785054
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,4,1,128,0,1,float16,fp8,31,0.005690666536490123
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,4,2,128,0,1,float16,fp8,1,0.04345066845417023
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,4,1,128,0,1,float16,float16,63,0.01156266654531161
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,4,1,128,0,1,float16,fp8,63,0.007466666400432587
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,4,1,128,0,1,float16,float16,255,0.011658667276302973
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,4,2,128,0,1,float16,float16,1023,0.3776533206303914
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,4,1,128,0,1,float16,float16,511,0.013349333157142004
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,4,2,128,0,1,float16,fp8,1023,0.1871839960416158
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,4,1,128,0,1,float16,float16,127,0.01128000020980835
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,4,1,128,0,1,float16,fp8,127,0.007237333183487256
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,4,1,128,0,1,float16,fp8,255,0.008037333066264788
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,4,1,128,0,1,float16,fp8,511,0.008197333042820295
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,4,1,128,0,1,float16,float16,1023,0.013936000565687815
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,4,1,128,0,1,float16,fp8,4095,0.012389333297808966
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,4,1,128,0,1,float16,fp8,1023,0.008938666433095932
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,4,1,128,0,1,float16,float16,2047,0.021583999196688335
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,4,1,128,0,1,float16,fp8,2047,0.010650667051474253
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,4,1,128,0,1,float16,float16,4095,0.02531733363866806
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,4,1,128,0,1,float16,float16,8191,0.0360959991812706
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,4,1,128,0,1,float16,fp8,8191,0.017504000415404636
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,4,1,128,0,1,float16,float16,16383,0.0544053316116333
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,4,1,128,0,1,float16,fp8,16383,0.027802666028340656
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,4,1,128,0,1,float16,float16,32767,0.08089600006739299
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,4,1,128,0,1,float16,fp8,32767,0.039135999977588654
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,4,2,128,0,1,float16,fp8,255,0.07162133355935414
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,4,2,128,0,1,float16,float16,1,0.009418666362762451
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,4,2,128,0,1,float16,fp8,1,0.005695999910434087
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,4,2,128,0,1,float16,fp8,3,0.005754666402935982
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,4,2,128,0,1,float16,float16,3,0.00916800027092298
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,4,1,128,0,1,float16,fp8,65535,0.06544533371925354
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,4,2,128,0,1,float16,float16,7,0.009381333366036415
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,4,1,128,0,1,float16,float16,65535,0.12874666849772134
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,4,2,128,0,1,float16,float16,15,0.008778666456540426
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,4,2,128,0,1,float16,fp8,7,0.005679999788602193
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,4,2,128,0,1,float16,fp8,15,0.0069386667261521024
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,4,2,128,0,1,float16,float16,31,0.0099093330403169
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,4,2,128,0,1,float16,fp8,31,0.006981333096822103
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,4,2,128,0,1,float16,fp8,63,0.0058133335163195925
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,4,2,128,0,1,float16,float16,63,0.011781333635250727
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,4,2,128,0,1,float16,float16,127,0.011247999966144562
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,4,2,128,0,1,float16,fp8,127,0.00725333330531915
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,4,2,128,0,1,float16,float16,255,0.012954667210578918
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,4,2,128,0,1,float16,fp8,255,0.007861333588759104
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,4,1,128,0,1,float16,fp8,131071,0.11128532886505127
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,4,2,128,0,1,float16,float16,511,0.012954667210578918
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,4,1,128,0,1,float16,float16,131071,0.2111146648724874
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,4,2,128,0,1,float16,fp8,511,0.008367999767263731
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,4,2,128,0,1,float16,float16,1023,0.01937066639463107
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,4,2,128,0,1,float16,fp8,1023,0.009541333342591921
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,4,2,128,0,1,float16,float16,2047,0.028757333755493164
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,4,2,128,0,1,float16,fp8,2047,0.012085333466529846
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,4,2,128,0,1,float16,float16,4095,0.040106666584809623
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,4,2,128,0,1,float16,fp8,4095,0.01838933303952217
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,4,2,128,0,1,float16,float16,8191,0.05934933324654897
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,4,2,128,0,1,float16,fp8,8191,0.024149333437283833
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,4,2,128,0,1,float16,float16,16383,0.0872320036093394
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,4,2,128,0,1,float16,fp8,16383,0.03999999910593033
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,4,2,128,0,1,float16,float16,32767,0.1251040001710256
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,4,2,128,0,1,float16,fp8,32767,0.062463998794555664
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,2,1,64,128,1,float16,float16,1,0.007786666974425316
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,2,1,64,0,1,float16,float16,1,0.008181333541870117
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,2,1,64,128,1,float16,fp8,1,0.00595199999709924
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,2,1,64,0,1,float16,fp8,1,0.005754666402935982
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,4,2,128,0,1,float16,float16,65535,0.1999733249346415
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,4,2,128,0,1,float16,fp8,65535,0.10617066423098247
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,2,1,64,128,1,float16,float16,3,0.008101333553592363
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,2,1,64,0,1,float16,float16,3,0.00790933333337307
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,2,1,64,128,1,float16,fp8,3,0.005621333296100299
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,2,1,64,0,1,float16,fp8,3,0.005615999922156334
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,2,1,64,128,1,float16,float16,7,0.008053333188096682
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,2,1,64,0,1,float16,float16,7,0.0081386665503184
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,2,1,64,128,1,float16,fp8,7,0.006095999851822853
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,2,1,64,0,1,float16,fp8,7,0.005653333539764087
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,2,1,64,128,1,float16,float16,15,0.00754666638871034
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,2,1,64,0,1,float16,float16,15,0.008010666817426682
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,2,1,64,128,1,float16,fp8,15,0.007237333183487256
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,2,1,64,0,1,float16,fp8,15,0.006026666611433029
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,2,1,64,128,1,float16,float16,31,0.007920000081261
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,2,1,64,0,1,float16,float16,31,0.007840000092983246
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,4,2,128,0,1,float16,fp8,131071,0.17941866318384805
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,2,1,64,0,1,float16,float16,63,0.008597333605090777
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,2,1,64,128,1,float16,fp8,31,0.0068853336075941724
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,4,2,128,0,1,float16,float16,131071,0.3593920071919759
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,2,1,64,0,1,float16,fp8,31,0.007135999699433644
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,2,1,64,128,1,float16,float16,63,0.008816000074148178
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,2,1,64,128,1,float16,fp8,63,0.005759999776879947
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,2,1,64,0,1,float16,fp8,63,0.005674666414658229
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,2,1,64,128,1,float16,float16,127,0.009226666763424873
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,2,1,64,0,1,float16,float16,127,0.009306666751702627
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,2,1,64,128,1,float16,fp8,127,0.007226666435599327
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,2,1,64,0,1,float16,fp8,127,0.007311999797821045
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,2,1,64,128,1,float16,float16,255,0.011488000551859537
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,2,1,64,0,1,float16,float16,255,0.011215999722480774
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,2,1,64,128,1,float16,fp8,255,0.00684799998998642
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,2,1,64,0,1,float16,fp8,255,0.007781333600481351
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,2,1,64,128,1,float16,float16,511,0.011839999506870905
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,2,1,64,0,1,float16,float16,511,0.011920000116030375
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,2,1,64,128,1,float16,fp8,511,0.007658666620651881
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,2,1,64,0,1,float16,fp8,511,0.007813333223263422
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,2,1,64,128,1,float16,float16,1023,0.01588800052801768
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,2,1,64,0,1,float16,float16,1023,0.016773333152135212
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,2,1,64,128,1,float16,fp8,1023,0.008586666857202848
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,2,1,64,0,1,float16,fp8,1023,0.009253333633144697
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,2,1,64,128,1,float16,float16,2047,0.019695999721686046
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,2,1,64,0,1,float16,float16,2047,0.025248001019159954
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,2,1,64,0,1,float16,fp8,2047,0.010773333410422007
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,2,1,64,0,1,float16,fp8,4095,0.013818666338920593
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,2,1,64,128,1,float16,fp8,2047,0.008069333309928576
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,2,1,64,128,1,float16,float16,8191,0.028330666323502857
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,2,1,64,128,1,float16,float16,4095,0.023904000719388325
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,2,1,64,0,1,float16,float16,4095,0.03258133431275686
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,2,1,64,128,1,float16,fp8,4095,0.007743999982873599
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,2,1,64,128,1,float16,fp8,8191,0.007781333600481351
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,2,1,64,0,1,float16,float16,8191,0.04571199913819631
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,2,1,64,0,1,float16,fp8,8191,0.022800001005331676
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,2,1,64,128,1,float16,float16,16383,0.03443199892838796
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,2,1,64,0,1,float16,float16,16383,0.0691893349091212
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,2,1,64,128,1,float16,fp8,16383,0.00772266648709774
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,2,1,64,0,1,float16,fp8,16383,0.03033066789309184
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,2,1,64,128,1,float16,float16,32767,0.03545066714286804
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,2,1,64,0,1,float16,float16,32767,0.09294399619102478
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,2,1,64,128,1,float16,fp8,32767,0.008293333152929941
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,2,1,64,0,1,float16,fp8,32767,0.05151999990145365
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,2,1,64,128,1,float16,fp8,65535,0.007983999947706858
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,2,1,64,0,1,float16,fp8,65535,0.0846560001373291
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,2,1,64,128,1,float16,float16,65535,0.03551999976237615
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,2,1,64,0,1,float16,float16,65535,0.1405226687590281
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,2,1,64,128,1,float16,float16,1,0.0074560002734263735
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,2,1,64,0,1,float16,float16,1,0.007424000029762586
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,2,1,64,128,1,float16,fp8,1,0.00679466687142849
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,2,1,64,0,1,float16,fp8,1,0.005514666438102722
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,2,1,64,128,1,float16,float16,3,0.008037333066264788
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,2,1,64,0,1,float16,float16,3,0.00744000015159448
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,2,1,64,128,1,float16,fp8,3,0.0069226666043202085
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,2,1,64,0,1,float16,fp8,3,0.005648000165820122
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,2,1,64,0,1,float16,float16,7,0.007754666730761528
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,2,1,64,128,1,float16,float16,7,0.00797333319981893
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,2,1,64,128,1,float16,fp8,7,0.006965333595871925
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,2,1,64,0,1,float16,fp8,7,0.005535999933878581
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,2,1,64,128,1,float16,float16,15,0.007376000285148621
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,2,1,64,0,1,float16,float16,15,0.007983999947706858
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,2,1,64,0,1,float16,fp8,15,0.005530666559934616
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,2,1,64,128,1,float16,fp8,15,0.007040000210205714
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,2,1,64,128,1,float16,float16,31,0.007743999982873599
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,2,1,64,128,1,float16,float16,131071,0.037248000502586365
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,2,1,64,0,1,float16,float16,31,0.008234666660428047
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,2,1,64,128,1,float16,fp8,131071,0.008309333274761835
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,2,1,64,128,1,float16,float16,63,0.008458666503429413
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,2,1,64,128,1,float16,fp8,31,0.005568000177542369
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,2,1,64,0,1,float16,fp8,31,0.0068800002336502075
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,2,1,64,0,1,float16,float16,131071,0.23780266443888345
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,2,1,64,0,1,float16,fp8,131071,0.14615466197331747
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,2,1,64,0,1,float16,float16,63,0.008954666554927826
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,2,1,64,128,1,float16,fp8,63,0.006874666859706243
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,2,1,64,0,1,float16,fp8,63,0.007125333572427432
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,2,1,64,128,1,float16,float16,127,0.008570666735370954
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,2,1,64,0,1,float16,float16,127,0.00933333362142245
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,2,1,64,128,1,float16,fp8,127,0.006954666847983996
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,2,1,64,0,1,float16,fp8,127,0.007301333049933116
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,2,1,64,128,1,float16,float16,255,0.00897066667675972
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,2,1,64,0,1,float16,float16,255,0.009322666873534521
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,2,1,64,128,1,float16,fp8,255,0.006624000146985054
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,2,1,64,0,1,float16,fp8,255,0.007653333246707916
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,2,1,64,128,1,float16,float16,511,0.008586666857202848
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,2,1,64,0,1,float16,float16,511,0.010090666512648264
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,2,1,64,128,1,float16,fp8,511,0.007231999809543292
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,2,1,64,0,1,float16,fp8,511,0.007626666376988093
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,2,1,64,128,1,float16,float16,2047,0.013280000537633896
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,2,1,64,128,1,float16,float16,1023,0.014127999544143677
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,2,1,64,0,1,float16,float16,1023,0.01522133375207583
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,2,1,64,128,1,float16,fp8,1023,0.008016000191370646
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,2,1,64,0,1,float16,fp8,1023,0.00790933333337307
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,2,1,64,0,1,float16,float16,2047,0.016837333639462788
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,2,1,64,128,1,float16,fp8,2047,0.007610666876037915
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,2,1,64,0,1,float16,fp8,2047,0.009205333267649015
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,2,1,64,128,1,float16,float16,4095,0.013386666774749756
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,2,1,64,0,1,float16,float16,8191,0.014949332922697067
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,2,1,64,0,1,float16,float16,4095,0.016607999801635742
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,2,1,64,128,1,float16,fp8,4095,0.007370666911204656
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,2,1,64,0,1,float16,fp8,4095,0.009994666402538618
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,2,1,64,128,1,float16,float16,8191,0.01341333364446958
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,2,1,64,128,1,float16,fp8,8191,0.007360000163316727
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,2,1,64,0,1,float16,fp8,8191,0.012725333372751871
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,2,1,64,128,1,float16,float16,16383,0.014170666535695394
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,2,1,64,0,1,float16,float16,16383,0.015775999675194424
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,2,1,64,0,1,float16,fp8,16383,0.014064000298579534
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,2,1,64,128,1,float16,fp8,16383,0.00726400005320708
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,2,1,64,128,1,float16,fp8,32767,0.008538666491707167
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,2,1,64,128,1,float16,float16,32767,0.01563199982047081
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,2,1,64,0,1,float16,float16,32767,0.01923199991385142
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,2,1,64,0,1,float16,fp8,32767,0.01817600056529045
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,2,1,64,128,1,float16,float16,65535,0.016869333883126576
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,2,1,64,0,1,float16,float16,65535,0.021856000026067097
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,2,1,64,128,1,float16,float16,1,0.007397333160042763
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,2,1,64,128,1,float16,fp8,65535,0.0074453335255384445
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,2,1,64,0,1,float16,fp8,65535,0.023290666441122692
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,2,1,64,0,1,float16,float16,1,0.007471999774376552
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,2,1,64,128,1,float16,fp8,1,0.007520000139872233
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,2,1,64,0,1,float16,fp8,1,0.008362666393319765
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,2,1,64,128,1,float16,float16,3,0.007365333537260692
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,2,1,64,0,1,float16,float16,3,0.008021333565314611
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,2,1,64,128,1,float16,fp8,3,0.008570666735370954
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,2,1,64,0,1,float16,fp8,3,0.005637333417932193
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,2,1,64,128,1,float16,float16,7,0.007861333588759104
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,2,1,64,128,1,float16,float16,131071,0.021674667795499165
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,2,1,64,0,1,float16,float16,7,0.008645333349704742
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,2,1,64,0,1,float16,fp8,7,0.007354666789372762
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,2,1,64,0,1,float16,float16,131071,0.029733332494894665
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,2,1,64,128,1,float16,fp8,7,0.005706666658322017
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,2,1,64,128,1,float16,fp8,131071,0.007429333403706551
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,2,1,64,0,1,float16,fp8,131071,0.029685333371162415
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,2,1,64,128,1,float16,float16,15,0.007941333577036858
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,2,1,64,0,1,float16,float16,15,0.008703999842206636
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,2,1,64,128,1,float16,fp8,15,0.007296000296870868
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,2,1,64,0,1,float16,fp8,15,0.007424000029762586
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,2,1,64,128,1,float16,float16,31,0.008442666381597519
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,2,1,64,0,1,float16,float16,31,0.007589333380262057
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,2,1,64,128,1,float16,fp8,31,0.0069866664707660675
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,2,1,64,0,1,float16,fp8,31,0.005610666548212369
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,2,1,64,128,1,float16,float16,63,0.008869333192706108
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,2,1,64,0,1,float16,float16,63,0.009082666908701261
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,2,1,64,128,1,float16,fp8,63,0.007098666702707608
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,2,1,64,0,1,float16,fp8,63,0.00730666642387708
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,2,1,64,128,1,float16,float16,127,0.009312000125646591
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,2,1,64,0,1,float16,float16,127,0.0084906667470932
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,2,1,64,128,1,float16,fp8,127,0.007071999832987785
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,2,1,64,0,1,float16,fp8,127,0.005882666756709416
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,2,1,64,128,1,float16,float16,255,0.009119999905427298
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,2,1,64,0,1,float16,float16,255,0.009296000003814697
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,2,1,64,128,1,float16,fp8,255,0.007349333415428798
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,2,1,64,0,1,float16,fp8,255,0.007461333026488622
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,2,1,64,0,1,float16,float16,1023,0.016069332758585613
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,2,1,64,128,1,float16,float16,511,0.009365333244204521
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,2,1,64,0,1,float16,float16,511,0.009503999724984169
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,2,1,64,128,1,float16,fp8,511,0.010490667074918747
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,2,1,64,0,1,float16,fp8,511,0.007285333548982938
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,2,1,64,128,1,float16,fp8,2047,0.006735999758044879
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,2,1,64,128,1,float16,float16,1023,0.013898666948080063
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,2,1,64,128,1,float16,fp8,1023,0.0074986666440963745
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,2,1,64,0,1,float16,fp8,1023,0.00854399986565113
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,2,1,64,128,1,float16,float16,2047,0.01370666672786077
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,2,1,64,0,1,float16,float16,2047,0.017024000485738117
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,2,1,64,0,1,float16,fp8,2047,0.009290666629870733
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,2,1,64,128,1,float16,float16,4095,0.013056000073750814
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,2,1,64,128,1,float16,fp8,8191,0.0075573331365982694
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,2,1,64,0,1,float16,float16,4095,0.013301332791646322
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,2,1,64,128,1,float16,float16,16383,0.01451733335852623
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,2,1,64,128,1,float16,fp8,4095,0.007525333513816197
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,2,1,64,0,1,float16,fp8,4095,0.0106133334338665
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,2,1,64,128,1,float16,float16,8191,0.013989333063364029
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,2,1,64,0,1,float16,float16,8191,0.01613866661985715
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,2,1,64,0,1,float16,fp8,8191,0.012256000190973282
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,2,1,64,0,1,float16,float16,16383,0.01704000060757001
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,2,1,64,128,1,float16,float16,32767,0.016415999581416447
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,2,1,64,128,1,float16,fp8,16383,0.006677333265542984
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,2,1,64,0,1,float16,fp8,16383,0.014005333185195923
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,2,1,64,0,1,float16,float16,32767,0.020175999651352566
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,2,1,64,128,1,float16,fp8,32767,0.007189333438873291
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,2,1,64,0,1,float16,fp8,32767,0.01886933296918869
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,2,1,64,128,1,float16,float16,65535,0.017968000223239262
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,2,1,64,0,1,float16,float16,65535,0.023845332364241283
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,2,1,64,0,1,float16,fp8,65535,0.02298133323589961
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,2,1,64,128,1,float16,fp8,65535,0.00761600024998188
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,2,1,64,128,1,float16,float16,1,0.010389333590865135
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,2,1,64,0,1,float16,float16,1,0.009610666582981745
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,2,1,64,128,1,float16,fp8,1,0.0069866664707660675
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,2,1,64,0,1,float16,fp8,1,0.006890666360656421
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,2,1,64,128,1,float16,float16,3,0.0099093330403169
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,2,1,64,0,1,float16,float16,3,0.010442666709423065
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,2,1,64,128,1,float16,float16,131071,0.021989333132902782
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,2,1,64,128,1,float16,fp8,3,0.007391999786098798
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,2,1,64,0,1,float16,fp8,3,0.005984000240763028
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,2,1,64,128,1,float16,float16,7,0.010847999403874079
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,2,1,64,0,1,float16,float16,7,0.009637333452701569
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,2,1,64,128,1,float16,fp8,7,0.00731733317176501
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,2,1,64,0,1,float16,float16,131071,0.04085866610209147
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,2,1,64,0,1,float16,fp8,7,0.005957333371043205
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,2,1,64,128,1,float16,fp8,131071,0.00749333327015241
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,2,1,64,128,1,float16,float16,15,0.010234666367371878
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,2,1,64,0,1,float16,fp8,131071,0.033530667424201965
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,2,1,64,0,1,float16,float16,15,0.009557333464423815
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,2,1,64,128,1,float16,fp8,15,0.006853333363930385
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,2,1,64,0,1,float16,fp8,15,0.007055999711155891
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,2,1,64,128,1,float16,float16,63,0.009599999835093817
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,2,1,64,128,1,float16,float16,31,0.009962666779756546
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,2,1,64,0,1,float16,float16,31,0.010079999764760336
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,2,1,64,128,1,float16,fp8,31,0.00721066693464915
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,2,1,64,0,1,float16,fp8,31,0.007178666690985362
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,2,1,64,0,1,float16,float16,63,0.010304000228643417
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,2,1,64,128,1,float16,fp8,63,0.0074346667776505155
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,2,1,64,0,1,float16,fp8,63,0.005712000032265981
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,2,1,64,128,1,float16,float16,127,0.009962666779756546
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,2,1,64,0,1,float16,float16,127,0.01007466639081637
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,2,1,64,128,1,float16,fp8,127,0.006911999856432279
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,2,1,64,0,1,float16,fp8,127,0.01137599969903628
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,2,1,64,128,1,float16,float16,255,0.012602667013804117
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,2,1,64,0,1,float16,float16,255,0.012469333906968435
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,2,1,64,128,1,float16,fp8,511,0.008058666562040647
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,2,1,64,128,1,float16,fp8,255,0.007002666592597961
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,2,1,64,0,1,float16,fp8,255,0.00772266648709774
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,2,1,64,128,1,float16,float16,511,0.016719999412695568
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,2,1,64,0,1,float16,float16,511,0.017386666188637417
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,2,1,64,0,1,float16,fp8,511,0.00860799973209699
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,2,1,64,128,1,float16,float16,1023,0.022287999590237934
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,2,1,64,0,1,float16,float16,1023,0.02515733242034912
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,2,1,64,128,1,float16,fp8,1023,0.010816000401973724
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,2,1,64,0,1,float16,fp8,1023,0.010090666512648264
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,2,1,64,128,1,float16,float16,2047,0.0276853342851003
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,2,1,64,0,1,float16,float16,2047,0.036389333506425224
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,2,1,64,128,1,float16,fp8,2047,0.008330666770537695
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,2,1,64,0,1,float16,fp8,2047,0.016522667060295742
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,2,1,64,128,1,float16,float16,4095,0.03503466645876566
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,2,1,64,0,1,float16,float16,4095,0.053130666414896645
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,2,1,64,128,1,float16,fp8,4095,0.008538666491707167
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,2,1,64,0,1,float16,fp8,4095,0.020693333198626835
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,2,1,64,128,1,float16,float16,8191,0.034314667185147606
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,2,1,64,0,1,float16,float16,8191,0.06803733110427856
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,2,1,64,128,1,float16,fp8,8191,0.00860799973209699
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,2,1,64,0,1,float16,fp8,8191,0.02932800104220708
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,2,1,64,128,1,float16,float16,16383,0.03481066723664602
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,2,1,64,0,1,float16,fp8,16383,0.04643733302752177
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,2,1,64,0,1,float16,float16,16383,0.08982933561007182
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,2,1,64,128,1,float16,fp8,16383,0.008752000207702318
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,2,1,64,128,1,float16,float16,32767,0.03457066665093104
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,2,1,64,0,1,float16,float16,32767,0.13359466195106506
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,2,1,64,128,1,float16,fp8,32767,0.008538666491707167
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,2,1,64,0,1,float16,fp8,32767,0.07991999884446462
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,2,1,64,0,1,float16,float16,65535,0.22002132733662924
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,2,1,64,128,1,float16,float16,1,0.0075093333919843035
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,2,1,64,0,1,float16,float16,1,0.007514666765928268
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,2,1,64,128,1,float16,float16,65535,0.0351200004418691
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,2,1,64,128,1,float16,fp8,1,0.00559999980032444
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,2,1,64,0,1,float16,fp8,65535,0.14461333552996317
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,2,1,64,0,1,float16,fp8,1,0.005594666426380475
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,2,1,64,128,1,float16,fp8,65535,0.008965333302815756
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,2,1,64,128,1,float16,float16,3,0.0075040000180403394
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,2,1,64,0,1,float16,float16,3,0.0086666668454806
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,2,1,64,128,1,float16,fp8,3,0.005664000287652016
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,2,1,64,0,1,float16,fp8,3,0.006890666360656421
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,2,1,64,128,1,float16,float16,7,0.00972800018886725
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,2,1,64,0,1,float16,float16,7,0.0074346667776505155
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,2,1,64,128,1,float16,fp8,7,0.007290666922926903
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,2,1,64,128,1,float16,float16,15,0.008746666833758354
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,2,1,64,0,1,float16,float16,15,0.0074346667776505155
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,2,1,64,0,1,float16,fp8,7,0.007034666836261749
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,2,1,64,128,1,float16,fp8,15,0.007071999832987785
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,2,1,64,0,1,float16,fp8,15,0.008522666369875273
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,2,1,64,128,1,float16,float16,31,0.007637333124876022
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,2,1,64,0,1,float16,float16,31,0.008170666793982187
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,2,1,64,128,1,float16,fp8,31,0.007242666557431221
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,2,1,64,0,1,float16,fp8,31,0.008234666660428047
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,2,1,64,128,1,float16,float16,63,0.008463999877373377
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,2,1,64,0,1,float16,float16,63,0.008986666798591614
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,2,1,64,128,1,float16,float16,131071,0.03584533433119456
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,2,1,64,128,1,float16,fp8,63,0.007370666911204656
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,2,1,64,0,1,float16,fp8,63,0.008336000144481659
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,2,1,64,128,1,float16,float16,127,0.008559999987483025
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,2,1,64,0,1,float16,float16,127,0.009093333035707474
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,2,1,64,128,1,float16,fp8,127,0.007071999832987785
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,2,1,64,0,1,float16,fp8,127,0.009679999823371569
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,2,1,64,128,1,float16,float16,255,0.00867733359336853
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,2,1,64,0,1,float16,float16,255,0.00877333308259646
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,2,1,64,128,1,float16,fp8,255,0.00720000018676122
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,2,1,64,0,1,float16,fp8,255,0.00871999996403853
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,2,1,64,128,1,float16,float16,511,0.008581333483258883
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,2,1,64,0,1,float16,float16,511,0.009679999823371569
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,2,1,64,128,1,float16,fp8,511,0.0074506668994824094
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,2,1,64,0,1,float16,fp8,511,0.008837333569924036
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,2,1,64,128,1,float16,float16,1023,0.013301332791646322
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,2,1,64,0,1,float16,float16,1023,0.015461333096027374
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,2,1,64,0,1,float16,float16,131071,0.3961919943491618
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,2,1,64,128,1,float16,fp8,1023,0.007354666789372762
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,2,1,64,128,1,float16,fp8,131071,0.008650666723648706
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,2,1,64,0,1,float16,fp8,1023,0.008714666590094566
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,2,1,64,0,1,float16,fp8,131071,0.27455999453862506
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,2,1,64,128,1,float16,float16,2047,0.01369599997997284
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,2,1,64,0,1,float16,float16,2047,0.013888000200192133
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,2,1,64,128,1,float16,fp8,2047,0.007333333293596904
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,2,1,64,0,1,float16,fp8,2047,0.009493333597977957
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,2,1,64,128,1,float16,float16,4095,0.01469333345691363
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,2,1,64,0,1,float16,float16,4095,0.014730667074521383
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,2,1,64,128,1,float16,fp8,4095,0.00730666642387708
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,2,1,64,0,1,float16,fp8,4095,0.009952000031868616
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,2,1,64,128,1,float16,float16,8191,0.015178666760524115
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,2,1,64,0,1,float16,float16,8191,0.01764800027012825
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,2,1,64,128,1,float16,fp8,8191,0.007514666765928268
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,2,1,64,0,1,float16,fp8,8191,0.01259200026591619
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,2,1,64,128,1,float16,float16,16383,0.015541333705186844
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,2,1,64,0,1,float16,float16,16383,0.01858666663368543
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,2,1,64,128,1,float16,fp8,16383,0.006784000123540561
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,2,1,64,0,1,float16,fp8,16383,0.015072000523408255
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,2,1,64,128,1,float16,float16,32767,0.019424000134070713
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,2,1,64,0,1,float16,float16,32767,0.02845866729815801
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,2,1,64,128,1,float16,fp8,32767,0.0074506668994824094
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,2,1,64,0,1,float16,fp8,32767,0.019744000087181728
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,2,1,64,128,1,float16,float16,65535,0.020997333029905956
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,2,1,64,0,1,float16,float16,65535,0.033200000723203026
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,2,1,64,128,1,float16,fp8,65535,0.007407999907930692
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,2,1,64,128,1,float16,float16,1,0.009946666657924652
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,2,1,64,0,1,float16,fp8,65535,0.025386666258176167
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,2,1,64,128,1,float16,fp8,1,0.005914666379491488
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,2,1,64,0,1,float16,float16,1,0.010357333347201347
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,2,1,64,128,1,float16,float16,3,0.009850666547815004
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,2,1,64,0,1,float16,fp8,1,0.00854399986565113
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,2,1,64,0,1,float16,float16,3,0.010362666721145311
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,2,1,64,128,1,float16,fp8,3,0.006058666855096817
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,2,1,64,0,1,float16,fp8,3,0.007386666412154834
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,2,1,64,128,1,float16,float16,7,0.010634666929642359
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,2,1,64,128,1,float16,float16,131071,0.028160000840822857
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,2,1,64,0,1,float16,float16,7,0.010346666599313417
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,2,1,64,128,1,float16,fp8,7,0.006106666599710782
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,2,1,64,128,1,float16,fp8,131071,0.007237333183487256
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,2,1,64,0,1,float16,float16,131071,0.058005332946777344
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,2,1,64,0,1,float16,fp8,7,0.007146666447321574
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,2,1,64,128,1,float16,float16,15,0.010687999427318573
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,2,1,64,0,1,float16,fp8,131071,0.03835200021664301
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,2,1,64,0,1,float16,float16,15,0.010591999938090643
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,2,1,64,128,1,float16,fp8,15,0.00679466687142849
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,2,1,64,0,1,float16,fp8,15,0.006128000095486641
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,2,1,64,128,1,float16,float16,31,0.01007466639081637
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,2,1,64,0,1,float16,float16,31,0.010485333700974783
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,2,1,64,128,1,float16,fp8,31,0.007173333317041397
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,2,1,64,0,1,float16,fp8,31,0.008277333031098047
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,2,1,64,0,1,float16,float16,63,0.010682666053374609
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,2,1,64,128,1,float16,float16,63,0.010575999816258749
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,2,1,64,0,1,float16,fp8,127,0.00877333308259646
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,2,1,64,128,1,float16,fp8,63,0.006965333595871925
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,2,1,64,0,1,float16,fp8,63,0.006021333237489064
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,2,1,64,128,1,float16,float16,127,0.010154666379094124
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,2,1,64,0,1,float16,float16,127,0.010410666465759277
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,2,1,64,128,1,float16,fp8,127,0.007567999884486198
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,2,1,64,128,1,float16,float16,255,0.018874666343132656
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,2,1,64,0,1,float16,float16,255,0.01854933301607768
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,2,1,64,128,1,float16,fp8,255,0.008639999975760778
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,2,1,64,0,1,float16,fp8,255,0.008250666782259941
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,2,1,64,128,1,float16,float16,511,0.0229066660006841
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,2,1,64,0,1,float16,float16,511,0.025114665428797405
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,2,1,64,128,1,float16,fp8,511,0.00855466661353906
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,2,1,64,0,1,float16,fp8,511,0.009962666779756546
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,2,1,64,128,1,float16,float16,1023,0.033344000577926636
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,2,1,64,0,1,float16,float16,1023,0.04008533308903376
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,2,1,64,128,1,float16,fp8,1023,0.009482666850090027
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,2,1,64,0,1,float16,fp8,1023,0.016544000556071598
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,2,1,64,128,1,float16,float16,2047,0.03541333228349686
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,2,1,64,0,1,float16,float16,2047,0.051370665431022644
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,2,1,64,128,1,float16,fp8,2047,0.00901333304742972
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,2,1,64,0,1,float16,float16,4095,0.06704533100128174
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,2,1,64,0,1,float16,fp8,2047,0.020453333854675293
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,2,1,64,128,1,float16,float16,8191,0.035418666899204254
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,2,1,64,128,1,float16,float16,4095,0.035936000446478523
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,2,1,64,128,1,float16,fp8,4095,0.009503999724984169
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,2,1,64,0,1,float16,fp8,4095,0.028234665592511494
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,2,1,64,0,1,float16,float16,8191,0.0886240005493164
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,2,1,64,128,1,float16,fp8,8191,0.009082666908701261
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,2,1,64,0,1,float16,fp8,8191,0.0455626646677653
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,2,1,64,128,1,float16,float16,16383,0.03566933423280716
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,2,1,64,0,1,float16,float16,16383,0.13197867075602213
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,2,1,64,128,1,float16,fp8,16383,0.00915733352303505
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,2,1,64,0,1,float16,fp8,16383,0.07868800063927968
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,2,1,64,0,1,float16,float16,32767,0.2192479968070984
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,2,1,64,128,1,float16,float16,32767,0.03588266670703888
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,2,1,64,128,1,float16,fp8,32767,0.00902399979531765
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,2,1,64,0,1,float16,fp8,32767,0.142794668674469
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,2,1,64,128,1,float16,float16,1,0.014639999717473984
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,2,1,64,0,1,float16,float16,1,0.014618666221698126
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,2,1,64,128,1,float16,fp8,1,0.008223999912540117
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,2,1,64,0,1,float16,fp8,1,0.00795199970404307
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,2,1,64,128,1,float16,float16,3,0.014773332824309668
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,2,1,64,0,1,float16,float16,3,0.014783999572197596
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,2,1,64,128,1,float16,fp8,3,0.007120000198483467
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,2,1,64,0,1,float16,fp8,3,0.007370666911204656
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,2,1,64,128,1,float16,float16,7,0.014597332725922266
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,2,1,64,0,1,float16,float16,7,0.014842666685581207
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,2,1,64,128,1,float16,fp8,7,0.007354666789372762
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,2,1,64,0,1,float16,float16,65535,0.3901493151982625
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,2,1,64,0,1,float16,fp8,7,0.007018666714429855
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,2,1,64,128,1,float16,float16,15,0.014645333091417948
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,2,1,64,0,1,float16,float16,15,0.014773332824309668
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,2,1,64,128,1,float16,float16,65535,0.0359199990828832
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,2,1,64,128,1,float16,fp8,15,0.007194666812817256
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,2,1,64,0,1,float16,fp8,15,0.007162666569153468
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,2,1,64,128,1,float16,float16,31,0.014709333578745524
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,2,1,64,0,1,float16,fp8,65535,0.271722674369812
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,2,1,64,128,1,float16,fp8,65535,0.00919999989370505
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,2,1,64,128,1,float16,fp8,31,0.007189333438873291
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,2,1,64,0,1,float16,float16,31,0.014298666268587112
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,2,1,64,0,1,float16,fp8,31,0.0074879998962084455
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,2,1,64,128,1,float16,float16,63,0.014752000570297241
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,2,1,64,0,1,float16,float16,63,0.015024000157912573
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,2,1,64,128,1,float16,fp8,63,0.007461333026488622
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,2,1,64,0,1,float16,fp8,63,0.006954666847983996
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,2,1,64,128,1,float16,float16,127,0.014159999787807465
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,2,1,64,128,1,float16,fp8,127,0.006501333167155583
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,2,1,64,128,1,float16,fp8,255,0.008789333204428354
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,2,1,64,0,1,float16,float16,127,0.014560000350077948
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,2,1,64,0,1,float16,fp8,127,0.007365333537260692
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,2,1,64,128,1,float16,float16,255,0.027119999130566914
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,2,1,64,0,1,float16,float16,255,0.027045334378878277
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,2,1,64,0,1,float16,fp8,255,0.009653333574533463
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,2,1,64,128,1,float16,float16,511,0.035455999275048576
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,2,1,64,0,1,float16,float16,511,0.04029333343108495
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,2,1,64,128,1,float16,fp8,511,0.010512000570694605
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,2,1,64,0,1,float16,fp8,511,0.01587733378012975
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,2,1,64,128,1,float16,float16,1023,0.036703998843828835
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,2,1,64,0,1,float16,float16,1023,0.05021866659323374
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,2,1,64,128,1,float16,fp8,1023,0.010826667149861654
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,2,1,64,0,1,float16,fp8,1023,0.019871999820073444
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,2,1,64,128,1,float16,float16,2047,0.03823466598987579
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,2,1,64,0,1,float16,float16,2047,0.06754133105278015
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,2,1,64,128,1,float16,fp8,2047,0.010426666587591171
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,2,1,64,0,1,float16,fp8,2047,0.027855999767780304
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,2,1,64,128,1,float16,float16,4095,0.03908800085385641
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,2,1,64,0,1,float16,float16,4095,0.09075733025868733
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,2,1,64,128,1,float16,fp8,4095,0.0107893335322539
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,2,1,64,0,1,float16,fp8,4095,0.045461331804593406
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,2,1,64,128,1,float16,float16,8191,0.03807466725508372
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,2,1,64,0,1,float16,float16,8191,0.13344533244768778
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,2,1,64,128,1,float16,fp8,8191,0.010597333312034607
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,2,1,64,128,1,float16,float16,1,0.022431999444961548
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,2,1,64,0,1,float16,fp8,8191,0.07748266557852428
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,2,1,64,0,1,float16,float16,1,0.022469334304332733
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,2,1,64,128,1,float16,fp8,1,0.006768000001708667
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,2,1,64,0,1,float16,fp8,1,0.0074879998962084455
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,2,1,64,128,1,float16,float16,3,0.022485333184401195
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,2,1,64,128,1,float16,fp8,3,0.006837333242098491
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,2,1,64,0,1,float16,float16,3,0.02293866624434789
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,2,1,64,128,1,float16,float16,16383,0.0388373335202535
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,2,1,64,128,1,float16,float16,7,0.022778667509555817
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,2,1,64,0,1,float16,float16,7,0.0223786657055219
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,2,1,64,0,1,float16,fp8,3,0.00744000015159448
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,2,1,64,0,1,float16,float16,16383,0.22159467140833536
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,2,1,64,128,1,float16,fp8,7,0.007322666545708974
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,2,1,64,128,1,float16,fp8,16383,0.010725333044926325
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,2,1,64,0,1,float16,fp8,7,0.00684799998998642
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,2,1,64,128,1,float16,float16,15,0.022463999688625336
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,2,1,64,0,1,float16,fp8,16383,0.1428053379058838
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,2,1,64,0,1,float16,float16,15,0.023018665611743927
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,2,1,64,128,1,float16,fp8,15,0.0074346667776505155
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,2,1,64,0,1,float16,fp8,15,0.007418666655818622
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,2,1,64,128,1,float16,float16,31,0.02271466702222824
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,2,1,64,0,1,float16,float16,31,0.023200000325838726
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,2,1,64,0,1,float16,fp8,31,0.006837333242098491
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,2,1,64,128,1,float16,fp8,31,0.008597333605090777
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,2,1,64,128,1,float16,float16,63,0.02275199939807256
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,2,1,64,0,1,float16,float16,63,0.022650666534900665
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,2,1,64,128,1,float16,fp8,63,0.007514666765928268
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,2,1,64,0,1,float16,fp8,63,0.007301333049933116
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,2,1,64,128,1,float16,float16,127,0.022592000663280487
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,2,1,64,128,1,float16,fp8,127,0.007941333577036858
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,2,1,64,0,1,float16,float16,127,0.023333333432674408
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,2,1,64,0,1,float16,fp8,127,0.00871999996403853
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,2,1,64,128,1,float16,float16,255,0.043578664461771645
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,2,1,64,0,1,float16,float16,255,0.04326933125654856
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,2,1,64,128,1,float16,fp8,255,0.013818666338920593
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,2,1,64,0,1,float16,fp8,255,0.015605332950750986
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,2,1,64,128,1,float16,float16,511,0.043338666359583534
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,2,1,64,0,1,float16,float16,511,0.05169066786766052
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,2,1,64,128,1,float16,fp8,511,0.014064000298579534
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,2,1,64,0,1,float16,fp8,511,0.020288000504175823
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,2,1,64,128,1,float16,float16,1023,0.043562665581703186
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,2,1,64,0,1,float16,float16,1023,0.06625066697597504
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,2,1,64,128,1,float16,fp8,1023,0.013888000200192133
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,2,1,64,128,1,float16,float16,2047,0.045941332976023354
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,2,1,64,0,1,float16,fp8,1023,0.027994667490323383
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,2,1,64,0,1,float16,float16,2047,0.09152533610661824
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,2,1,64,128,1,float16,fp8,2047,0.013994666437307993
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,2,1,64,0,1,float16,fp8,2047,0.04570133487383524
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,2,1,64,128,1,float16,fp8,4095,0.013424000392357508
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,2,1,64,128,1,float16,float16,4095,0.04625066618124644
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,2,1,64,0,1,float16,float16,4095,0.13512000441551208
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,2,1,64,128,1,float16,float16,1,0.007850666840871176
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,2,1,64,0,1,float16,fp8,4095,0.07761066655317943
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,2,1,64,0,1,float16,float16,1,0.00821333316465219
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,2,1,64,128,1,float16,fp8,1,0.006831999868154526
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,2,1,64,128,1,float16,float16,8191,0.045978665351867676
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,2,1,64,0,1,float16,fp8,1,0.007167999943097432
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,2,1,64,128,1,float16,float16,3,0.00814933329820633
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,2,1,64,0,1,float16,float16,3,0.007936000203092894
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,2,1,64,128,1,float16,fp8,3,0.007173333317041397
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,2,1,64,0,1,float16,fp8,8191,0.1423306663831075
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,2,1,64,0,1,float16,fp8,3,0.006853333363930385
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,2,1,64,128,1,float16,float16,7,0.007610666876037915
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,2,1,64,0,1,float16,float16,8191,0.22273067633310953
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,2,1,64,0,1,float16,float16,7,0.00786666696270307
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,2,1,64,128,1,float16,fp8,7,0.007360000163316727
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,2,1,64,128,1,float16,fp8,8191,0.013872000078360239
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,2,1,64,0,1,float16,fp8,7,0.007029333462317784
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,2,1,64,128,1,float16,float16,15,0.007658666620651881
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,2,1,64,0,1,float16,float16,15,0.0074986666440963745
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,2,1,64,128,1,float16,fp8,15,0.006858666737874349
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,2,1,64,0,1,float16,fp8,15,0.007280000175038974
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,2,1,64,0,1,float16,float16,63,0.009445333232482275
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,2,1,64,128,1,float16,float16,31,0.008458666503429413
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,2,1,64,0,1,float16,float16,31,0.008405333384871483
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,2,1,64,128,1,float16,fp8,31,0.005829333638151486
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,2,1,64,0,1,float16,fp8,31,0.007167999943097432
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,2,1,64,128,1,float16,float16,63,0.008570666735370954
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,2,1,64,0,1,float16,fp8,127,0.005941333249211311
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,2,1,64,0,1,float16,float16,255,0.008687999720374743
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,2,1,64,128,1,float16,fp8,63,0.0069386667261521024
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,2,1,64,0,1,float16,fp8,63,0.007893333211541176
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,2,1,64,128,1,float16,float16,127,0.009535999968647957
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,2,1,64,0,1,float16,float16,127,0.009269333134094873
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,2,1,64,128,1,float16,fp8,127,0.0069440001000960665
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,2,1,64,128,1,float16,float16,255,0.009125333279371262
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,2,1,64,128,1,float16,fp8,255,0.00679466687142849
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,2,1,64,0,1,float16,fp8,255,0.007151999821265538
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,2,1,64,128,1,float16,float16,511,0.009109333157539368
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,2,1,64,0,1,float16,float16,511,0.010362666721145311
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,2,1,64,128,1,float16,fp8,511,0.008549333239595095
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,2,1,64,0,1,float16,fp8,511,0.00795199970404307
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,2,1,64,128,1,float16,float16,1023,0.012063999970753988
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,2,1,64,0,1,float16,float16,1023,0.012128000458081564
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,2,1,64,128,1,float16,fp8,1023,0.006688000013430913
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,2,1,64,0,1,float16,fp8,1023,0.007989333321650824
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,2,1,64,128,1,float16,float16,2047,0.01440000037352244
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,2,1,64,0,1,float16,fp8,4095,0.010277333358923594
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,2,1,64,0,1,float16,float16,2047,0.014943999548753103
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,2,1,64,128,1,float16,fp8,2047,0.008320000022649765
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,2,1,64,0,1,float16,fp8,2047,0.009535999968647957
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,2,1,64,128,1,float16,float16,4095,0.014432000617186228
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,2,1,64,0,1,float16,float16,4095,0.015237333873907724
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,2,1,64,0,1,float16,fp8,8191,0.013957332819700241
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,2,1,64,128,1,float16,fp8,4095,0.006837333242098491
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,2,1,64,128,1,float16,float16,8191,0.01752000053723653
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,2,1,64,0,1,float16,float16,8191,0.021242665747801464
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,2,1,64,0,1,float16,fp8,16383,0.01646399994691213
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,2,1,64,128,1,float16,fp8,8191,0.007600000128149986
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,2,1,64,128,1,float16,float16,16383,0.018485333770513535
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,2,1,64,0,1,float16,float16,16383,0.025493333737055462
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,2,1,64,128,1,float16,fp8,16383,0.007717333113153775
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,2,1,64,128,1,float16,float16,32767,0.02163200080394745
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,2,1,64,0,1,float16,float16,32767,0.0367253323396047
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,2,1,64,128,1,float16,fp8,32767,0.008842666943868002
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,2,1,64,0,1,float16,fp8,32767,0.023082666099071503
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,2,1,64,128,1,float16,float16,65535,0.02625600000222524
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,2,1,64,0,1,float16,float16,65535,0.05668266614278158
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,2,1,64,0,1,float16,fp8,65535,0.03342399994532267
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,2,1,64,128,1,float16,fp8,65535,0.007781333600481351
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,2,1,64,128,1,float16,float16,1,0.03980266551176707
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,2,1,64,0,1,float16,float16,1,0.040149333576361336
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,2,1,64,128,1,float16,float16,131071,0.03276266654332479
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,2,1,64,128,1,float16,fp8,1,0.01138666644692421
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,2,1,64,0,1,float16,fp8,1,0.011440000186363855
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,2,1,64,128,1,float16,fp8,131071,0.007311999797821045
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,2,1,64,0,1,float16,float16,131071,0.09160533547401428
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,2,1,64,128,1,float16,float16,3,0.039520000418027244
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,2,1,64,128,1,float16,fp8,3,0.010805333654085795
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,2,1,64,0,1,float16,float16,3,0.040362666050593056
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,2,1,64,0,1,float16,fp8,131071,0.06218666831652323
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,2,1,64,0,1,float16,fp8,3,0.011407999942700068
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,2,1,64,0,1,float16,float16,7,0.03922666609287262
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,2,1,64,128,1,float16,float16,7,0.039621333281199135
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,2,1,64,128,1,float16,float16,15,0.039477333426475525
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,2,1,64,128,1,float16,fp8,7,0.011343999455372492
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,2,1,64,0,1,float16,fp8,7,0.01138666644692421
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,2,1,64,0,1,float16,float16,15,0.040378667414188385
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,2,1,64,128,1,float16,fp8,31,0.010901333143313726
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,2,1,64,128,1,float16,float16,31,0.03928533444801966
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,2,1,64,128,1,float16,fp8,15,0.011584000041087469
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,2,1,64,0,1,float16,fp8,15,0.011402666568756104
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,2,1,64,0,1,float16,float16,31,0.03968533376852671
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,2,1,64,0,1,float16,fp8,31,0.01137599969903628
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,2,1,64,128,1,float16,float16,63,0.03921066721280416
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,2,1,64,128,1,float16,fp8,63,0.010816000401973724
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,2,1,64,0,1,float16,float16,63,0.04011200120051702
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,2,1,64,0,1,float16,fp8,63,0.011498666057984034
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,2,1,64,128,1,float16,float16,127,0.03945599993069967
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,2,1,64,0,1,float16,float16,127,0.03903999924659729
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,2,1,64,128,1,float16,float16,255,0.04045333216587702
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,2,1,64,128,1,float16,fp8,127,0.01310933381319046
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,2,1,64,0,1,float16,fp8,127,0.01314666618903478
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,2,1,64,0,1,float16,float16,255,0.041402667760849
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,2,1,64,128,1,float16,fp8,255,0.013552000125249227
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,2,1,64,0,1,float16,float16,511,0.053690666953722634
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,2,1,64,0,1,float16,fp8,255,0.017269333203633625
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,2,1,64,128,1,float16,float16,511,0.0408746674656868
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,2,1,64,128,1,float16,fp8,511,0.01310933381319046
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,2,1,64,0,1,float16,fp8,511,0.025402667621771496
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,2,1,64,128,1,float16,fp8,1023,0.012624000509579977
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,2,1,64,128,1,float16,float16,1023,0.042021334171295166
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,2,1,64,0,1,float16,float16,1023,0.07671999931335449
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,2,1,64,0,1,float16,fp8,1023,0.04313066601753235
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,2,1,64,0,1,float16,fp8,2047,0.07486933469772339
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,2,1,64,128,1,float16,float16,2047,0.04460800190766653
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,2,1,64,0,1,float16,float16,2047,0.12383466958999634
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,2,1,64,128,1,float16,fp8,2047,0.013269333789745966
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,2,1,64,128,1,float16,float16,4095,0.045850664377212524
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,2,1,64,0,1,float16,fp8,1,0.01846933364868164
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,2,1,64,0,1,float16,fp8,4095,0.13941867152849832
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,2,1,64,128,1,float16,float16,1,0.07180800040562947
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,2,1,64,0,1,float16,float16,1,0.07269333302974701
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,2,1,64,128,1,float16,fp8,4095,0.013343999783198038
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,2,1,64,128,1,float16,fp8,1,0.019066666563351948
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,2,1,64,0,1,float16,float16,4095,0.20996799071629843
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,2,1,64,128,1,float16,float16,3,0.07145066559314728
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,2,1,64,128,1,float16,float16,7,0.0717439999183019
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,2,1,64,0,1,float16,float16,3,0.07170666754245758
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,2,1,64,0,1,float16,fp8,3,0.019007999449968338
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,2,1,64,128,1,float16,fp8,3,0.019152000546455383
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,2,1,64,128,1,float16,float16,15,0.07153066496054332
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,2,1,64,0,1,float16,float16,7,0.07167999943097432
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,2,1,64,128,1,float16,fp8,7,0.019066666563351948
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,2,1,64,0,1,float16,float16,15,0.07155733307202657
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,2,1,64,0,1,float16,fp8,7,0.01926933353145917
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,2,1,64,0,1,float16,float16,31,0.07148799796899159
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,2,1,64,128,1,float16,fp8,15,0.018944000204404194
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,2,1,64,0,1,float16,fp8,15,0.019002666076024372
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,2,1,64,128,1,float16,float16,31,0.07277333239714305
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,2,1,64,128,1,float16,float16,63,0.07091199855009715
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,2,1,64,128,1,float16,fp8,31,0.019050666441520054
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,2,1,64,0,1,float16,fp8,31,0.019061333189407986
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,2,1,64,128,1,float16,fp8,63,0.01836799954374631
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,2,1,64,0,1,float16,float16,63,0.07176533341407776
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,2,1,64,128,1,float16,fp8,127,0.021568000316619873
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,2,1,64,0,1,float16,fp8,63,0.018800000349680584
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,2,1,64,128,1,float16,float16,127,0.07149866720040639
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,2,1,64,0,1,float16,float16,127,0.07195733487606049
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,2,1,64,0,1,float16,float16,255,0.07687999804814656
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,2,1,64,0,1,float16,fp8,127,0.022096000611782074
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,2,1,64,128,1,float16,float16,255,0.07796800136566162
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,2,1,64,0,1,float16,fp8,255,0.029680001238981884
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,2,1,64,128,1,float16,fp8,255,0.022287999590237934
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,2,1,64,0,1,float16,fp8,511,0.0468800018231074
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,2,1,64,128,1,float16,float16,511,0.07751466830571492
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,2,1,64,0,1,float16,float16,511,0.09823999802271526
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,2,1,64,128,1,float16,fp8,511,0.02219199885924657
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,2,1,64,128,1,float16,fp8,1023,0.021546666820844013
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,2,1,64,128,1,float16,float16,1,0.007605333502093951
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,2,1,64,128,1,float16,float16,1023,0.07712533573309581
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,2,1,64,0,1,float16,float16,1,0.007573333258430163
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,2,1,64,0,1,float16,float16,1023,0.14300266901652017
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,2,1,64,128,1,float16,float16,2047,0.08055466910203297
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,2,1,64,0,1,float16,fp8,1023,0.07969599962234497
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,2,1,64,0,1,float16,fp8,1,0.005509333064158757
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,2,1,64,128,1,float16,fp8,1,0.007167999943097432
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,2,1,64,128,1,float16,float16,3,0.008117333054542542
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,2,1,64,0,1,float16,float16,3,0.007642666498819987
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,2,1,64,128,1,float16,fp8,3,0.005701333284378052
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,2,1,64,0,1,float16,fp8,3,0.008256000156203905
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,2,1,64,128,1,float16,float16,7,0.008261333530147871
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,2,1,64,0,1,float16,float16,7,0.008170666793982187
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,2,1,64,128,1,float16,fp8,7,0.005605333174268405
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,2,1,64,0,1,float16,fp8,7,0.005775999898711841
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,2,1,64,0,1,float16,float16,2047,0.23241066932678223
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,2,1,64,128,1,float16,float16,15,0.008693333094318708
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,2,1,64,128,1,float16,fp8,2047,0.022117334107557934
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,2,1,64,0,1,float16,float16,15,0.008287999778985977
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,2,1,64,0,1,float16,fp8,2047,0.14285332957903543
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,2,1,64,128,1,float16,fp8,15,0.007269333427151044
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,2,1,64,0,1,float16,fp8,15,0.005605333174268405
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,2,1,64,128,1,float16,float16,31,0.007994666695594788
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,2,1,64,0,1,float16,float16,31,0.00867733359336853
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,2,1,64,128,1,float16,fp8,31,0.007151999821265538
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,2,1,64,0,1,float16,fp8,31,0.007413333281874657
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,2,1,64,0,1,float16,float16,127,0.009503999724984169
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,2,1,64,128,1,float16,fp8,127,0.0069759997228781385
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,2,1,64,128,1,float16,float16,63,0.008869333192706108
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,2,1,64,0,1,float16,float16,63,0.008986666798591614
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,2,1,64,128,1,float16,fp8,63,0.005637333417932193
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,2,1,64,0,1,float16,fp8,63,0.005824000264207522
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,2,1,64,128,1,float16,float16,127,0.009349333122372627
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,2,1,64,0,1,float16,fp8,127,0.00744000015159448
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,2,1,64,128,1,float16,float16,255,0.009557333464423815
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,2,1,64,0,1,float16,float16,255,0.009472000102202097
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,2,1,64,128,1,float16,fp8,255,0.00725333330531915
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,2,1,64,0,1,float16,fp8,255,0.007216000308593114
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,2,1,64,128,1,float16,float16,511,0.011567999919255575
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,2,1,64,0,1,float16,float16,511,0.011834666132926941
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,2,1,64,128,1,float16,fp8,511,0.007520000139872233
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,2,1,64,0,1,float16,fp8,511,0.009941333283980688
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,2,1,64,128,1,float16,float16,1023,0.01221866657336553
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,2,1,64,0,1,float16,float16,1023,0.011968000481526056
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,2,1,64,128,1,float16,fp8,1023,0.006906666482488315
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,2,1,64,0,1,float16,fp8,1023,0.008234666660428047
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,2,1,64,128,1,float16,float16,2047,0.016714667280515034
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,2,1,64,0,1,float16,float16,2047,0.019317333896954853
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,2,1,64,128,1,float16,fp8,2047,0.008410666758815447
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,2,1,64,0,1,float16,fp8,2047,0.010165333126982054
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,2,1,64,128,1,float16,float16,4095,0.017082666357358296
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,2,1,64,0,1,float16,float16,4095,0.02022933339079221
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,2,1,64,128,1,float16,fp8,4095,0.007141333073377609
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,2,1,64,0,1,float16,fp8,4095,0.011637333780527115
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,2,1,64,128,1,float16,float16,8191,0.020421333611011505
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,2,1,64,128,1,float16,float16,16383,0.02277333289384842
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,2,1,64,0,1,float16,float16,8191,0.02903466671705246
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,2,1,64,128,1,float16,fp8,8191,0.007680000116427739
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,2,1,64,0,1,float16,fp8,8191,0.014970666418472925
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,2,1,64,0,1,float16,float16,16383,0.038245332737763725
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,2,1,64,128,1,float16,fp8,16383,0.007680000116427739
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,2,1,64,0,1,float16,fp8,16383,0.02072000006834666
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,2,1,64,128,1,float16,float16,32767,0.028549333413441975
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,2,1,64,0,1,float16,float16,32767,0.06213866670926412
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,2,1,64,128,1,float16,fp8,32767,0.007936000203092894
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,2,1,64,0,1,float16,fp8,32767,0.03548266738653183
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,2,1,64,128,1,float16,float16,65535,0.03510933369398117
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,2,1,128,0,1,float16,float16,1,0.008757333581646284
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,2,1,64,0,1,float16,float16,65535,0.0940053363641103
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,2,1,128,0,1,float16,fp8,1,0.005653333539764087
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,2,1,64,128,1,float16,fp8,65535,0.008218666538596153
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,2,1,64,0,1,float16,fp8,65535,0.05268266797065735
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,2,1,128,0,1,float16,fp8,3,0.005834666391213735
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,2,1,128,0,1,float16,float16,3,0.009178666397929192
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,2,1,128,0,1,float16,fp8,7,0.0058399997651577
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,2,1,128,0,1,float16,float16,7,0.009482666850090027
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,2,1,128,0,1,float16,float16,15,0.009381333366036415
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,2,1,128,0,1,float16,fp8,15,0.007594666754206021
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,2,1,128,0,1,float16,float16,31,0.009642666826645533
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,2,1,64,128,1,float16,float16,131071,0.037802666425704956
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,2,1,128,0,1,float16,fp8,31,0.0075093333919843035
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,2,1,128,0,1,float16,float16,63,0.011786667009194693
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,2,1,128,0,1,float16,fp8,63,0.0074986666440963745
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,2,1,128,0,1,float16,float16,127,0.01119999960064888
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,2,1,128,0,1,float16,fp8,127,0.007258666679263115
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,2,1,128,0,1,float16,float16,255,0.012666666259368261
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,2,1,128,0,1,float16,fp8,255,0.007967999825874964
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,2,1,64,0,1,float16,float16,131071,0.14618133505185446
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,2,1,128,0,1,float16,float16,511,0.013861333330472311
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,2,1,64,128,1,float16,fp8,131071,0.008293333152929941
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,2,1,128,0,1,float16,fp8,511,0.007877333089709282
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,2,1,128,0,1,float16,float16,1023,0.020015999674797058
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,2,1,64,0,1,float16,fp8,131071,0.09325333436330159
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,2,1,128,0,1,float16,fp8,1023,0.00902399979531765
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,2,1,128,0,1,float16,float16,2047,0.02900800108909607
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,2,1,128,0,1,float16,fp8,2047,0.011999999483426413
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,2,1,128,0,1,float16,float16,4095,0.04124800115823746
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,2,1,128,0,1,float16,fp8,4095,0.018581333259741466
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,2,1,128,0,1,float16,float16,8191,0.06026133398214976
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,2,1,128,0,1,float16,fp8,8191,0.02430933217207591
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,2,1,128,0,1,float16,float16,16383,0.0881760021050771
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,2,1,128,0,1,float16,fp8,16383,0.03992533435424169
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,2,1,128,0,1,float16,float16,32767,0.1271519958972931
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,2,1,128,0,1,float16,fp8,32767,0.06201600035031637
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,2,1,128,0,1,float16,float16,1,0.008410666758815447
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,2,1,128,0,1,float16,fp8,1,0.007130666946371396
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,2,1,128,0,1,float16,float16,3,0.008938666433095932
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,2,1,128,0,1,float16,fp8,3,0.005535999933878581
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,2,1,128,0,1,float16,float16,7,0.009002666920423508
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,2,1,128,0,1,float16,fp8,7,0.006890666360656421
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,2,1,128,0,1,float16,float16,15,0.00916800027092298
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,2,1,128,0,1,float16,fp8,15,0.0074986666440963745
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,2,1,128,0,1,float16,fp8,65535,0.10602133472760518
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,2,1,128,0,1,float16,float16,31,0.009882666791478792
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,2,1,128,0,1,float16,float16,65535,0.20314133167266846
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,2,1,128,0,1,float16,fp8,31,0.007146666447321574
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,2,1,128,0,1,float16,float16,63,0.011338666081428528
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,2,1,128,0,1,float16,fp8,63,0.007536000261704127
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,2,1,128,0,1,float16,float16,127,0.011567999919255575
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,2,1,128,0,1,float16,fp8,127,0.007429333403706551
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,2,1,128,0,1,float16,float16,255,0.011450666934251785
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,2,1,128,0,1,float16,float16,1023,0.01670933390657107
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,2,1,128,0,1,float16,fp8,255,0.00877333308259646
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,2,1,128,0,1,float16,float16,511,0.01540800059835116
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,2,1,128,0,1,float16,fp8,511,0.008122666428486506
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,2,1,128,0,1,float16,fp8,1023,0.008463999877373377
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,2,1,128,0,1,float16,float16,2047,0.016714667280515034
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,2,1,128,0,1,float16,fp8,2047,0.009621333330869675
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,2,1,128,0,1,float16,float16,4095,0.017071999609470367
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,2,1,128,0,1,float16,fp8,4095,0.010469333579142889
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,2,1,128,0,1,float16,float16,8191,0.018730666488409042
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,2,1,128,0,1,float16,fp8,131071,0.18030399084091187
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,2,1,128,0,1,float16,fp8,8191,0.012309333930412928
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,32,1,2,1,128,0,1,float16,float16,131071,0.36116798718770343
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,2,1,128,0,1,float16,float16,16383,0.02048533285657565
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,2,1,128,0,1,float16,fp8,16383,0.015173333386580149
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,2,1,128,0,1,float16,float16,32767,0.02457600086927414
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,2,1,128,0,1,float16,fp8,32767,0.019098666807015736
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,2,1,128,0,1,float16,float16,1,0.009066666786869368
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,2,1,128,0,1,float16,float16,65535,0.02868266652027766
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,2,1,128,0,1,float16,fp8,1,0.008538666491707167
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,2,1,128,0,1,float16,float16,3,0.008943999807039896
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,2,1,128,0,1,float16,fp8,3,0.007007999966541926
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,2,1,128,0,1,float16,fp8,7,0.008832000195980072
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,2,1,128,0,1,float16,fp8,65535,0.023738667368888855
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,2,1,128,0,1,float16,float16,7,0.008261333530147871
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,2,1,128,0,1,float16,float16,15,0.009232000137368837
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,2,1,128,0,1,float16,fp8,15,0.006954666847983996
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,2,1,128,0,1,float16,float16,31,0.010159999753038088
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,2,1,128,0,1,float16,fp8,31,0.007280000175038974
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,2,1,128,0,1,float16,float16,63,0.010826667149861654
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,2,1,128,0,1,float16,float16,131071,0.03985599925120672
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,2,1,128,0,1,float16,fp8,63,0.007322666545708974
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,2,1,128,0,1,float16,float16,127,0.011503999431928
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1,1,2,1,128,0,1,float16,fp8,131071,0.03139200061559677
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,2,1,128,0,1,float16,fp8,127,0.008469333251317343
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,2,1,128,0,1,float16,float16,255,0.011584000041087469
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,2,1,128,0,1,float16,fp8,255,0.007280000175038974
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,2,1,128,0,1,float16,float16,511,0.01562133307258288
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,2,1,128,0,1,float16,fp8,511,0.007903999959429106
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,2,1,128,0,1,float16,float16,1023,0.016714667280515034
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,2,1,128,0,1,float16,fp8,1023,0.008597333605090777
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,2,1,128,0,1,float16,float16,2047,0.01714666684468587
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,2,1,128,0,1,float16,fp8,2047,0.009653333574533463
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,2,1,128,0,1,float16,float16,4095,0.015552000453074774
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,2,1,128,0,1,float16,fp8,4095,0.010506667196750641
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,2,1,128,0,1,float16,float16,8191,0.019445333629846573
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,2,1,128,0,1,float16,fp8,8191,0.012223999947309494
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,2,1,128,0,1,float16,float16,16383,0.02090666691462199
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,2,1,128,0,1,float16,fp8,16383,0.015157333264748255
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,2,1,128,0,1,float16,float16,32767,0.02624000112215678
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,2,1,128,0,1,float16,fp8,32767,0.019141333798567455
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,2,1,128,0,1,float16,float16,65535,0.030447999636332195
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,2,1,128,0,1,float16,float16,1,0.010602666685978571
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,2,1,128,0,1,float16,fp8,1,0.007173333317041397
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,2,1,128,0,1,float16,fp8,65535,0.023941333095232647
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,2,1,128,0,1,float16,float16,3,0.011098666737476984
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,2,1,128,0,1,float16,fp8,3,0.007391999786098798
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,2,1,128,0,1,float16,float16,7,0.010378666842977205
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,2,1,128,0,1,float16,fp8,7,0.007114666824539502
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,2,1,128,0,1,float16,float16,15,0.011120000233252844
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,2,1,128,0,1,float16,fp8,15,0.007082666580875714
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,2,1,128,0,1,float16,float16,131071,0.0581279993057251
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,2,1,128,0,1,float16,float16,31,0.010879999647537867
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,2,1,128,0,1,float16,fp8,31,0.00720000018676122
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,2,1,128,0,1,float16,float16,63,0.010346666599313417
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2,1,2,1,128,0,1,float16,fp8,131071,0.034101332227389015
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,2,1,128,0,1,float16,fp8,63,0.007029333462317784
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,2,1,128,0,1,float16,float16,127,0.01101333275437355
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,2,1,128,0,1,float16,fp8,127,0.0069973332186539965
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,2,1,128,0,1,float16,float16,255,0.01360000049074491
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,2,1,128,0,1,float16,fp8,255,0.008821333448092142
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,2,1,128,0,1,float16,float16,511,0.019845332950353622
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,2,1,128,0,1,float16,fp8,511,0.008597333605090777
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,2,1,128,0,1,float16,float16,1023,0.02805333336194356
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,2,1,128,0,1,float16,fp8,1023,0.014010666559139887
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,2,1,128,0,1,float16,float16,2047,0.044064000248909
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,2,1,128,0,1,float16,fp8,2047,0.01738133281469345
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,2,1,128,0,1,float16,float16,4095,0.06852266689141591
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,2,1,128,0,1,float16,fp8,4095,0.026021334032217663
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,2,1,128,0,1,float16,fp8,8191,0.037674665451049805
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,2,1,128,0,1,float16,float16,8191,0.08669333656628926
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,2,1,128,0,1,float16,float16,16383,0.12203199664751689
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,2,1,128,0,1,float16,fp8,16383,0.057328000664711
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,2,1,128,0,1,float16,float16,1,0.008277333031098047
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,2,1,128,0,1,float16,fp8,1,0.006858666737874349
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,2,1,128,0,1,float16,float16,3,0.008447999755541483
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,2,1,128,0,1,float16,float16,32767,0.19549866517384848
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,2,1,128,0,1,float16,fp8,3,0.00706666645904382
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,2,1,128,0,1,float16,float16,7,0.008336000144481659
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,2,1,128,0,1,float16,fp8,32767,0.09762133161226909
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,2,1,128,0,1,float16,fp8,7,0.007098666702707608
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,2,1,128,0,1,float16,float16,15,0.009237333511312803
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,2,1,128,0,1,float16,fp8,15,0.005632000043988228
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,2,1,128,0,1,float16,float16,31,0.00996800015370051
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,2,1,128,0,1,float16,fp8,31,0.007120000198483467
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,2,1,128,0,1,float16,float16,63,0.011530666301647821
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,2,1,128,0,1,float16,float16,127,0.0116799995303154
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,2,1,128,0,1,float16,fp8,63,0.005594666426380475
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,2,1,128,0,1,float16,fp8,127,0.007061333085099856
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,2,1,128,0,1,float16,float16,255,0.011695999652147293
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,2,1,128,0,1,float16,fp8,255,0.00795199970404307
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,2,1,128,0,1,float16,fp8,65535,0.17825067043304443
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,2,1,128,0,1,float16,float16,1023,0.016783999900023144
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,2,1,128,0,1,float16,float16,65535,0.3378506501515706
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,2,1,128,0,1,float16,float16,511,0.015333333363135656
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,2,1,128,0,1,float16,fp8,511,0.008117333054542542
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,2,1,128,0,1,float16,fp8,1023,0.010010666524370512
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,2,1,128,0,1,float16,float16,2047,0.016704000532627106
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,2,1,128,0,1,float16,fp8,2047,0.00884799969693025
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,2,1,128,0,1,float16,float16,4095,0.017637333522240322
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,2,1,128,0,1,float16,fp8,4095,0.010762666662534079
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,2,1,128,0,1,float16,fp8,8191,0.012330666184425354
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,2,1,128,0,1,float16,float16,8191,0.021194666624069214
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,2,1,128,0,1,float16,float16,16383,0.023760000864664715
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,2,1,128,0,1,float16,fp8,16383,0.01562133307258288
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,2,1,128,0,1,float16,float16,131071,0.62718399365743
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,2,1,128,0,1,float16,float16,32767,0.03712533414363861
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,2,1,128,0,1,float16,fp8,32767,0.020746666938066483
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,2,1,128,0,1,float16,float16,1,0.010522666076819101
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,2,1,128,0,1,float16,fp8,65535,0.026687999566396076
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,2,1,128,0,1,float16,float16,65535,0.05023466547330221
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,2,1,128,0,1,float16,fp8,1,0.005941333249211311
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,2,1,128,0,1,float16,float16,3,0.010250666489203772
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,2,1,128,0,1,float16,fp8,3,0.007285333548982938
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,2,1,128,0,1,float16,float16,7,0.011114666859308878
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,2,1,128,0,1,float16,fp8,7,0.005824000264207522
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,2,1,128,0,1,float16,float16,15,0.0102613332370917
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,64,1,2,1,128,0,1,float16,fp8,131071,0.3367679913838704
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,2,1,128,0,1,float16,fp8,15,0.007360000163316727
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,2,1,128,0,1,float16,float16,131071,0.08062399923801422
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,2,1,128,0,1,float16,float16,31,0.010874666273593903
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,2,1,128,0,1,float16,fp8,31,0.005850666513045629
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,2,1,128,0,1,float16,float16,63,0.010319999729593595
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,4,1,2,1,128,0,1,float16,fp8,131071,0.04191466669241587
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,2,1,128,0,1,float16,fp8,63,0.007274666801095009
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,2,1,128,0,1,float16,float16,127,0.011194666226704916
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,2,1,128,0,1,float16,fp8,127,0.00697066696981589
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,2,1,128,0,1,float16,float16,255,0.020853333175182343
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,2,1,128,0,1,float16,float16,511,0.028175999720891316
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,2,1,128,0,1,float16,fp8,255,0.008821333448092142
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,2,1,128,0,1,float16,fp8,511,0.013546666751305262
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,2,1,128,0,1,float16,float16,1023,0.046336000164349876
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,2,1,128,0,1,float16,fp8,1023,0.0198186660806338
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,2,1,128,0,1,float16,float16,2047,0.06674133241176605
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,2,1,128,0,1,float16,fp8,2047,0.02515733242034912
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,2,1,128,0,1,float16,float16,4095,0.08541867136955261
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,2,1,128,0,1,float16,fp8,4095,0.03736533224582672
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,2,1,128,0,1,float16,float16,8191,0.12097600102424622
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,2,1,128,0,1,float16,fp8,8191,0.05705066521962484
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,2,1,128,0,1,float16,float16,16383,0.19419199228286743
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,2,1,128,0,1,float16,fp8,16383,0.09718933701515198
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,2,1,128,0,1,float16,float16,1,0.015989333391189575
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,2,1,128,0,1,float16,fp8,1,0.007167999943097432
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,2,1,128,0,1,float16,float16,3,0.015386667102575302
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,2,1,128,0,1,float16,fp8,3,0.00726400005320708
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,2,1,128,0,1,float16,float16,7,0.016095999628305435
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,2,1,128,0,1,float16,fp8,7,0.007327999919652939
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,2,1,128,0,1,float16,float16,15,0.015546667079130808
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,2,1,128,0,1,float16,fp8,15,0.00766933336853981
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,2,1,128,0,1,float16,float16,31,0.016063999384641647
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,2,1,128,0,1,float16,float16,32767,0.3397066593170166
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,2,1,128,0,1,float16,fp8,32767,0.17589332660039267
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,2,1,128,0,1,float16,fp8,31,0.00706666645904382
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,2,1,128,0,1,float16,float16,63,0.015429332852363586
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,2,1,128,0,1,float16,fp8,63,0.007402666533986728
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,2,1,128,0,1,float16,float16,127,0.01626666635274887
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,2,1,128,0,1,float16,fp8,127,0.0074560002734263735
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,2,1,128,0,1,float16,fp8,255,0.013007999708255133
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,2,1,128,0,1,float16,float16,255,0.03133866687615713
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,2,1,128,0,1,float16,float16,511,0.0468800018231074
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,2,1,128,0,1,float16,fp8,511,0.019760000209013622
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,2,1,128,0,1,float16,float16,1023,0.06346666812896729
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,2,1,128,0,1,float16,fp8,1023,0.02499199906984965
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,2,1,128,0,1,float16,float16,2047,0.0846720039844513
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,2,1,128,0,1,float16,float16,65535,0.6261920134226481
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,128,1,2,1,128,0,1,float16,fp8,65535,0.33452800909678143
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,2,1,128,0,1,float16,fp8,2047,0.038378665844599404
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,2,1,128,0,1,float16,float16,4095,0.12211199601491292
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,2,1,128,0,1,float16,fp8,4095,0.0563679983218511
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,2,1,128,0,1,float16,float16,8191,0.19550933440526327
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,2,1,128,0,1,float16,float16,3,0.025194667279720306
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,2,1,128,0,1,float16,float16,1,0.025583999852339428
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,2,1,128,0,1,float16,fp8,8191,0.09690666198730469
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,2,1,128,0,1,float16,fp8,1,0.010351999973257383
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,2,1,128,0,1,float16,float16,7,0.025045332809289295
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,2,1,128,0,1,float16,fp8,3,0.009610666582981745
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,2,1,128,0,1,float16,float16,16383,0.3401813507080078
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,2,1,128,0,1,float16,fp8,7,0.010362666721145311
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,2,1,128,0,1,float16,float16,15,0.025631998976071674
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,2,1,128,0,1,float16,fp8,31,0.009733333562811216
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,2,1,128,0,1,float16,fp8,15,0.010191999996701876
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,2,1,128,0,1,float16,float16,31,0.025706666211287182
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,2,1,128,0,1,float16,float16,63,0.02478400121132533
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,256,1,2,1,128,0,1,float16,fp8,16383,0.17692800362904867
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,2,1,128,0,1,float16,fp8,63,0.01027199998497963
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,2,1,128,0,1,float16,fp8,127,0.011183999478816986
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,2,1,128,0,1,float16,float16,255,0.050016000866889954
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,2,1,128,0,1,float16,float16,127,0.025610665480295818
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,2,1,128,0,1,float16,fp8,255,0.019317333896954853
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,2,1,128,0,1,float16,fp8,511,0.024432001014550526
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,2,1,128,0,1,float16,float16,511,0.0652213344971339
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,2,1,128,0,1,float16,fp8,1023,0.03756800045569738
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,2,1,128,0,1,float16,float16,1023,0.08292266726493835
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,2,1,128,0,1,float16,fp8,2047,0.05589333176612854
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,2,1,128,0,1,float16,float16,1,0.008522666369875273
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,2,1,128,0,1,float16,float16,2047,0.12330666184425354
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,2,1,128,0,1,float16,fp8,1,0.007018666714429855
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,2,1,128,0,1,float16,float16,3,0.008447999755541483
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,2,1,128,0,1,float16,fp8,3,0.005615999922156334
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,2,1,128,0,1,float16,fp8,7,0.008266666904091835
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,2,1,128,0,1,float16,float16,7,0.009194666519761086
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,2,1,128,0,1,float16,float16,4095,0.19500267505645752
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,2,1,128,0,1,float16,float16,15,0.008687999720374743
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,2,1,128,0,1,float16,fp8,4095,0.09661333759625752
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,2,1,128,0,1,float16,fp8,15,0.008581333483258883
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,2,1,128,0,1,float16,float16,8191,0.33790401617685956
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,2,1,128,0,1,float16,float16,31,0.01003200002014637
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,2,1,128,0,1,float16,fp8,31,0.005562666803598404
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,2,1,128,0,1,float16,float16,63,0.011605333536863327
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,2,1,128,0,1,float16,fp8,63,0.00884799969693025
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,2,1,128,0,1,float16,float16,127,0.011477333803971609
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,2,1,128,0,1,float16,float16,511,0.016234666109085083
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,512,1,2,1,128,0,1,float16,fp8,8191,0.17593065897623697
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,2,1,128,0,1,float16,fp8,127,0.00721066693464915
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,2,1,128,0,1,float16,float16,255,0.01121066634853681
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,2,1,128,0,1,float16,fp8,255,0.007471999774376552
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,2,1,128,0,1,float16,fp8,511,0.008527999743819237
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,2,1,128,0,1,float16,float16,1023,0.013728000223636627
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,2,1,128,0,1,float16,float16,4095,0.01811733345190684
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,2,1,128,0,1,float16,fp8,1023,0.00873066671192646
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,2,1,128,0,1,float16,float16,2047,0.017210666090250015
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,2,1,128,0,1,float16,fp8,2047,0.009152000149091085
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,2,1,128,0,1,float16,fp8,4095,0.010608000059922537
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,2,1,128,0,1,float16,float16,8191,0.027530667682488758
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,2,1,128,0,1,float16,fp8,8191,0.014229333649079004
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,2,1,128,0,1,float16,float16,16383,0.0322826678554217
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,2,1,128,0,1,float16,fp8,16383,0.016986666868130367
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,2,1,128,0,1,float16,float16,32767,0.052298665046691895
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,2,1,128,0,1,float16,fp8,32767,0.024400000770886738
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,2,1,128,0,1,float16,float16,65535,0.07627200086911519
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,2,1,128,0,1,float16,fp8,65535,0.04490133126576742
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,2,1,128,0,1,float16,float16,1,0.044624000787734985
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,2,1,128,0,1,float16,fp8,1,0.01434133326013883
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,2,1,128,0,1,float16,float16,3,0.0444160004456838
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,2,1,128,0,1,float16,fp8,3,0.014293332894643148
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,2,1,128,0,1,float16,float16,7,0.045226668318112694
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,2,1,128,0,1,float16,fp8,7,0.014378666877746582
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,2,1,128,0,1,float16,float16,31,0.044250667095184326
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,2,1,128,0,1,float16,float16,15,0.04574933151404063
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,2,1,128,0,1,float16,fp8,15,0.014250667144854864
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,2,1,128,0,1,float16,float16,131071,0.12803199887275696
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,8,1,2,1,128,0,1,float16,fp8,131071,0.06578666468461354
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,2,1,128,0,1,float16,fp8,31,0.014335999886194864
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,2,1,128,0,1,float16,float16,127,0.04462933540344238
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,2,1,128,0,1,float16,float16,63,0.04479999840259552
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,2,1,128,0,1,float16,fp8,63,0.01414399966597557
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,2,1,128,0,1,float16,fp8,127,0.016528000434239704
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,2,1,128,0,1,float16,float16,255,0.0543093333641688
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,2,1,128,0,1,float16,fp8,255,0.02143466720978419
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,2,1,128,0,1,float16,float16,1023,0.10486933588981628
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,2,1,128,0,1,float16,float16,511,0.0710506687561671
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,2,1,128,0,1,float16,fp8,511,0.033610666791598
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,2,1,128,0,1,float16,fp8,1023,0.053264002005259194
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,2,1,128,0,1,float16,float16,1,0.08090666433175404
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,2,1,128,0,1,float16,fp8,2047,0.09356799721717834
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,2,1,128,0,1,float16,float16,2047,0.17799999316533408
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,2,1,128,0,1,float16,fp8,1,0.024314666787783306
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,2,1,128,0,1,float16,float16,3,0.0812853326400121
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,2,1,128,0,1,float16,float16,7,0.08098133405049641
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,2,1,128,0,1,float16,float16,4095,0.3165066639582316
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,2,1,128,0,1,float16,fp8,3,0.024490666886170704
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,1024,1,2,1,128,0,1,float16,fp8,4095,0.17350399494171143
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,2,1,128,0,1,float16,fp8,7,0.02441066751877467
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,2,1,128,0,1,float16,fp8,31,0.023599999646345775
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,2,1,128,0,1,float16,float16,15,0.08105599880218506
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,2,1,128,0,1,float16,fp8,15,0.024277334411938984
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,2,1,128,0,1,float16,float16,31,0.08111999928951263
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,2,1,128,0,1,float16,float16,63,0.08072533210118611
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,2,1,128,0,1,float16,fp8,63,0.024112001061439514
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,2,1,128,0,1,float16,float16,255,0.09641599655151367
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,2,1,128,0,1,float16,float16,127,0.0836853285630544
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,2,1,128,0,1,float16,fp8,127,0.028416000306606293
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,2,1,128,0,1,float16,float16,1,0.009226666763424873
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,2,1,128,0,1,float16,fp8,255,0.04015466570854187
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,2,1,128,0,1,float16,fp8,1,0.006981333096822103
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,2,1,128,0,1,float16,float16,511,0.12899733583132425
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,2,1,128,0,1,float16,fp8,511,0.05974400043487549
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,2,1,128,0,1,float16,float16,3,0.00922133338948091
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,2,1,128,0,1,float16,fp8,3,0.0069333333522081375
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,2,1,128,0,1,float16,float16,7,0.009301333377758661
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,2,1,128,0,1,float16,fp8,7,0.0069919998447100324
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,2,1,128,0,1,float16,float16,15,0.009402666861812273
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,2,1,128,0,1,float16,fp8,15,0.00702400008837382
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,2,1,128,0,1,float16,float16,31,0.01009599988659223
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,2,1,128,0,1,float16,fp8,31,0.0068693334857622785
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,2,1,128,0,1,float16,fp8,1023,0.09922666351000468
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,2,1,128,0,1,float16,float16,2047,0.3370506763458252
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,2,1,128,0,1,float16,float16,63,0.011717333147923151
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,2,1,128,0,1,float16,fp8,63,0.00707733320693175
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,2,1,128,0,1,float16,float16,1023,0.19803732633590698
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,2,1,128,0,1,float16,fp8,255,0.008042666440208754
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,2,1,128,0,1,float16,float16,127,0.011941333611806234
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,2,1,128,0,1,float16,fp8,127,0.006965333595871925
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,2,1,128,0,1,float16,float16,1023,0.014021333307027817
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,2,1,128,0,1,float16,float16,255,0.011898666620254517
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,2,1,128,0,1,float16,float16,511,0.013162666310866674
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,2,1,128,0,1,float16,fp8,511,0.007610666876037915
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,2048,1,2,1,128,0,1,float16,fp8,2047,0.17827733357747397
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,2,1,128,0,1,float16,fp8,1023,0.008912000184257826
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,2,1,128,0,1,float16,float16,2047,0.022533332308133442
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,2,1,128,0,1,float16,fp8,2047,0.010608000059922537
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,2,1,128,0,1,float16,float16,4095,0.02499733368555705
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,2,1,128,0,1,float16,fp8,4095,0.012266666938861212
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,2,1,128,0,1,float16,float16,8191,0.03659199923276901
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,2,1,128,0,1,float16,fp8,8191,0.01598400001724561
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,2,1,128,0,1,float16,float16,16383,0.053504000107447304
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,2,1,128,0,1,float16,fp8,16383,0.027450665831565857
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,2,1,128,0,1,float16,fp8,32767,0.03844800094763438
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,2,1,128,0,1,float16,float16,32767,0.08225599924723308
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,2,1,128,0,1,float16,float16,65535,0.12902933359146118
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,2,1,128,0,1,float16,fp8,65535,0.06523733337720235
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,2,1,128,0,1,float16,float16,131071,0.2101973295211792
TRTLLM,1.2.0rc5,NVIDIA H200,generation_attention,torch_flow,16,1,2,1,128,0,1,float16,fp8,131071,0.11103999614715576
