framework,version,device,op_name,kernel_source,batch_size,isl,num_heads,num_key_value_heads,head_dim,window_size,beam_width,attn_dtype,kv_cache_dtype,step,latency
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,16384,96,1,64,0,1,float16,float16,0,176.8089599609375
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,16384,96,1,64,0,1,float16,fp8,0,179.4908243815104
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,16384,96,2,64,0,1,float16,float16,0,177.38614908854166
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,16384,96,1,64,0,1,fp8,fp8,0,232.8183390299479
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,16384,96,4,64,0,1,float16,float16,0,180.0997517903646
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,16384,96,2,64,0,1,float16,fp8,0,180.87288411458334
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,16384,96,4,64,0,1,float16,fp8,0,181.9178670247396
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,16384,96,2,64,0,1,fp8,fp8,0,238.4185994466146
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,16384,96,96,64,0,1,float16,float16,0,94.69371541341145
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,16384,96,8,64,0,1,float16,float16,0,180.25130208333334
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,16384,96,96,64,0,1,float16,fp8,0,93.5338643391927
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,16384,96,96,64,0,1,fp8,fp8,0,123.01943969726562
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,16384,96,8,64,0,1,float16,fp8,0,180.57574462890625
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,16384,96,4,64,0,1,fp8,fp8,0,236.7332763671875
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,16384,96,1,64,0,1,float16,float16,0,94.96422322591145
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,16384,96,8,64,0,1,fp8,fp8,0,237.5594685872396
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,16384,96,1,64,0,1,float16,fp8,0,93.11215209960938
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,16384,96,2,64,0,1,float16,float16,0,92.42794799804688
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,16384,96,1,64,0,1,fp8,fp8,0,114.86686197916667
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,16384,96,2,64,0,1,float16,fp8,0,93.08193969726562
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,16384,96,4,64,0,1,float16,float16,0,89.2035420735677
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,16384,96,4,64,0,1,float16,fp8,0,93.17990112304688
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,16384,96,2,64,0,1,fp8,fp8,0,115.02677408854167
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,16384,96,96,64,0,1,float16,float16,0,46.55257670084635
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,16384,96,96,64,0,1,float16,fp8,0,45.55895487467448
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,16384,96,8,64,0,1,float16,float16,0,91.35547892252605
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,16384,96,8,64,0,1,float16,fp8,0,89.51210530598958
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,16384,96,1,64,0,1,float16,float16,0,44.60407511393229
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,16384,96,96,64,0,1,fp8,fp8,0,61.45006815592448
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,16384,96,4,64,0,1,fp8,fp8,0,116.03711954752605
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,16384,96,8,64,0,1,fp8,fp8,0,115.9558817545573
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,16384,96,1,64,0,1,float16,fp8,0,45.247660319010414
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,16384,96,1,64,0,1,fp8,fp8,0,57.60460917154948
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,16384,96,2,64,0,1,float16,float16,0,44.23219299316406
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,16384,96,2,64,0,1,float16,fp8,0,44.587005615234375
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,16384,96,4,64,0,1,float16,float16,0,44.751190185546875
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,16384,96,2,64,0,1,fp8,fp8,0,57.45647176106771
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,16384,96,4,64,0,1,float16,fp8,0,44.070231119791664
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,16384,96,8,64,0,1,float16,float16,0,44.31513468424479
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,16384,96,96,64,0,1,float16,float16,0,22.962857564290363
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,16384,96,4,64,0,1,fp8,fp8,0,57.872212727864586
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,16384,96,96,64,0,1,float16,fp8,0,23.530838012695312
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,16384,96,8,64,0,1,float16,fp8,0,43.57512410481771
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,16384,96,1,64,0,1,float16,float16,0,23.013206481933594
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,16384,96,96,64,0,1,fp8,fp8,0,30.179840087890625
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,16384,96,8,64,0,1,fp8,fp8,0,57.94798787434896
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,16384,96,1,64,0,1,float16,fp8,0,23.235926310221355
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,16384,96,2,64,0,1,float16,float16,0,22.75157419840495
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,16384,96,2,64,0,1,float16,fp8,0,23.21117909749349
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,16384,96,1,64,0,1,fp8,fp8,0,29.084330240885418
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,16384,96,4,64,0,1,float16,float16,0,22.332074483235676
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,16384,96,4,64,0,1,float16,fp8,0,22.35016632080078
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,16384,96,2,64,0,1,fp8,fp8,0,29.203285217285156
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,16384,96,8,64,0,1,float16,float16,0,22.281046549479168
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,16384,96,4,64,0,1,fp8,fp8,0,29.16966501871745
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,16384,96,8,64,0,1,float16,fp8,0,22.120277404785156
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,16384,96,8,64,0,1,fp8,fp8,0,29.45075225830078
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,12288,96,1,64,0,1,float16,float16,0,104.77005004882812
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,12288,96,1,64,0,1,float16,fp8,0,106.53201293945312
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,12288,96,2,64,0,1,float16,float16,0,103.4270731608073
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,12288,96,2,64,0,1,float16,fp8,0,104.80759684244792
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,12288,96,1,64,0,1,fp8,fp8,0,131.70431518554688
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,12288,96,4,64,0,1,float16,float16,0,104.48708089192708
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,12288,96,4,64,0,1,float16,fp8,0,106.34393310546875
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,12288,96,2,64,0,1,fp8,fp8,0,133.40279134114584
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,12288,96,96,64,0,1,float16,float16,0,54.694742838541664
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,12288,96,96,64,0,1,float16,fp8,0,54.41041056315104
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,12288,96,1,64,0,1,float16,float16,0,52.44654846191406
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,12288,96,8,64,0,1,float16,float16,0,104.6640625
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,12288,96,8,64,0,1,float16,fp8,0,101.93595377604167
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,12288,96,96,64,0,1,fp8,fp8,0,71.67419942220052
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,12288,96,4,64,0,1,fp8,fp8,0,133.8422648111979
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,12288,96,1,64,0,1,float16,fp8,0,51.72889709472656
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,12288,96,8,64,0,1,fp8,fp8,0,135.33627319335938
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,12288,96,1,64,0,1,fp8,fp8,0,66.0524393717448
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,12288,96,2,64,0,1,float16,float16,0,51.249664306640625
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,12288,96,2,64,0,1,float16,fp8,0,51.72736104329427
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,12288,96,4,64,0,1,float16,float16,0,51.3433583577474
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,12288,96,2,64,0,1,fp8,fp8,0,66.1748046875
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,12288,96,4,64,0,1,float16,fp8,0,50.647552490234375
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,12288,96,96,64,0,1,float16,float16,0,27.508394877115887
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,12288,96,96,64,0,1,float16,fp8,0,27.47528584798177
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,12288,96,8,64,0,1,float16,float16,0,50.73527526855469
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,12288,96,4,64,0,1,fp8,fp8,0,66.29103088378906
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,12288,96,8,64,0,1,float16,fp8,0,50.68168640136719
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,12288,96,1,64,0,1,float16,float16,0,26.259969075520832
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,12288,96,96,64,0,1,fp8,fp8,0,35.0730234781901
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,12288,96,8,64,0,1,fp8,fp8,0,66.74381001790364
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,12288,96,1,64,0,1,float16,fp8,0,26.143231709798176
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,12288,96,2,64,0,1,float16,float16,0,26.2290776570638
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,12288,96,2,64,0,1,float16,fp8,0,25.883989969889324
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,12288,96,1,64,0,1,fp8,fp8,0,33.30269877115885
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,12288,96,4,64,0,1,float16,float16,0,25.807530721028645
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,12288,96,2,64,0,1,fp8,fp8,0,32.90231577555338
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,12288,96,4,64,0,1,float16,fp8,0,25.29894510904948
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,12288,96,4,64,0,1,fp8,fp8,0,33.11616007486979
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,12288,96,96,64,0,1,float16,float16,0,14.176597595214844
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,12288,96,8,64,0,1,float16,float16,0,25.457494099934895
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,12288,96,8,64,0,1,float16,fp8,0,25.94611104329427
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,12288,96,96,64,0,1,float16,fp8,0,13.74771245320638
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,12288,96,1,64,0,1,float16,float16,0,13.30892817179362
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,12288,96,96,64,0,1,fp8,fp8,0,17.54794692993164
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,12288,96,8,64,0,1,fp8,fp8,0,32.91886901855469
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,12288,96,1,64,0,1,float16,fp8,0,13.680127461751303
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,12288,96,1,64,0,1,fp8,fp8,0,16.946346282958984
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,12288,96,2,64,0,1,float16,float16,0,13.474815368652344
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,12288,96,2,64,0,1,float16,fp8,0,13.461504618326822
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,12288,96,4,64,0,1,float16,float16,0,13.259264628092447
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,12288,96,4,64,0,1,float16,fp8,0,13.13092295328776
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,12288,96,2,64,0,1,fp8,fp8,0,16.632490793863933
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,12288,96,4,64,0,1,fp8,fp8,0,16.67191441853841
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,12288,96,8,64,0,1,float16,float16,0,13.250048319498697
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,12288,96,8,64,0,1,float16,fp8,0,13.289642333984375
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,12288,96,8,64,0,1,fp8,fp8,0,16.764415740966797
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,10240,96,1,64,0,1,float16,float16,0,79.05792236328125
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,10240,96,1,64,0,1,float16,fp8,0,76.48870340983073
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,10240,96,2,64,0,1,float16,float16,0,77.06078084309895
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,10240,96,2,64,0,1,float16,fp8,0,76.04121398925781
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,10240,96,1,64,0,1,fp8,fp8,0,93.145263671875
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,10240,96,4,64,0,1,float16,float16,0,77.2299092610677
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,10240,96,4,64,0,1,float16,fp8,0,76.0791015625
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,10240,96,2,64,0,1,fp8,fp8,0,93.99774169921875
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,10240,96,96,64,0,1,float16,float16,0,40.38263448079427
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,10240,96,96,64,0,1,float16,fp8,0,39.59295908610026
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,10240,96,1,64,0,1,float16,float16,0,37.00548299153646
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,10240,96,96,64,0,1,fp8,fp8,0,49.86760457356771
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,10240,96,8,64,0,1,float16,float16,0,75.5416768391927
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,10240,96,8,64,0,1,float16,fp8,0,75.29966735839844
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,10240,96,4,64,0,1,fp8,fp8,0,95.081298828125
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,10240,96,8,64,0,1,fp8,fp8,0,94.30220540364583
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,10240,96,1,64,0,1,float16,fp8,0,36.99114735921224
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,10240,96,2,64,0,1,float16,float16,0,36.70544942220052
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,10240,96,1,64,0,1,fp8,fp8,0,46.493865966796875
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,10240,96,2,64,0,1,float16,fp8,0,36.49109395345052
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,10240,96,4,64,0,1,float16,float16,0,36.091051737467446
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,10240,96,2,64,0,1,fp8,fp8,0,46.252034505208336
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,10240,96,4,64,0,1,float16,fp8,0,36.30864969889323
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,10240,96,96,64,0,1,float16,float16,0,19.617450714111328
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,10240,96,8,64,0,1,float16,float16,0,36.184234619140625
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,10240,96,4,64,0,1,fp8,fp8,0,46.099283854166664
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,10240,96,96,64,0,1,float16,fp8,0,19.036331176757812
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,10240,96,8,64,0,1,float16,fp8,0,35.581441243489586
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,10240,96,96,64,0,1,fp8,fp8,0,24.863914489746094
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,10240,96,1,64,0,1,float16,float16,0,19.019264221191406
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,10240,96,8,64,0,1,fp8,fp8,0,46.87547810872396
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,10240,96,1,64,0,1,float16,fp8,0,18.702164967854817
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,10240,96,2,64,0,1,float16,float16,0,18.62997309366862
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,10240,96,1,64,0,1,fp8,fp8,0,23.070037841796875
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,10240,96,2,64,0,1,float16,fp8,0,18.36782964070638
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,10240,96,4,64,0,1,float16,float16,0,18.18265660603841
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,10240,96,2,64,0,1,fp8,fp8,0,23.196673075358074
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,10240,96,4,64,0,1,float16,fp8,0,18.421077728271484
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,10240,96,4,64,0,1,fp8,fp8,0,23.411542256673176
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,10240,96,96,64,0,1,float16,float16,0,9.848149617513021
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,10240,96,8,64,0,1,float16,float16,0,18.223103841145832
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,10240,96,8,64,0,1,float16,fp8,0,18.332842508951824
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,10240,96,96,64,0,1,float16,fp8,0,9.83517837524414
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,10240,96,1,64,0,1,float16,float16,0,9.810431798299154
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,10240,96,8,64,0,1,fp8,fp8,0,23.16663360595703
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,10240,96,96,64,0,1,fp8,fp8,0,12.580181121826172
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,10240,96,1,64,0,1,float16,fp8,0,9.739605585734049
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,10240,96,2,64,0,1,float16,float16,0,9.759061177571615
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,10240,96,1,64,0,1,fp8,fp8,0,11.767637888590494
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,10240,96,2,64,0,1,float16,fp8,0,9.300309499104818
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,10240,96,4,64,0,1,float16,float16,0,9.766058603922525
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,10240,96,2,64,0,1,fp8,fp8,0,11.815765380859375
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,10240,96,4,64,0,1,float16,fp8,0,9.611946741739908
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,10240,96,4,64,0,1,fp8,fp8,0,11.848191579182943
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,10240,96,8,64,0,1,float16,float16,0,9.90771230061849
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,10240,96,8,64,0,1,float16,fp8,0,9.521664301554361
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,10240,96,8,64,0,1,fp8,fp8,0,11.779071807861328
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,8192,96,1,64,0,1,float16,float16,0,98.4052022298177
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,8192,96,1,64,0,1,float16,fp8,0,101.5154317220052
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,8192,96,2,64,0,1,float16,fp8,0,100.25283813476562
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,8192,96,2,64,0,1,float16,float16,0,99.24676513671875
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,8192,96,4,64,0,1,float16,float16,0,99.65687052408855
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,8192,96,4,64,0,1,float16,fp8,0,98.57535807291667
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,8192,96,1,64,0,1,fp8,fp8,0,122.47654215494792
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,8192,96,2,64,0,1,fp8,fp8,0,126.6382548014323
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,8192,96,96,64,0,1,float16,float16,0,53.15174357096354
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,8192,96,96,64,0,1,float16,fp8,0,52.53153991699219
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,8192,96,1,64,0,1,float16,float16,0,49.30389404296875
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,8192,96,96,64,0,1,fp8,fp8,0,67.15767415364583
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,8192,96,8,64,0,1,float16,float16,0,100.16102091471355
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,8192,96,8,64,0,1,float16,fp8,0,98.81736246744792
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,8192,96,4,64,0,1,fp8,fp8,0,126.11532592773438
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,8192,96,8,64,0,1,fp8,fp8,0,126.36057535807292
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,8192,96,1,64,0,1,float16,fp8,0,48.52001953125
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,8192,96,2,64,0,1,float16,float16,0,49.00164286295573
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,8192,96,1,64,0,1,fp8,fp8,0,59.90638732910156
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,8192,96,2,64,0,1,float16,fp8,0,47.1383056640625
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,8192,96,4,64,0,1,float16,float16,0,47.32774353027344
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,8192,96,2,64,0,1,fp8,fp8,0,60.77491251627604
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,8192,96,4,64,0,1,float16,fp8,0,47.21766153971354
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,8192,96,96,64,0,1,float16,float16,0,25.89354705810547
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,8192,96,8,64,0,1,float16,float16,0,47.832916259765625
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,8192,96,4,64,0,1,fp8,fp8,0,60.47607421875
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,8192,96,96,64,0,1,float16,fp8,0,25.894569396972656
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,8192,96,8,64,0,1,float16,fp8,0,48.56934611002604
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,8192,96,1,64,0,1,float16,float16,0,24.233642578125
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,8192,96,8,64,0,1,fp8,fp8,0,60.85034688313802
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,8192,96,96,64,0,1,fp8,fp8,0,33.10199483235677
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,8192,96,1,64,0,1,float16,fp8,0,24.046592712402344
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,8192,96,1,64,0,1,fp8,fp8,0,30.02624003092448
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,8192,96,2,64,0,1,float16,fp8,0,24.32494862874349
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,8192,96,2,64,0,1,float16,float16,0,24.331776936848957
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,8192,96,2,64,0,1,fp8,fp8,0,30.38549296061198
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,8192,96,4,64,0,1,float16,float16,0,23.496192932128906
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,8192,96,4,64,0,1,float16,fp8,0,23.685630798339844
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,8192,96,8,64,0,1,float16,float16,0,23.70781962076823
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,8192,96,4,64,0,1,fp8,fp8,0,30.322858174641926
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,8192,96,96,64,0,1,float16,float16,0,13.344596862792969
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,8192,96,8,64,0,1,float16,fp8,0,23.921663920084637
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,8192,96,96,64,0,1,float16,fp8,0,13.464064280192057
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,8192,96,1,64,0,1,float16,float16,0,12.52846908569336
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,8192,96,96,64,0,1,fp8,fp8,0,16.365909576416016
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,8192,96,8,64,0,1,fp8,fp8,0,30.266878763834637
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,8192,96,1,64,0,1,float16,fp8,0,12.369920094807943
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,8192,96,1,64,0,1,fp8,fp8,0,15.184383392333984
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,8192,96,2,64,0,1,float16,fp8,0,12.58444849650065
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,8192,96,2,64,0,1,float16,float16,0,12.488362630208334
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,8192,96,2,64,0,1,fp8,fp8,0,15.287466684977213
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,8192,96,4,64,0,1,float16,float16,0,12.090367635091146
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,8192,96,4,64,0,1,float16,fp8,0,12.193621317545572
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,8192,96,8,64,0,1,float16,float16,0,12.16324234008789
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,8192,96,4,64,0,1,fp8,fp8,0,15.094443003336588
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,8192,96,96,64,0,1,float16,float16,0,6.378837585449219
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,8192,96,8,64,0,1,float16,fp8,0,12.407124837239584
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,8192,96,96,64,0,1,float16,fp8,0,6.283775965372722
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,8192,96,1,64,0,1,float16,float16,0,6.403584162394206
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,8192,96,8,64,0,1,fp8,fp8,0,15.15707778930664
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,8192,96,96,64,0,1,fp8,fp8,0,8.302080154418945
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,8192,96,1,64,0,1,float16,fp8,0,5.965311686197917
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,8192,96,1,64,0,1,fp8,fp8,0,7.6977494557698565
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,8192,96,2,64,0,1,float16,fp8,0,5.6830291748046875
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,8192,96,2,64,0,1,float16,float16,0,5.792256037394206
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,8192,96,2,64,0,1,fp8,fp8,0,7.825237274169922
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,8192,96,4,64,0,1,float16,float16,0,5.205845197041829
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,8192,96,4,64,0,1,float16,fp8,0,6.472533543904622
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,8192,96,4,64,0,1,fp8,fp8,0,7.736661275227864
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,8192,96,8,64,0,1,float16,float16,0,6.129493077596028
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,8192,96,8,64,0,1,float16,fp8,0,6.039552052815755
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,8192,96,8,64,0,1,fp8,fp8,0,7.839743932088216
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,6144,96,1,64,0,1,float16,float16,0,57.63788859049479
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,6144,96,1,64,0,1,float16,fp8,0,58.68646240234375
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,6144,96,2,64,0,1,float16,fp8,0,57.10063171386719
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,6144,96,2,64,0,1,float16,float16,0,58.090667724609375
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,6144,96,1,64,0,1,fp8,fp8,0,70.02384948730469
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,6144,96,4,64,0,1,float16,float16,0,57.11052958170573
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,6144,96,4,64,0,1,float16,fp8,0,57.5831044514974
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,6144,96,2,64,0,1,fp8,fp8,0,71.225341796875
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,6144,96,96,64,0,1,float16,float16,0,31.64979298909505
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,6144,96,96,64,0,1,float16,fp8,0,31.285247802734375
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,6144,96,1,64,0,1,float16,float16,0,28.668075561523438
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,6144,96,96,64,0,1,fp8,fp8,0,39.66429901123047
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,6144,96,8,64,0,1,float16,float16,0,58.43097432454427
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,6144,96,8,64,0,1,float16,fp8,0,56.971435546875
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,6144,96,4,64,0,1,fp8,fp8,0,71.65098571777344
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,6144,96,8,64,0,1,fp8,fp8,0,72.25395202636719
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,6144,96,1,64,0,1,float16,fp8,0,28.16460673014323
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,6144,96,2,64,0,1,float16,float16,0,27.703465779622395
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,6144,96,1,64,0,1,fp8,fp8,0,34.52535502115885
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,6144,96,2,64,0,1,float16,fp8,0,28.686678568522137
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,6144,96,4,64,0,1,float16,float16,0,27.722068786621094
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,6144,96,2,64,0,1,fp8,fp8,0,34.930005391438804
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,6144,96,4,64,0,1,float16,fp8,0,27.761492411295574
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,6144,96,4,64,0,1,fp8,fp8,0,34.7506357828776
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,6144,96,8,64,0,1,float16,float16,0,27.521024068196613
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,6144,96,96,64,0,1,float16,float16,0,15.962453206380209
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,6144,96,8,64,0,1,float16,fp8,0,27.585535685221355
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,6144,96,96,64,0,1,float16,fp8,0,15.528277079264322
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,6144,96,1,64,0,1,float16,float16,0,14.726826985677084
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,6144,96,8,64,0,1,fp8,fp8,0,34.94587707519531
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,6144,96,96,64,0,1,fp8,fp8,0,19.73367436726888
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,6144,96,1,64,0,1,float16,fp8,0,14.58346684773763
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,6144,96,2,64,0,1,float16,float16,0,14.655317942301432
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,6144,96,2,64,0,1,float16,fp8,0,14.450687408447266
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,6144,96,1,64,0,1,fp8,fp8,0,17.139882405598957
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,6144,96,2,64,0,1,fp8,fp8,0,17.2936528523763
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,6144,96,4,64,0,1,float16,float16,0,14.305620829264322
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,6144,96,4,64,0,1,float16,fp8,0,14.332416534423828
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,6144,96,4,64,0,1,fp8,fp8,0,17.624234517415363
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,6144,96,8,64,0,1,float16,float16,0,14.430890401204428
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,6144,96,96,64,0,1,float16,float16,0,7.75270398457845
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,6144,96,96,64,0,1,float16,fp8,0,7.986517588297526
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,6144,96,8,64,0,1,float16,fp8,0,14.662826538085938
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,6144,96,8,64,0,1,fp8,fp8,0,17.444180806477863
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,6144,96,1,64,0,1,float16,float16,0,6.9946028391520185
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,6144,96,96,64,0,1,fp8,fp8,0,9.684821446736654
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,6144,96,1,64,0,1,float16,fp8,0,7.572479883829753
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,6144,96,1,64,0,1,fp8,fp8,0,8.740181605021158
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,6144,96,2,64,0,1,float16,float16,0,6.448981602986653
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,6144,96,2,64,0,1,float16,fp8,0,7.351807912190755
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,6144,96,4,64,0,1,float16,float16,0,7.047509511311849
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,6144,96,2,64,0,1,fp8,fp8,0,8.799402872721354
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,6144,96,4,64,0,1,float16,fp8,0,7.087957382202148
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,6144,96,8,64,0,1,float16,float16,0,7.6257279713948565
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,6144,96,4,64,0,1,fp8,fp8,0,8.779946645100912
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,6144,96,8,64,0,1,float16,fp8,0,7.446698506673177
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,6144,96,8,64,0,1,fp8,fp8,0,8.761685053507486
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,6144,96,96,64,0,1,float16,float16,0,3.693056106567383
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,6144,96,1,64,0,1,float16,float16,0,3.306666692097982
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,6144,96,96,64,0,1,float16,fp8,0,3.644415855407715
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,6144,96,96,64,0,1,fp8,fp8,0,4.861440022786458
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,6144,96,1,64,0,1,float16,fp8,0,3.1293439865112305
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,6144,96,1,64,0,1,fp8,fp8,0,4.393472035725911
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,6144,96,2,64,0,1,float16,float16,0,3.181056022644043
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,6144,96,2,64,0,1,float16,fp8,0,3.0921386082967124
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,6144,96,2,64,0,1,fp8,fp8,0,4.387157440185547
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,6144,96,4,64,0,1,float16,fp8,0,3.1976105372111
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,6144,96,4,64,0,1,float16,float16,0,3.3274879455566406
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,6144,96,4,64,0,1,fp8,fp8,0,4.433237393697103
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,6144,96,8,64,0,1,float16,float16,0,3.141632080078125
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,6144,96,8,64,0,1,float16,fp8,0,3.2766294479370117
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,6144,96,8,64,0,1,fp8,fp8,0,4.439551989237468
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,4096,96,1,64,0,1,float16,float16,0,54.185811360677086
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,4096,96,1,64,0,1,float16,fp8,0,55.79980977376302
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,4096,96,2,64,0,1,float16,float16,0,56.15735371907552
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,4096,96,2,64,0,1,float16,fp8,0,56.71696980794271
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,4096,96,4,64,0,1,float16,float16,0,56.3235829671224
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,4096,96,1,64,0,1,fp8,fp8,0,66.57877095540364
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,4096,96,4,64,0,1,float16,fp8,0,56.45704650878906
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,4096,96,2,64,0,1,fp8,fp8,0,70.39931742350261
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,4096,96,96,64,0,1,float16,fp8,0,31.319040934244793
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,4096,96,96,64,0,1,float16,float16,0,32.25019836425781
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,4096,96,1,64,0,1,float16,float16,0,27.837270100911457
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,4096,96,96,64,0,1,fp8,fp8,0,39.29565938313802
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,4096,96,8,64,0,1,float16,float16,0,55.84571838378906
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,4096,96,8,64,0,1,float16,fp8,0,55.357096354166664
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,4096,96,4,64,0,1,fp8,fp8,0,70.26159159342448
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,4096,96,8,64,0,1,fp8,fp8,0,71.53680928548177
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,4096,96,1,64,0,1,float16,fp8,0,26.840916951497395
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,4096,96,2,64,0,1,float16,float16,0,27.717801411946613
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,4096,96,1,64,0,1,fp8,fp8,0,31.945045471191406
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,4096,96,2,64,0,1,float16,fp8,0,27.25102996826172
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,4096,96,4,64,0,1,float16,float16,0,26.716161092122395
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,4096,96,2,64,0,1,fp8,fp8,0,32.14796702067057
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,4096,96,4,64,0,1,float16,fp8,0,26.443946838378906
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,4096,96,4,64,0,1,fp8,fp8,0,32.72533416748047
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,4096,96,8,64,0,1,float16,float16,0,26.43182881673177
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,4096,96,96,64,0,1,float16,float16,0,16.06468327840169
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,4096,96,8,64,0,1,float16,fp8,0,27.503616333007812
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,4096,96,96,64,0,1,float16,fp8,0,15.257770538330078
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,4096,96,1,64,0,1,float16,float16,0,13.813589731852213
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,4096,96,96,64,0,1,fp8,fp8,0,18.914133707682293
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,4096,96,8,64,0,1,fp8,fp8,0,32.68898010253906
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,4096,96,1,64,0,1,float16,fp8,0,14.152875264485678
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,4096,96,2,64,0,1,float16,float16,0,13.744127909342447
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,4096,96,1,64,0,1,fp8,fp8,0,16.00341288248698
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,4096,96,2,64,0,1,float16,fp8,0,14.080682118733725
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,4096,96,2,64,0,1,fp8,fp8,0,16.07014338175456
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,4096,96,4,64,0,1,float16,float16,0,13.579093933105469
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,4096,96,4,64,0,1,float16,fp8,0,13.522261301676432
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,4096,96,4,64,0,1,fp8,fp8,0,16.090452829996746
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,4096,96,8,64,0,1,float16,float16,0,13.66988754272461
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,4096,96,96,64,0,1,float16,float16,0,8.220501581827799
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,4096,96,8,64,0,1,float16,fp8,0,13.686442057291666
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,4096,96,96,64,0,1,float16,fp8,0,7.784789403279622
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,4096,96,8,64,0,1,fp8,fp8,0,16.2694829305013
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,4096,96,1,64,0,1,float16,float16,0,6.422186533610026
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,4096,96,96,64,0,1,fp8,fp8,0,9.424383799235025
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,4096,96,1,64,0,1,float16,fp8,0,5.834410349527995
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,4096,96,2,64,0,1,float16,float16,0,7.125674565633138
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,4096,96,1,64,0,1,fp8,fp8,0,8.042154947916666
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,4096,96,2,64,0,1,float16,fp8,0,6.797482808430989
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,4096,96,4,64,0,1,float16,float16,0,6.9386240641276045
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,4096,96,2,64,0,1,fp8,fp8,0,8.146944046020508
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,4096,96,4,64,0,1,float16,fp8,0,6.7770029703776045
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,4096,96,4,64,0,1,fp8,fp8,0,8.119295756022135
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,4096,96,8,64,0,1,float16,float16,0,6.744234720865886
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,4096,96,96,64,0,1,float16,float16,0,3.88266658782959
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,4096,96,8,64,0,1,float16,fp8,0,6.43942387898763
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,4096,96,1,64,0,1,float16,float16,0,2.9750614166259766
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,4096,96,96,64,0,1,float16,fp8,0,3.659776051839193
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,4096,96,96,64,0,1,fp8,fp8,0,4.640938758850098
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,4096,96,8,64,0,1,fp8,fp8,0,8.133973439534506
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,4096,96,1,64,0,1,float16,fp8,0,2.8596906661987305
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,4096,96,1,64,0,1,fp8,fp8,0,3.944448153177897
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,4096,96,2,64,0,1,float16,float16,0,2.9962240854899087
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,4096,96,4,64,0,1,float16,float16,0,2.994688034057617
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,4096,96,2,64,0,1,fp8,fp8,0,3.947178522745768
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,4096,96,4,64,0,1,float16,fp8,0,2.985472043355306
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,4096,96,2,64,0,1,float16,fp8,0,3.126271883646647
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,4096,96,8,64,0,1,float16,float16,0,3.011925379435221
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,4096,96,4,64,0,1,fp8,fp8,0,4.041386604309082
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,4096,96,8,64,0,1,float16,fp8,0,2.90611203511556
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,4096,96,8,64,0,1,fp8,fp8,0,4.006400108337402
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,4096,96,96,64,0,1,float16,float16,0,1.8317653338114421
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,4096,96,96,64,0,1,float16,fp8,0,1.7827839851379395
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,4096,96,96,64,0,1,fp8,fp8,0,2.2857386271158853
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,4096,96,1,64,0,1,float16,fp8,0,1.4581759770711262
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,4096,96,1,64,0,1,fp8,fp8,0,2.0015786488850913
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,4096,96,1,64,0,1,float16,float16,0,1.4829227129618328
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,4096,96,2,64,0,1,float16,float16,0,1.4513492584228516
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,4096,96,2,64,0,1,float16,fp8,0,1.466368039449056
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,4096,96,2,64,0,1,fp8,fp8,0,1.998677412668864
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,4096,96,4,64,0,1,float16,float16,0,1.473024050394694
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,4096,96,4,64,0,1,float16,fp8,0,1.4586879412333171
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,4096,96,8,64,0,1,float16,float16,0,1.4859946568806965
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,4096,96,4,64,0,1,fp8,fp8,0,1.993557294209798
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,4096,96,8,64,0,1,float16,fp8,0,1.4624427159627278
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,4096,96,8,64,0,1,fp8,fp8,0,1.9744426409403484
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,3072,96,1,64,0,1,float16,float16,0,32.80605824788412
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,3072,96,1,64,0,1,float16,fp8,0,33.316864013671875
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,3072,96,2,64,0,1,float16,float16,0,32.8285878499349
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,3072,96,2,64,0,1,float16,fp8,0,33.24586741129557
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,3072,96,1,64,0,1,fp8,fp8,0,37.883392333984375
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,3072,96,4,64,0,1,float16,fp8,0,32.9181874593099
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,3072,96,4,64,0,1,float16,float16,0,33.44196319580078
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,3072,96,2,64,0,1,fp8,fp8,0,38.83434549967448
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,3072,96,1,64,0,1,float16,float16,0,16.178858439127605
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,3072,96,96,64,0,1,float16,float16,0,20.282367706298828
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,3072,96,96,64,0,1,float16,fp8,0,19.47136052449544
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,3072,96,8,64,0,1,float16,float16,0,32.6104736328125
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,3072,96,96,64,0,1,fp8,fp8,0,23.3895263671875
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,3072,96,8,64,0,1,float16,fp8,0,32.71782430013021
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,3072,96,4,64,0,1,fp8,fp8,0,39.51189422607422
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,3072,96,8,64,0,1,fp8,fp8,0,39.95357767740885
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,3072,96,1,64,0,1,float16,fp8,0,16.379051208496094
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,3072,96,2,64,0,1,float16,float16,0,16.230740865071613
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,3072,96,1,64,0,1,fp8,fp8,0,18.69858169555664
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,3072,96,2,64,0,1,float16,fp8,0,16.194730122884113
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,3072,96,4,64,0,1,float16,float16,0,16.205994923909504
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,3072,96,2,64,0,1,fp8,fp8,0,19.082752227783203
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,3072,96,4,64,0,1,float16,fp8,0,16.054101308186848
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,3072,96,4,64,0,1,fp8,fp8,0,19.116544087727863
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,3072,96,8,64,0,1,float16,float16,0,15.975424448649088
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,3072,96,96,64,0,1,float16,float16,0,9.847466786702475
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,3072,96,8,64,0,1,float16,fp8,0,16.218624114990234
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,3072,96,96,64,0,1,float16,fp8,0,9.465514500935873
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,3072,96,1,64,0,1,float16,float16,0,8.643072128295898
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,3072,96,8,64,0,1,fp8,fp8,0,19.042816162109375
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,3072,96,96,64,0,1,fp8,fp8,0,11.647317250569662
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,3072,96,1,64,0,1,float16,fp8,0,8.422058741251627
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,3072,96,1,64,0,1,fp8,fp8,0,9.325226465861002
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,3072,96,2,64,0,1,float16,float16,0,8.471722920735678
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,3072,96,2,64,0,1,float16,fp8,0,8.310101191202799
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,3072,96,4,64,0,1,float16,float16,0,8.1889279683431
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,3072,96,2,64,0,1,fp8,fp8,0,9.436501185099283
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,3072,96,4,64,0,1,float16,fp8,0,8.086015701293945
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,3072,96,4,64,0,1,fp8,fp8,0,9.545728047688803
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,3072,96,8,64,0,1,float16,float16,0,8.265216191609701
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,3072,96,8,64,0,1,float16,fp8,0,7.768063863118489
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,3072,96,96,64,0,1,float16,float16,0,4.848469416300456
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,3072,96,96,64,0,1,float16,fp8,0,4.769280115763347
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,3072,96,96,64,0,1,fp8,fp8,0,5.653333028157552
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,3072,96,1,64,0,1,float16,float16,0,3.5824639002482095
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,3072,96,8,64,0,1,fp8,fp8,0,9.582250595092773
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,3072,96,1,64,0,1,float16,fp8,0,3.849045435587565
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,3072,96,1,64,0,1,fp8,fp8,0,4.690431912740071
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,3072,96,2,64,0,1,float16,float16,0,3.6333227157592773
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,3072,96,2,64,0,1,float16,fp8,0,3.622741381327311
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,3072,96,2,64,0,1,fp8,fp8,0,4.66210142771403
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,3072,96,4,64,0,1,float16,float16,0,3.6952746709187827
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,3072,96,4,64,0,1,float16,fp8,0,3.4930346806844077
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,3072,96,4,64,0,1,fp8,fp8,0,4.681045214335124
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,3072,96,8,64,0,1,float16,fp8,0,3.7661012013753257
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,3072,96,8,64,0,1,float16,float16,0,3.804330507914225
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,3072,96,1,64,0,1,float16,float16,0,1.6541013717651367
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,3072,96,96,64,0,1,float16,fp8,0,2.311338742574056
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,3072,96,96,64,0,1,fp8,fp8,0,2.7648000717163086
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,3072,96,96,64,0,1,float16,float16,0,2.3512746493021646
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,3072,96,1,64,0,1,float16,fp8,0,1.6488107045491536
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,3072,96,8,64,0,1,fp8,fp8,0,4.685824076334636
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,3072,96,1,64,0,1,fp8,fp8,0,2.273791948954264
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,3072,96,2,64,0,1,float16,float16,0,1.683285395304362
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,3072,96,2,64,0,1,float16,fp8,0,1.6423254013061523
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,3072,96,4,64,0,1,float16,float16,0,1.6511999766031902
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,3072,96,2,64,0,1,fp8,fp8,0,2.25928529103597
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,3072,96,4,64,0,1,float16,fp8,0,1.6390825907389324
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,3072,96,4,64,0,1,fp8,fp8,0,2.2831786473592124
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,3072,96,8,64,0,1,float16,float16,0,1.6665600140889485
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,3072,96,8,64,0,1,float16,fp8,0,1.6907946268717449
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,3072,96,8,64,0,1,fp8,fp8,0,2.295637289683024
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,3072,96,96,64,0,1,float16,float16,0,1.1484159628550212
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,3072,96,96,64,0,1,float16,fp8,0,1.0938026905059814
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,3072,96,96,64,0,1,fp8,fp8,0,1.4047573407491047
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,3072,96,1,64,0,1,float16,fp8,0,0.8794453144073486
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,3072,96,1,64,0,1,float16,float16,0,0.8722773392995199
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,3072,96,1,64,0,1,fp8,fp8,0,1.1892053286234539
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,3072,96,2,64,0,1,float16,float16,0,0.8879786332448324
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,3072,96,2,64,0,1,float16,fp8,0,0.892245372136434
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,3072,96,2,64,0,1,fp8,fp8,0,1.1810133457183838
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,3072,96,4,64,0,1,float16,float16,0,0.8714240392049154
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,3072,96,4,64,0,1,float16,fp8,0,0.8920746644337972
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,3072,96,4,64,0,1,fp8,fp8,0,1.1907413005828857
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,3072,96,8,64,0,1,float16,float16,0,0.869376023610433
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,3072,96,8,64,0,1,float16,fp8,0,0.8640853563944498
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,3072,96,8,64,0,1,fp8,fp8,0,1.198250691095988
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,2048,96,1,64,0,1,float16,float16,0,33.93450673421224
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,2048,96,1,64,0,1,float16,fp8,0,33.42574818929037
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,2048,96,2,64,0,1,float16,fp8,0,33.77391052246094
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,2048,96,2,64,0,1,float16,float16,0,33.77749379475912
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,2048,96,4,64,0,1,float16,float16,0,34.037760416666664
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,2048,96,1,64,0,1,fp8,fp8,0,37.874176025390625
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,2048,96,2,64,0,1,fp8,fp8,0,39.864662170410156
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,2048,96,4,64,0,1,float16,fp8,0,33.95771789550781
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,2048,96,1,64,0,1,float16,float16,0,16.3962885538737
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,2048,96,96,64,0,1,float16,fp8,0,21.390335083007812
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,2048,96,96,64,0,1,float16,float16,0,22.637227376302082
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,2048,96,96,64,0,1,fp8,fp8,0,24.79889170328776
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,2048,96,8,64,0,1,float16,float16,0,33.68925984700521
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,2048,96,8,64,0,1,float16,fp8,0,33.45271555582682
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,2048,96,4,64,0,1,fp8,fp8,0,41.25388844807943
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,2048,96,8,64,0,1,fp8,fp8,0,41.196032206217446
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,2048,96,1,64,0,1,float16,fp8,0,16.44714609781901
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,2048,96,2,64,0,1,float16,float16,0,15.842815399169922
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,2048,96,1,64,0,1,fp8,fp8,0,18.001407623291016
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,2048,96,2,64,0,1,float16,fp8,0,16.001195271809895
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,2048,96,4,64,0,1,float16,float16,0,15.954432169596354
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,2048,96,2,64,0,1,fp8,fp8,0,18.25279998779297
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,2048,96,4,64,0,1,float16,fp8,0,16.51251220703125
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,2048,96,4,64,0,1,fp8,fp8,0,18.614613850911457
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,2048,96,8,64,0,1,float16,float16,0,16.37188212076823
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,2048,96,8,64,0,1,float16,fp8,0,15.749120076497396
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,2048,96,96,64,0,1,float16,float16,0,10.90542984008789
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,2048,96,8,64,0,1,fp8,fp8,0,18.6419194539388
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,2048,96,96,64,0,1,float16,fp8,0,10.44701894124349
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,2048,96,96,64,0,1,fp8,fp8,0,11.772244771321615
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,2048,96,1,64,0,1,float16,float16,0,7.752533594767253
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,2048,96,1,64,0,1,float16,fp8,0,8.144383748372396
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,2048,96,2,64,0,1,float16,float16,0,8.109226862589518
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,2048,96,1,64,0,1,fp8,fp8,0,9.042773564656576
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,2048,96,4,64,0,1,float16,float16,0,8.006997426350912
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,2048,96,2,64,0,1,float16,fp8,0,8.185514450073242
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,2048,96,2,64,0,1,fp8,fp8,0,9.040213267008463
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,2048,96,4,64,0,1,float16,fp8,0,7.422122955322266
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,2048,96,4,64,0,1,fp8,fp8,0,9.245183944702148
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,2048,96,8,64,0,1,float16,float16,0,7.621802647908528
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,2048,96,8,64,0,1,float16,fp8,0,7.922346750895183
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,2048,96,96,64,0,1,float16,float16,0,5.344767888387044
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,2048,96,96,64,0,1,float16,fp8,0,5.086549441019694
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,2048,96,1,64,0,1,float16,float16,0,3.6526079177856445
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,2048,96,96,64,0,1,fp8,fp8,0,5.779797236124675
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,2048,96,8,64,0,1,fp8,fp8,0,9.299968083699545
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,2048,96,1,64,0,1,float16,fp8,0,3.665066719055176
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,2048,96,1,64,0,1,fp8,fp8,0,4.429994583129883
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,2048,96,2,64,0,1,float16,float16,0,3.6338345209757485
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,2048,96,2,64,0,1,float16,fp8,0,3.6113065083821616
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,2048,96,4,64,0,1,float16,float16,0,3.64680544535319
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,2048,96,4,64,0,1,float16,fp8,0,3.537407875061035
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,2048,96,2,64,0,1,fp8,fp8,0,4.4317013422648115
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,2048,96,4,64,0,1,fp8,fp8,0,4.52403195699056
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,2048,96,8,64,0,1,float16,float16,0,3.7160959243774414
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,2048,96,8,64,0,1,float16,fp8,0,3.7587626775105796
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,2048,96,96,64,0,1,float16,float16,0,2.5687039693196616
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,2048,96,96,64,0,1,float16,fp8,0,2.446165402730306
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,2048,96,1,64,0,1,float16,float16,0,1.6708265940348308
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,2048,96,96,64,0,1,fp8,fp8,0,2.83409055074056
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,2048,96,8,64,0,1,fp8,fp8,0,4.525226593017578
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,2048,96,1,64,0,1,float16,fp8,0,1.7143467267354329
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,2048,96,2,64,0,1,float16,float16,0,1.7078612645467122
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,2048,96,1,64,0,1,fp8,fp8,0,2.1449386278788247
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,2048,96,2,64,0,1,float16,fp8,0,1.6704853375752766
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,2048,96,4,64,0,1,float16,fp8,0,1.6894292831420898
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,2048,96,2,64,0,1,fp8,fp8,0,2.1464746793111167
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,2048,96,4,64,0,1,float16,float16,0,1.6865280469258626
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,2048,96,8,64,0,1,float16,float16,0,1.7942186991373699
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,2048,96,4,64,0,1,fp8,fp8,0,2.1741226514180503
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,2048,96,8,64,0,1,float16,fp8,0,1.7571840286254883
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,2048,96,8,64,0,1,fp8,fp8,0,2.2010879516601562
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,2048,96,96,64,0,1,float16,float16,0,1.2253866990407307
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,2048,96,96,64,0,1,float16,fp8,0,1.1639466285705566
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,2048,96,1,64,0,1,float16,fp8,0,0.8166399796803793
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,2048,96,96,64,0,1,fp8,fp8,0,1.4308692614237468
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,2048,96,1,64,0,1,float16,float16,0,0.7837013403574625
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,2048,96,1,64,0,1,fp8,fp8,0,1.0803199609120686
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,2048,96,2,64,0,1,float16,float16,0,0.7999146779378256
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,2048,96,2,64,0,1,float16,fp8,0,0.8082773685455322
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,2048,96,2,64,0,1,fp8,fp8,0,1.0852693716684978
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,2048,96,4,64,0,1,float16,float16,0,0.7915519873301188
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,2048,96,4,64,0,1,float16,fp8,0,0.8055466810862223
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,2048,96,4,64,0,1,fp8,fp8,0,1.0900479952494304
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,2048,96,8,64,0,1,float16,float16,0,0.7826773325602213
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,2048,96,8,64,0,1,float16,fp8,0,0.7808000246683756
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,2048,96,8,64,0,1,fp8,fp8,0,1.1043840249379475
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,2048,96,96,64,0,1,float16,float16,0,0.5097813208897909
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,2048,96,96,64,0,1,float16,fp8,0,0.4476586580276489
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,2048,96,96,64,0,1,fp8,fp8,0,0.7323306401570638
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,2048,96,1,64,0,1,float16,float16,0,0.45021867752075195
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,2048,96,1,64,0,1,float16,fp8,0,0.44544001420338947
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,2048,96,2,64,0,1,float16,float16,0,0.44970667362213135
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,2048,96,1,64,0,1,fp8,fp8,0,0.5923839807510376
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,2048,96,2,64,0,1,float16,fp8,0,0.4442453384399414
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,2048,96,2,64,0,1,fp8,fp8,0,0.5935786565144857
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,2048,96,4,64,0,1,float16,float16,0,0.4503893454869588
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,2048,96,4,64,0,1,float16,fp8,0,0.448853333791097
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,2048,96,8,64,0,1,float16,float16,0,0.44339199860890705
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,2048,96,4,64,0,1,fp8,fp8,0,0.5952853361765543
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,2048,96,8,64,0,1,float16,fp8,0,0.44697598616282147
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,2048,96,8,64,0,1,fp8,fp8,0,0.5923839807510376
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,1536,96,1,64,0,1,float16,float16,0,19.603455861409504
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,1536,96,1,64,0,1,float16,fp8,0,19.520341237386067
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,1536,96,2,64,0,1,float16,float16,0,19.475626627604168
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,1536,96,1,64,0,1,fp8,fp8,0,22.092971801757812
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,1536,96,2,64,0,1,float16,fp8,0,19.74459711710612
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,1536,96,4,64,0,1,float16,float16,0,19.58502451578776
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,1536,96,4,64,0,1,float16,fp8,0,19.371008555094402
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,1536,96,2,64,0,1,fp8,fp8,0,23.051605224609375
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,1536,96,1,64,0,1,float16,float16,0,10.205354690551758
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,1536,96,4,64,0,1,fp8,fp8,0,23.559168497721355
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,1536,96,8,64,0,1,float16,float16,0,21.33538055419922
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,1536,96,96,64,0,1,float16,fp8,0,14.04416020711263
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,1536,96,96,64,0,1,float16,float16,0,14.850730895996094
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,1536,96,96,64,0,1,fp8,fp8,0,15.241386413574219
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,1536,96,8,64,0,1,float16,fp8,0,19.806549072265625
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,1536,96,8,64,0,1,fp8,fp8,0,23.910741170247395
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,1536,96,1,64,0,1,float16,fp8,0,10.22822380065918
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,1536,96,1,64,0,1,fp8,fp8,0,10.864128112792969
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,1536,96,2,64,0,1,float16,float16,0,10.442581176757812
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,1536,96,2,64,0,1,float16,fp8,0,9.53770637512207
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,1536,96,2,64,0,1,fp8,fp8,0,10.983083089192709
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,1536,96,4,64,0,1,float16,float16,0,9.53873062133789
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,1536,96,4,64,0,1,float16,fp8,0,9.768789291381836
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,1536,96,4,64,0,1,fp8,fp8,0,11.093332926432291
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,1536,96,96,64,0,1,float16,fp8,0,6.827178955078125
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,1536,96,96,64,0,1,float16,float16,0,7.181994756062825
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,1536,96,8,64,0,1,float16,float16,0,9.712981541951498
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,1536,96,1,64,0,1,float16,float16,0,4.572671890258789
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,1536,96,8,64,0,1,float16,fp8,0,10.184362411499023
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,1536,96,96,64,0,1,fp8,fp8,0,7.419904073079427
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,1536,96,8,64,0,1,fp8,fp8,0,11.40462875366211
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,1536,96,1,64,0,1,float16,fp8,0,4.603562672932942
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,1536,96,2,64,0,1,float16,float16,0,4.578645388285319
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,1536,96,1,64,0,1,fp8,fp8,0,5.342720031738281
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,1536,96,2,64,0,1,float16,fp8,0,4.748288154602051
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,1536,96,2,64,0,1,fp8,fp8,0,5.402453104654948
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,1536,96,4,64,0,1,float16,float16,0,4.709888140360515
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,1536,96,4,64,0,1,float16,fp8,0,4.5400746663411455
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,1536,96,8,64,0,1,float16,float16,0,4.620458602905273
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,1536,96,4,64,0,1,fp8,fp8,0,5.527039845784505
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,1536,96,96,64,0,1,float16,fp8,0,3.2848211924235025
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,1536,96,96,64,0,1,float16,float16,0,3.5024213790893555
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,1536,96,96,64,0,1,fp8,fp8,0,3.6565332412719727
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,1536,96,8,64,0,1,float16,fp8,0,4.629845301310222
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,1536,96,8,64,0,1,fp8,fp8,0,5.485226949055989
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,1536,96,1,64,0,1,float16,float16,0,2.172757307688395
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,1536,96,1,64,0,1,float16,fp8,0,2.151594638824463
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,1536,96,1,64,0,1,fp8,fp8,0,2.571434656778971
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,1536,96,2,64,0,1,float16,float16,0,2.1690026919047036
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,1536,96,2,64,0,1,float16,fp8,0,2.2166186968485513
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,1536,96,4,64,0,1,float16,fp8,0,2.186581293741862
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,1536,96,4,64,0,1,float16,float16,0,2.1724160512288413
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,1536,96,2,64,0,1,fp8,fp8,0,2.594816048940023
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,1536,96,8,64,0,1,float16,float16,0,2.228223959604899
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,1536,96,4,64,0,1,fp8,fp8,0,2.583893299102783
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,1536,96,8,64,0,1,float16,fp8,0,2.2596267064412436
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,1536,96,8,64,0,1,fp8,fp8,0,2.668544133504232
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,1536,96,1,64,0,1,float16,fp8,0,0.9803093274434408
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,1536,96,96,64,0,1,float16,float16,0,1.6931840578715007
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,1536,96,96,64,0,1,float16,fp8,0,1.5668907165527344
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,1536,96,1,64,0,1,float16,float16,0,0.9980586369832357
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,1536,96,96,64,0,1,fp8,fp8,0,1.814527988433838
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,1536,96,1,64,0,1,fp8,fp8,0,1.2818773587544758
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,1536,96,2,64,0,1,float16,float16,0,0.9975466728210449
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,1536,96,2,64,0,1,float16,fp8,0,0.9869653383890787
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,1536,96,4,64,0,1,float16,float16,0,1.0210986932118733
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,1536,96,4,64,0,1,float16,fp8,0,0.9987413088480631
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,1536,96,2,64,0,1,fp8,fp8,0,1.2934827009836833
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,1536,96,4,64,0,1,fp8,fp8,0,1.311743974685669
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,1536,96,8,64,0,1,float16,float16,0,1.025877316792806
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,1536,96,8,64,0,1,float16,fp8,0,1.0089813073476155
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,1536,96,8,64,0,1,fp8,fp8,0,1.3405866622924805
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,1536,96,96,64,0,1,float16,float16,0,0.7837013403574625
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,1536,96,96,64,0,1,fp8,fp8,0,0.9198933442433676
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,1536,96,96,64,0,1,float16,fp8,0,0.741376002629598
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,1536,96,1,64,0,1,float16,float16,0,0.474453330039978
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,1536,96,1,64,0,1,float16,fp8,0,0.4928853511810303
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,1536,96,1,64,0,1,fp8,fp8,0,0.6478506724039713
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,1536,96,2,64,0,1,float16,float16,0,0.47377065817515057
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,1536,96,2,64,0,1,float16,fp8,0,0.48742401599884033
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,1536,96,2,64,0,1,fp8,fp8,0,0.6480213403701782
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,1536,96,4,64,0,1,float16,fp8,0,0.4957866668701172
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,1536,96,4,64,0,1,fp8,fp8,0,0.6487040122350057
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,1536,96,8,64,0,1,float16,float16,0,0.48503466447194415
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,1536,96,4,64,0,1,float16,float16,0,0.4805973370869954
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,1536,96,8,64,0,1,fp8,fp8,0,0.6526293357213339
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,1536,96,8,64,0,1,float16,fp8,0,0.47598934173583984
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,1536,96,96,64,0,1,float16,float16,0,0.2845013340314229
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,1536,96,96,64,0,1,float16,fp8,0,0.2826240062713623
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,1536,96,1,64,0,1,float16,float16,0,0.2921813329060872
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,1536,96,96,64,0,1,fp8,fp8,0,0.446122686068217
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,1536,96,1,64,0,1,float16,fp8,0,0.29286400477091473
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,1536,96,1,64,0,1,fp8,fp8,0,0.3712000052134196
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,1536,96,2,64,0,1,float16,float16,0,0.291157325108846
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,1536,96,2,64,0,1,float16,fp8,0,0.29474133253097534
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,1536,96,2,64,0,1,fp8,fp8,0,0.36983466148376465
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,1536,96,4,64,0,1,float16,float16,0,0.2955946723620097
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,1536,96,4,64,0,1,float16,fp8,0,0.29781333605448407
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,1536,96,8,64,0,1,float16,fp8,0,0.2935466567675273
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,1536,96,4,64,0,1,fp8,fp8,0,0.37307735284169513
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,1536,96,8,64,0,1,float16,float16,0,0.2867199977238973
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,1536,96,8,64,0,1,fp8,fp8,0,0.37137067317962646
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,1024,96,1,64,0,1,float16,float16,0,20.49262873331706
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,1024,96,1,64,0,1,float16,fp8,0,20.871339162190754
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,1024,96,1,64,0,1,fp8,fp8,0,22.94476826985677
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,1024,96,2,64,0,1,float16,float16,0,21.375999450683594
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,1024,96,2,64,0,1,float16,fp8,0,21.350570678710938
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,1024,96,4,64,0,1,float16,float16,0,22.016342163085938
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,1024,96,2,64,0,1,fp8,fp8,0,23.68511962890625
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,1024,96,4,64,0,1,float16,fp8,0,21.67705535888672
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,1024,96,1,64,0,1,float16,float16,0,9.775274912516275
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,1024,96,4,64,0,1,fp8,fp8,0,24.426836649576824
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,1024,96,8,64,0,1,float16,float16,0,22.681259155273438
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,1024,96,96,64,0,1,float16,fp8,0,16.698368072509766
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,1024,96,96,64,0,1,float16,float16,0,17.6541010538737
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,1024,96,96,64,0,1,fp8,fp8,0,17.686869303385418
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,1024,96,8,64,0,1,float16,fp8,0,22.070955912272137
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,1024,96,8,64,0,1,fp8,fp8,0,24.863744099934895
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,1024,96,1,64,0,1,float16,fp8,0,9.90549341837565
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,1024,96,1,64,0,1,fp8,fp8,0,10.917887369791666
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,1024,96,2,64,0,1,float16,float16,0,9.924949645996094
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,1024,96,2,64,0,1,float16,fp8,0,9.971200307210287
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,1024,96,2,64,0,1,fp8,fp8,0,11.22662353515625
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,1024,96,4,64,0,1,float16,float16,0,10.046634674072266
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,1024,96,4,64,0,1,float16,fp8,0,10.292224248250326
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,1024,96,4,64,0,1,fp8,fp8,0,11.362815856933594
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,1024,96,8,64,0,1,float16,float16,0,10.13862419128418
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,1024,96,8,64,0,1,float16,fp8,0,9.95413335164388
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,1024,96,8,64,0,1,fp8,fp8,0,11.798868815104166
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,1024,96,1,64,0,1,float16,float16,0,4.798293431599935
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,1024,96,96,64,0,1,float16,float16,0,8.53384526570638
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,1024,96,96,64,0,1,float16,fp8,0,8.107349395751953
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,1024,96,96,64,0,1,fp8,fp8,0,8.3056640625
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,1024,96,1,64,0,1,float16,fp8,0,4.750677426656087
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,1024,96,1,64,0,1,fp8,fp8,0,5.400063832600911
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,1024,96,2,64,0,1,float16,float16,0,4.802559852600098
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,1024,96,2,64,0,1,float16,fp8,0,4.887722651163737
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,1024,96,2,64,0,1,fp8,fp8,0,5.455701192220052
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,1024,96,4,64,0,1,float16,float16,0,4.899498621622722
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,1024,96,4,64,0,1,float16,fp8,0,4.823210716247559
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,1024,96,8,64,0,1,float16,float16,0,4.973567962646484
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,1024,96,4,64,0,1,fp8,fp8,0,5.625343958536784
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,1024,96,8,64,0,1,float16,fp8,0,4.874410629272461
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,1024,96,8,64,0,1,fp8,fp8,0,5.602474848429362
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,1024,96,96,64,0,1,float16,fp8,0,3.956735928853353
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,1024,96,1,64,0,1,float16,float16,0,2.3256746927897134
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,1024,96,96,64,0,1,float16,float16,0,4.179114659627278
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,1024,96,96,64,0,1,fp8,fp8,0,4.0635732014973955
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,1024,96,1,64,0,1,float16,fp8,0,2.2964906692504883
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,1024,96,1,64,0,1,fp8,fp8,0,2.579626719156901
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,1024,96,2,64,0,1,float16,float16,0,2.310314655303955
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,1024,96,2,64,0,1,fp8,fp8,0,2.615466594696045
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,1024,96,2,64,0,1,float16,fp8,0,2.320042610168457
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,1024,96,4,64,0,1,float16,float16,0,2.3579306602478027
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,1024,96,4,64,0,1,float16,fp8,0,2.367146650950114
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,1024,96,8,64,0,1,float16,float16,0,2.412031968434652
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,1024,96,4,64,0,1,fp8,fp8,0,2.6410667101542153
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,1024,96,8,64,0,1,float16,fp8,0,2.441727956136068
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,1024,96,96,64,0,1,float16,float16,0,1.9991893768310547
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,1024,96,1,64,0,1,float16,float16,0,1.0873173077901204
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,1024,96,96,64,0,1,fp8,fp8,0,1.9875839551289876
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,1024,96,8,64,0,1,fp8,fp8,0,2.668544133504232
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,1024,96,1,64,0,1,float16,fp8,0,1.0676906903584797
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,1024,96,96,64,0,1,float16,fp8,0,1.8867200215657551
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,1024,96,1,64,0,1,fp8,fp8,0,1.2960426807403564
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,1024,96,2,64,0,1,float16,float16,0,1.1009706656138103
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,1024,96,2,64,0,1,float16,fp8,0,1.0762240091959636
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,1024,96,4,64,0,1,float16,float16,0,1.0949973265329997
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,1024,96,2,64,0,1,fp8,fp8,0,1.308672030766805
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,1024,96,4,64,0,1,fp8,fp8,0,1.32369065284729
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,1024,96,4,64,0,1,float16,fp8,0,1.081173340479533
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,1024,96,8,64,0,1,float16,float16,0,1.1395413080851238
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,1024,96,8,64,0,1,float16,fp8,0,1.1409066518147786
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,1024,96,8,64,0,1,fp8,fp8,0,1.354581356048584
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,1024,96,96,64,0,1,float16,float16,0,0.9540266990661621
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,1024,96,96,64,0,1,float16,fp8,0,0.8884906768798828
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,1024,96,96,64,0,1,fp8,fp8,0,1.0006186962127686
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,1024,96,1,64,0,1,float16,fp8,0,0.4601173400878906
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,1024,96,1,64,0,1,float16,float16,0,0.45670398076375324
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,1024,96,1,64,0,1,fp8,fp8,0,0.622592012087504
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,1024,96,2,64,0,1,float16,float16,0,0.46779731909434
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,1024,96,2,64,0,1,float16,fp8,0,0.47377065817515057
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,1024,96,2,64,0,1,fp8,fp8,0,0.6270293394724528
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,1024,96,4,64,0,1,float16,float16,0,0.47837865352630615
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,1024,96,4,64,0,1,float16,fp8,0,0.47138134638468426
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,1024,96,8,64,0,1,float16,float16,0,0.4742826620737712
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,1024,96,4,64,0,1,fp8,fp8,0,0.6386346817016602
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,1024,96,8,64,0,1,float16,fp8,0,0.46609067916870117
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,1024,96,8,64,0,1,fp8,fp8,0,0.6615039904912313
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,1024,96,96,64,0,1,float16,float16,0,0.3619840145111084
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,1024,96,96,64,0,1,float16,fp8,0,0.29764266808827716
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,1024,96,96,64,0,1,fp8,fp8,0,0.5012480020523071
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,1024,96,1,64,0,1,float16,fp8,0,0.23142399390538534
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,1024,96,1,64,0,1,float16,float16,0,0.23193599780400595
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,1024,96,1,64,0,1,fp8,fp8,0,0.3160746693611145
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,1024,96,2,64,0,1,float16,float16,0,0.23091200987497965
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,1024,96,2,64,0,1,float16,fp8,0,0.2321066657702128
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,1024,96,2,64,0,1,fp8,fp8,0,0.3201706608136495
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,1024,96,4,64,0,1,float16,float16,0,0.2373973329861959
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,1024,96,4,64,0,1,float16,fp8,0,0.23569067319234213
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,1024,96,4,64,0,1,fp8,fp8,0,0.3211946686108907
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,1024,96,8,64,0,1,float16,float16,0,0.23552000522613525
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,1024,96,8,64,0,1,float16,fp8,0,0.22971733411153158
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,1024,96,8,64,0,1,fp8,fp8,0,0.32307199637095135
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,1024,96,96,64,0,1,float16,fp8,0,0.13209600249926248
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,1024,96,96,64,0,1,float16,float16,0,0.13209600249926248
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,1024,96,96,64,0,1,fp8,fp8,0,0.1800533334414164
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,1024,96,1,64,0,1,float16,float16,0,0.12663466731707254
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,1024,96,1,64,0,1,float16,fp8,0,0.12782933314641318
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,1024,96,1,64,0,1,fp8,fp8,0,0.17510400215784708
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,1024,96,2,64,0,1,float16,float16,0,0.1269760032494863
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,1024,96,2,64,0,1,float16,fp8,0,0.1276586651802063
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,1024,96,2,64,0,1,fp8,fp8,0,0.1730560064315796
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,1024,96,4,64,0,1,float16,float16,0,0.13209600249926248
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,1024,96,4,64,0,1,float16,fp8,0,0.13158399860064188
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,1024,96,4,64,0,1,fp8,fp8,0,0.17151999473571777
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,1024,96,8,64,0,1,float16,float16,0,0.13090133666992188
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,1024,96,8,64,0,1,float16,fp8,0,0.13090133666992188
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,1024,96,8,64,0,1,fp8,fp8,0,0.17322667439778647
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,512,96,1,64,0,1,float16,float16,0,15.51633071899414
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,512,96,1,64,0,1,float16,fp8,0,15.774890899658203
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,512,96,1,64,0,1,fp8,fp8,0,15.913130442301432
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,512,96,2,64,0,1,float16,float16,0,16.346282958984375
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,512,96,2,64,0,1,float16,fp8,0,16.31317392985026
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,512,96,2,64,0,1,fp8,fp8,0,16.636586507161457
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,512,96,4,64,0,1,float16,float16,0,16.948394775390625
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,512,96,4,64,0,1,float16,fp8,0,16.58589808146159
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,512,96,4,64,0,1,fp8,fp8,0,17.391957600911457
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,512,96,8,64,0,1,float16,fp8,0,17.00113042195638
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,512,96,8,64,0,1,float16,float16,0,17.38973871866862
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,512,96,1,64,0,1,float16,float16,0,7.098367691040039
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,512,96,96,64,0,1,float16,float16,0,16.02133305867513
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,512,96,96,64,0,1,float16,fp8,0,15.085226694742838
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,512,96,8,64,0,1,fp8,fp8,0,18.039637247721355
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,512,96,96,64,0,1,fp8,fp8,0,14.470315297444662
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,512,96,1,64,0,1,float16,fp8,0,7.1393280029296875
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,512,96,1,64,0,1,fp8,fp8,0,7.373141606648763
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,512,96,2,64,0,1,float16,float16,0,7.437994639078776
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,512,96,2,64,0,1,float16,fp8,0,7.263231913248698
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,512,96,2,64,0,1,fp8,fp8,0,7.539712270100911
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,512,96,4,64,0,1,float16,float16,0,7.45250129699707
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,512,96,4,64,0,1,float16,fp8,0,7.423829396565755
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,512,96,4,64,0,1,fp8,fp8,0,7.837866465250651
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,512,96,8,64,0,1,float16,fp8,0,7.694506963094075
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,512,96,8,64,0,1,float16,float16,0,7.747754414876302
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,512,96,8,64,0,1,fp8,fp8,0,8.037546793619791
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,512,96,96,64,0,1,float16,float16,0,7.742634455362956
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,512,96,1,64,0,1,float16,float16,0,3.475285212198893
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,512,96,1,64,0,1,float16,fp8,0,3.5218772888183594
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,512,96,96,64,0,1,float16,fp8,0,7.282517115275065
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,512,96,96,64,0,1,fp8,fp8,0,6.610602696736653
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,512,96,1,64,0,1,fp8,fp8,0,3.603626569112142
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,512,96,2,64,0,1,float16,float16,0,3.529557228088379
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,512,96,2,64,0,1,float16,fp8,0,3.5681279500325522
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,512,96,2,64,0,1,fp8,fp8,0,3.6253013610839844
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,512,96,4,64,0,1,float16,float16,0,3.561984062194824
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,512,96,4,64,0,1,float16,fp8,0,3.604309399922689
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,512,96,4,64,0,1,fp8,fp8,0,3.7498881022135415
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,512,96,8,64,0,1,float16,float16,0,3.8355627059936523
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,512,96,8,64,0,1,float16,fp8,0,3.6952746709187827
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,512,96,8,64,0,1,fp8,fp8,0,3.772245407104492
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,512,96,96,64,0,1,float16,float16,0,3.752277374267578
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,512,96,96,64,0,1,float16,fp8,0,3.5572052001953125
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,512,96,96,64,0,1,fp8,fp8,0,3.2220160166422525
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,512,96,1,64,0,1,float16,fp8,0,1.669973373413086
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,512,96,1,64,0,1,float16,float16,0,1.6807252566019695
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,512,96,1,64,0,1,fp8,fp8,0,1.7329492568969727
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,512,96,2,64,0,1,float16,float16,0,1.7068373362223308
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,512,96,2,64,0,1,float16,fp8,0,1.6773120562235515
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,512,96,2,64,0,1,fp8,fp8,0,1.7769813537597656
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,512,96,4,64,0,1,float16,float16,0,1.724415938059489
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,512,96,4,64,0,1,float16,fp8,0,1.718272050221761
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,512,96,4,64,0,1,fp8,fp8,0,1.7933653195699055
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,512,96,8,64,0,1,float16,float16,0,1.828181266784668
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,512,96,8,64,0,1,float16,fp8,0,1.7959252993265789
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,512,96,8,64,0,1,fp8,fp8,0,1.8513919512430828
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,512,96,96,64,0,1,float16,float16,0,1.7925119400024414
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,512,96,96,64,0,1,float16,fp8,0,1.66758394241333
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,512,96,1,64,0,1,float16,float16,0,0.7611733277638754
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,512,96,1,64,0,1,float16,fp8,0,0.7640746434529623
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,512,96,96,64,0,1,fp8,fp8,0,1.5743999481201172
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,512,96,2,64,0,1,float16,float16,0,0.7666347026824951
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,512,96,1,64,0,1,fp8,fp8,0,0.8791039784749349
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,512,96,2,64,0,1,float16,fp8,0,0.7741440137227377
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,512,96,2,64,0,1,fp8,fp8,0,0.8809813658396403
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,512,96,4,64,0,1,float16,float16,0,0.7968426545461019
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,512,96,4,64,0,1,float16,fp8,0,0.7852373123168945
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,512,96,4,64,0,1,fp8,fp8,0,0.9038506348927816
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,512,96,8,64,0,1,float16,fp8,0,0.8253440062204996
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,512,96,8,64,0,1,float16,float16,0,0.839680035909017
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,512,96,8,64,0,1,fp8,fp8,0,0.934229294459025
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,512,96,96,64,0,1,float16,float16,0,0.8084479967753092
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,512,96,1,64,0,1,float16,float16,0,0.31010133028030396
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,512,96,96,64,0,1,float16,fp8,0,0.7661226590474447
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,512,96,96,64,0,1,fp8,fp8,0,0.785749355951945
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,512,96,1,64,0,1,float16,fp8,0,0.3078826665878296
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,512,96,1,64,0,1,fp8,fp8,0,0.4092586835225423
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,512,96,2,64,0,1,float16,float16,0,0.30668799082438153
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,512,96,2,64,0,1,float16,fp8,0,0.31385600566864014
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,512,96,2,64,0,1,fp8,fp8,0,0.4154026508331299
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,512,96,4,64,0,1,float16,float16,0,0.3165866732597351
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,512,96,4,64,0,1,float16,fp8,0,0.31061333417892456
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,512,96,8,64,0,1,float16,float16,0,0.32307199637095135
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,512,96,4,64,0,1,fp8,fp8,0,0.42803200085957843
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,512,96,8,64,0,1,float16,fp8,0,0.31436800956726074
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,512,96,96,64,0,1,float16,float16,0,0.2846720019976298
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,512,96,8,64,0,1,fp8,fp8,0,0.4478293259938558
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,512,96,96,64,0,1,float16,fp8,0,0.22766933838526407
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,512,96,96,64,0,1,fp8,fp8,0,0.3930453459421794
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,512,96,1,64,0,1,float16,float16,0,0.15223466356595358
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,512,96,1,64,0,1,float16,fp8,0,0.15103999773661295
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,512,96,1,64,0,1,fp8,fp8,0,0.2058239976565043
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,512,96,2,64,0,1,float16,float16,0,0.15155200163523355
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,512,96,2,64,0,1,float16,fp8,0,0.15428266922632852
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,512,96,2,64,0,1,fp8,fp8,0,0.20104533433914185
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,512,96,4,64,0,1,float16,float16,0,0.1599146624406179
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,512,96,4,64,0,1,float16,fp8,0,0.15889066457748413
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,512,96,8,64,0,1,float16,float16,0,0.15411200126012167
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,512,96,4,64,0,1,fp8,fp8,0,0.20292266209920248
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,512,96,8,64,0,1,float16,fp8,0,0.15581867098808289
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,512,96,8,64,0,1,fp8,fp8,0,0.20462934176127115
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,512,96,96,64,0,1,float16,fp8,0,0.09045333663622539
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,512,96,96,64,0,1,float16,float16,0,0.091648002465566
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,512,96,96,64,0,1,fp8,fp8,0,0.1186133325099945
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,512,96,1,64,0,1,float16,float16,0,0.08840533097585042
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,512,96,1,64,0,1,float16,fp8,0,0.08840533097585042
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,512,96,1,64,0,1,fp8,fp8,0,0.11315199732780457
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,512,96,2,64,0,1,float16,float16,0,0.08874666690826416
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,512,96,2,64,0,1,float16,fp8,0,0.08891733487447102
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,512,96,2,64,0,1,fp8,fp8,0,0.1129813293615977
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,512,96,4,64,0,1,float16,float16,0,0.091648002465566
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,512,96,4,64,0,1,float16,fp8,0,0.09096533060073853
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,512,96,4,64,0,1,fp8,fp8,0,0.11212799946467082
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,512,96,8,64,0,1,float16,float16,0,0.08874666690826416
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,512,96,8,64,0,1,float16,fp8,0,0.0890880028406779
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,512,96,96,64,0,1,float16,float16,0,0.054101333022117615
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,512,96,8,64,0,1,fp8,fp8,0,0.11246933539708455
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,512,96,96,64,0,1,float16,fp8,0,0.053247998158137
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,512,96,96,64,0,1,fp8,fp8,0,0.07048533360163371
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,512,96,1,64,0,1,float16,fp8,0,0.05034666756788889
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,512,96,1,64,0,1,float16,float16,0,0.05120000243186951
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,512,96,1,64,0,1,fp8,fp8,0,0.067071999112765
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,512,96,2,64,0,1,float16,float16,0,0.05171200136343638
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,512,96,2,64,0,1,float16,fp8,0,0.051370665431022644
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,512,96,2,64,0,1,fp8,fp8,0,0.06894933183987935
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,512,96,4,64,0,1,float16,fp8,0,0.05444266895453135
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,512,96,4,64,0,1,float16,float16,0,0.05376000205675761
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,512,96,4,64,0,1,fp8,fp8,0,0.06980266670385997
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,512,96,8,64,0,1,float16,fp8,0,0.05222400029500326
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,512,96,8,64,0,1,float16,float16,0,0.05171200136343638
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,512,96,8,64,0,1,fp8,fp8,0,0.06843733290831248
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,256,96,1,64,0,1,float16,float16,0,6.064128239949544
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,256,96,1,64,0,1,float16,fp8,0,6.047402699788411
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,256,96,1,64,0,1,fp8,fp8,0,5.549568176269531
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,256,96,2,64,0,1,float16,float16,0,6.440106709798177
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,256,96,2,64,0,1,float16,fp8,0,6.38481076558431
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,256,96,2,64,0,1,fp8,fp8,0,5.692757288614909
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,256,96,4,64,0,1,float16,float16,0,6.6459306081136065
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,256,96,4,64,0,1,float16,fp8,0,6.572544097900391
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,256,96,4,64,0,1,fp8,fp8,0,5.942101160685222
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,256,96,8,64,0,1,float16,float16,0,6.966784159342448
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,256,96,8,64,0,1,fp8,fp8,0,6.160213470458984
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,256,96,8,64,0,1,float16,fp8,0,6.848853429158528
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,256,96,1,64,0,1,float16,float16,0,2.9322239557902017
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,256,96,96,64,0,1,float16,fp8,0,7.128746668497722
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,256,96,96,64,0,1,float16,float16,0,7.580501556396484
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,256,96,96,64,0,1,fp8,fp8,0,5.844309488932292
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,256,96,1,64,0,1,float16,fp8,0,2.923520088195801
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,256,96,1,64,0,1,fp8,fp8,0,2.6530133883158364
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,256,96,2,64,0,1,float16,float16,0,3.0750719706217446
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,256,96,2,64,0,1,float16,fp8,0,3.0670506159464517
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,256,96,2,64,0,1,fp8,fp8,0,2.7013120651245117
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,256,96,4,64,0,1,float16,float16,0,3.17627747853597
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,256,96,4,64,0,1,fp8,fp8,0,2.8090025583902993
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,256,96,4,64,0,1,float16,fp8,0,3.150848070780436
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,256,96,8,64,0,1,float16,float16,0,3.358037312825521
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,256,96,8,64,0,1,float16,fp8,0,3.3203201293945312
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,256,96,8,64,0,1,fp8,fp8,0,2.9105494817097983
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,256,96,96,64,0,1,float16,float16,0,3.6826454798380532
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,256,96,96,64,0,1,float16,fp8,0,3.4757973353068032
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,256,96,1,64,0,1,float16,float16,0,1.4632959365844727
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,256,96,1,64,0,1,float16,fp8,0,1.4626132647196453
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,256,96,96,64,0,1,fp8,fp8,0,2.8149760564168296
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,256,96,1,64,0,1,fp8,fp8,0,1.3535572687784831
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,256,96,2,64,0,1,float16,float16,0,1.5201279322306316
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,256,96,2,64,0,1,float16,fp8,0,1.5170559883117676
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,256,96,2,64,0,1,fp8,fp8,0,1.3849600156148274
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,256,96,4,64,0,1,float16,float16,0,1.5880533854166667
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,256,96,4,64,0,1,fp8,fp8,0,1.3847893079121907
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,256,96,4,64,0,1,float16,fp8,0,1.5103999773661296
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,256,96,8,64,0,1,float16,float16,0,1.675605297088623
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,256,96,8,64,0,1,float16,fp8,0,1.6061439514160156
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,256,96,8,64,0,1,fp8,fp8,0,1.4388906160990398
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,256,96,96,64,0,1,float16,float16,0,1.7872212727864583
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,256,96,96,64,0,1,float16,fp8,0,1.675605297088623
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,256,96,1,64,0,1,float16,float16,0,0.6220800081888834
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,256,96,1,64,0,1,float16,fp8,0,0.624127984046936
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,256,96,96,64,0,1,fp8,fp8,0,1.3789866765340169
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,256,96,1,64,0,1,fp8,fp8,0,0.6608213186264038
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,256,96,2,64,0,1,float16,float16,0,0.6463146607081095
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,256,96,2,64,0,1,float16,fp8,0,0.6466559966405233
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,256,96,2,64,0,1,fp8,fp8,0,0.672597328821818
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,256,96,4,64,0,1,float16,float16,0,0.6768639882405599
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,256,96,4,64,0,1,float16,fp8,0,0.6705493132273356
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,256,96,4,64,0,1,fp8,fp8,0,0.6930773258209229
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,256,96,8,64,0,1,float16,float16,0,0.7323306401570638
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,256,96,8,64,0,1,float16,fp8,0,0.721407969792684
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,256,96,8,64,0,1,fp8,fp8,0,0.7195306619008383
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,256,96,96,64,0,1,float16,float16,0,0.8178346951802572
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,256,96,96,64,0,1,float16,fp8,0,0.7611733277638754
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,256,96,96,64,0,1,fp8,fp8,0,0.6912000179290771
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,256,96,1,64,0,1,float16,float16,0,0.2239146629969279
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,256,96,1,64,0,1,float16,fp8,0,0.226474662621816
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,256,96,1,64,0,1,fp8,fp8,0,0.3022506634394328
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,256,96,2,64,0,1,float16,float16,0,0.22732800245285034
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,256,96,2,64,0,1,float16,fp8,0,0.22272000710169473
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,256,96,2,64,0,1,fp8,fp8,0,0.30702932675679523
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,256,96,4,64,0,1,float16,float16,0,0.23347200949986777
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,256,96,4,64,0,1,float16,fp8,0,0.2321066657702128
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,256,96,4,64,0,1,fp8,fp8,0,0.32051199674606323
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,256,96,8,64,0,1,float16,float16,0,0.25412267446517944
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,256,96,8,64,0,1,float16,fp8,0,0.24422399202982584
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,256,96,8,64,0,1,fp8,fp8,0,0.3428693215052287
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,256,96,96,64,0,1,float16,float16,0,0.2578773299853007
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,256,96,96,64,0,1,float16,fp8,0,0.1930239995320638
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,256,96,96,64,0,1,fp8,fp8,0,0.3397973378499349
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,256,96,1,64,0,1,float16,fp8,0,0.1160533328851064
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,256,96,1,64,0,1,float16,float16,0,0.11178666353225708
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,256,96,1,64,0,1,fp8,fp8,0,0.14472533265749613
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,256,96,2,64,0,1,float16,float16,0,0.1155413289864858
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,256,96,2,64,0,1,float16,fp8,0,0.11229866743087769
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,256,96,2,64,0,1,fp8,fp8,0,0.1443839967250824
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,256,96,4,64,0,1,float16,float16,0,0.11707733074824016
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,256,96,4,64,0,1,float16,fp8,0,0.11264000336329143
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,256,96,8,64,0,1,float16,float16,0,0.11468799908955891
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,256,96,4,64,0,1,fp8,fp8,0,0.14472533265749613
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,256,96,8,64,0,1,float16,fp8,0,0.11400533715883891
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,256,96,8,64,0,1,fp8,fp8,0,0.14728533228238425
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,256,96,96,64,0,1,float16,float16,0,0.06911999980608623
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,256,96,96,64,0,1,float16,fp8,0,0.06860800087451935
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,256,96,96,64,0,1,fp8,fp8,0,0.09028266867001851
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,256,96,1,64,0,1,float16,float16,0,0.06144000093142191
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,256,96,1,64,0,1,float16,fp8,0,0.06348800162474315
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,256,96,1,64,0,1,fp8,fp8,0,0.08038400113582611
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,256,96,2,64,0,1,float16,float16,0,0.06382933259010315
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,256,96,2,64,0,1,fp8,fp8,0,0.08021333316961925
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,256,96,2,64,0,1,float16,fp8,0,0.06382933259010315
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,256,96,4,64,0,1,float16,float16,0,0.06434133152167003
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,256,96,4,64,0,1,float16,fp8,0,0.06553600231806438
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,256,96,4,64,0,1,fp8,fp8,0,0.08089600006739299
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,256,96,8,64,0,1,float16,float16,0,0.06451199948787689
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,256,96,8,64,0,1,float16,fp8,0,0.06297599772612254
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,256,96,8,64,0,1,fp8,fp8,0,0.08021333316961925
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,256,96,96,64,0,1,float16,float16,0,0.039936001102129616
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,256,96,96,64,0,1,float16,fp8,0,0.04027733455101649
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,256,96,96,64,0,1,fp8,fp8,0,0.049322664737701416
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,256,96,1,64,0,1,float16,float16,0,0.037717332442601524
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,256,96,1,64,0,1,float16,fp8,0,0.03737599899371465
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,256,96,2,64,0,1,float16,float16,0,0.037717332442601524
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,256,96,1,64,0,1,fp8,fp8,0,0.04795733094215393
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,256,96,2,64,0,1,float16,fp8,0,0.037717332442601524
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,256,96,2,64,0,1,fp8,fp8,0,0.04693333307902018
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,256,96,4,64,0,1,float16,float16,0,0.038058665891488395
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,256,96,4,64,0,1,float16,fp8,0,0.03857066730658213
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,256,96,4,64,0,1,fp8,fp8,0,0.048810665806134544
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,256,96,8,64,0,1,float16,fp8,0,0.03754666695992152
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,256,96,8,64,0,1,float16,float16,0,0.03754666695992152
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,256,96,8,64,0,1,fp8,fp8,0,0.04761599997679392
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,256,96,96,64,0,1,float16,fp8,0,0.02491733431816101
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,256,96,96,64,0,1,float16,float16,0,0.025258667767047882
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,256,96,96,64,0,1,fp8,fp8,0,0.030378667016824085
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,256,96,1,64,0,1,float16,float16,0,0.025258667767047882
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,256,96,1,64,0,1,float16,fp8,0,0.025087999800841015
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,256,96,1,64,0,1,fp8,fp8,0,0.030720000465710957
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,256,96,2,64,0,1,float16,float16,0,0.025258667767047882
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,256,96,2,64,0,1,float16,fp8,0,0.025258667767047882
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,256,96,2,64,0,1,fp8,fp8,0,0.030207999050617218
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,256,96,4,64,0,1,float16,float16,0,0.025770666698614757
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,256,96,4,64,0,1,float16,fp8,0,0.025600001215934753
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,256,96,4,64,0,1,fp8,fp8,0,0.031744000812371574
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,256,96,8,64,0,1,float16,float16,0,0.025258667767047882
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,256,96,8,64,0,1,float16,fp8,0,0.025429333249727886
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,256,96,8,64,0,1,fp8,fp8,0,0.03054933249950409
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,128,96,1,64,0,1,float16,float16,0,2.9771092732747397
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,128,96,1,64,0,1,fp8,fp8,0,2.2743040720621743
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,128,96,1,64,0,1,float16,fp8,0,2.97267214457194
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,128,96,2,64,0,1,float16,float16,0,3.050325393676758
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,128,96,2,64,0,1,float16,fp8,0,3.0417919158935547
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,128,96,2,64,0,1,fp8,fp8,0,2.3282346725463867
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,128,96,4,64,0,1,float16,float16,0,3.1232000986735025
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,128,96,4,64,0,1,float16,fp8,0,3.0928214391072593
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,128,96,4,64,0,1,fp8,fp8,0,2.415445327758789
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,128,96,8,64,0,1,float16,float16,0,3.3535998662312827
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,128,96,8,64,0,1,float16,fp8,0,3.292330741882324
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,128,96,8,64,0,1,fp8,fp8,0,2.5168213844299316
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,128,96,96,64,0,1,float16,float16,0,3.700906753540039
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,128,96,96,64,0,1,float16,fp8,0,3.4850133260091147
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,128,96,1,64,0,1,float16,float16,0,1.4327467282613118
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,128,96,96,64,0,1,fp8,fp8,0,2.7775999704996743
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,128,96,1,64,0,1,float16,fp8,0,1.4306987126668294
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,128,96,1,64,0,1,fp8,fp8,0,1.1332266330718994
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,128,96,2,64,0,1,float16,float16,0,1.4687573115030925
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,128,96,2,64,0,1,float16,fp8,0,1.4621013005574544
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,128,96,2,64,0,1,fp8,fp8,0,1.1468799908955891
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,128,96,4,64,0,1,float16,fp8,0,1.5011839866638184
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,128,96,4,64,0,1,float16,float16,0,1.5115946133931477
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,128,96,4,64,0,1,fp8,fp8,0,1.178282658259074
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,128,96,8,64,0,1,float16,float16,0,1.6168959935506184
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,128,96,8,64,0,1,float16,fp8,0,1.606826623280843
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,128,96,8,64,0,1,fp8,fp8,0,1.2306773662567139
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,128,96,96,64,0,1,float16,float16,0,1.783125400543213
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,128,96,96,64,0,1,float16,fp8,0,1.679701328277588
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,128,96,96,64,0,1,fp8,fp8,0,1.3660160700480144
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,128,96,1,64,0,1,float16,float16,0,0.668842633565267
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,128,96,1,64,0,1,float16,fp8,0,0.6652586857477824
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,128,96,1,64,0,1,fp8,fp8,0,0.5959680080413818
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,128,96,2,64,0,1,float16,float16,0,0.6778879960378011
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,128,96,2,64,0,1,float16,fp8,0,0.6775466601053873
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,128,96,2,64,0,1,fp8,fp8,0,0.598527987798055
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,128,96,4,64,0,1,float16,float16,0,0.6993920008341471
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,128,96,4,64,0,1,fp8,fp8,0,0.6019413471221924
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,128,96,4,64,0,1,float16,fp8,0,0.702122688293457
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,128,96,8,64,0,1,float16,float16,0,0.733354647954305
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,128,96,8,64,0,1,float16,fp8,0,0.7162880102793375
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,128,96,8,64,0,1,fp8,fp8,0,0.6251519918441772
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,128,96,96,64,0,1,float16,float16,0,0.8304639657338461
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,128,96,96,64,0,1,float16,fp8,0,0.7657813231150309
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,128,96,96,64,0,1,fp8,fp8,0,0.6700373490651449
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,128,96,1,64,0,1,float16,float16,0,0.17851734161376953
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,128,96,1,64,0,1,fp8,fp8,0,0.2568533420562744
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,128,96,1,64,0,1,float16,fp8,0,0.1776640017827352
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,128,96,2,64,0,1,float16,float16,0,0.18346667289733887
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,128,96,2,64,0,1,float16,fp8,0,0.1858560045560201
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,128,96,2,64,0,1,fp8,fp8,0,0.25975465774536133
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,128,96,4,64,0,1,float16,float16,0,0.19285333156585693
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,128,96,4,64,0,1,float16,fp8,0,0.1884160041809082
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,128,96,8,64,0,1,float16,float16,0,0.22101332743962607
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,128,96,4,64,0,1,fp8,fp8,0,0.2701653242111206
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,128,96,8,64,0,1,float16,fp8,0,0.2126506765683492
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,128,96,8,64,0,1,fp8,fp8,0,0.2940586606661479
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,128,96,96,64,0,1,float16,fp8,0,0.18875734011332193
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,128,96,96,64,0,1,float16,float16,0,0.26026666164398193
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,128,96,1,64,0,1,float16,float16,0,0.08618666728337605
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,128,96,96,64,0,1,fp8,fp8,0,0.32443734010060626
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,128,96,1,64,0,1,float16,fp8,0,0.08584533135096233
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,128,96,1,64,0,1,fp8,fp8,0,0.1114453375339508
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,128,96,2,64,0,1,float16,float16,0,0.08584533135096233
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,128,96,2,64,0,1,float16,fp8,0,0.0865280032157898
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,128,96,2,64,0,1,fp8,fp8,0,0.11008000373840332
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,128,96,4,64,0,1,float16,float16,0,0.08840533097585042
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,128,96,4,64,0,1,float16,fp8,0,0.08721066514650981
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,128,96,4,64,0,1,fp8,fp8,0,0.11281067132949829
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,128,96,8,64,0,1,float16,float16,0,0.08755200107892354
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,128,96,8,64,0,1,float16,fp8,0,0.08874666690826416
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,128,96,8,64,0,1,fp8,fp8,0,0.11229866743087769
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,128,96,96,64,0,1,float16,float16,0,0.05461333195368449
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,128,96,96,64,0,1,float16,fp8,0,0.053247998158137
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,128,96,96,64,0,1,fp8,fp8,0,0.07099733253320058
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,128,96,1,64,0,1,float16,float16,0,0.04778666794300079
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,128,96,1,64,0,1,float16,fp8,0,0.04761599997679392
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,128,96,1,64,0,1,fp8,fp8,0,0.06229333579540253
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,128,96,2,64,0,1,float16,float16,0,0.048469334840774536
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,128,96,2,64,0,1,float16,fp8,0,0.04795733094215393
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,128,96,2,64,0,1,fp8,fp8,0,0.06178133189678192
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,128,96,4,64,0,1,float16,float16,0,0.048298666874567665
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,128,96,4,64,0,1,float16,fp8,0,0.048469334840774536
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,128,96,4,64,0,1,fp8,fp8,0,0.06195199986298879
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,128,96,8,64,0,1,float16,float16,0,0.048298666874567665
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,128,96,8,64,0,1,float16,fp8,0,0.0481279989083608
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,128,96,8,64,0,1,fp8,fp8,0,0.06195199986298879
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,128,96,96,64,0,1,float16,float16,0,0.03293866664171219
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,128,96,96,64,0,1,float16,fp8,0,0.03276800115903219
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,128,96,96,64,0,1,fp8,fp8,0,0.04181333382924398
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,128,96,1,64,0,1,float16,float16,0,0.029866665601730347
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,128,96,1,64,0,1,float16,fp8,0,0.030207999050617218
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,128,96,1,64,0,1,fp8,fp8,0,0.03925333420435587
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,128,96,2,64,0,1,float16,float16,0,0.030037333567937214
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,128,96,2,64,0,1,float16,fp8,0,0.030207999050617218
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,128,96,2,64,0,1,fp8,fp8,0,0.03925333420435587
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,128,96,4,64,0,1,float16,float16,0,0.0314026673634847
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,128,96,4,64,0,1,float16,fp8,0,0.03089066594839096
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,128,96,4,64,0,1,fp8,fp8,0,0.039594667653242745
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,128,96,8,64,0,1,float16,float16,0,0.03089066594839096
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,128,96,8,64,0,1,float16,fp8,0,0.03054933249950409
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,128,96,8,64,0,1,fp8,fp8,0,0.040618665516376495
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,128,96,96,64,0,1,float16,float16,0,0.021333334346612293
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,128,96,96,64,0,1,float16,fp8,0,0.021503999829292297
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,128,96,96,64,0,1,fp8,fp8,0,0.025087999800841015
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,128,96,1,64,0,1,float16,float16,0,0.02065066620707512
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,128,96,1,64,0,1,float16,fp8,0,0.020821332931518555
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,128,96,1,64,0,1,fp8,fp8,0,0.0240639994541804
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,128,96,2,64,0,1,float16,float16,0,0.020821332931518555
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,128,96,2,64,0,1,float16,fp8,0,0.020992000897725422
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,128,96,2,64,0,1,fp8,fp8,0,0.0240639994541804
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,128,96,4,64,0,1,float16,float16,0,0.020138667275508244
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,128,96,4,64,0,1,float16,fp8,0,0.020992000897725422
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,128,96,4,64,0,1,fp8,fp8,0,0.024234667420387268
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,128,96,8,64,0,1,float16,float16,0,0.020309332758188248
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,128,96,8,64,0,1,float16,fp8,0,0.020821332931518555
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,128,96,8,64,0,1,fp8,fp8,0,0.02457600086927414
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,128,96,96,64,0,1,float16,float16,0,0.013823999712864557
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,128,96,96,64,0,1,float16,fp8,0,0.013823999712864557
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,128,96,96,64,0,1,fp8,fp8,0,0.016042667130629223
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,128,96,1,64,0,1,float16,float16,0,0.013653332988421122
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,128,96,1,64,0,1,float16,fp8,0,0.013823999712864557
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,128,96,1,64,0,1,fp8,fp8,0,0.015872000406185787
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,128,96,2,64,0,1,float16,float16,0,0.01331199953953425
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,128,96,2,64,0,1,float16,fp8,0,0.01331199953953425
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,128,96,2,64,0,1,fp8,fp8,0,0.015872000406185787
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,128,96,4,64,0,1,float16,float16,0,0.013482666263977686
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,128,96,4,64,0,1,float16,fp8,0,0.013482666263977686
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,128,96,8,64,0,1,float16,float16,0,0.01331199953953425
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,128,96,4,64,0,1,fp8,fp8,0,0.016384000579516094
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,128,96,8,64,0,1,float16,fp8,0,0.013482666263977686
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,128,96,8,64,0,1,fp8,fp8,0,0.015872000406185787
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,64,96,1,64,0,1,float16,float16,0,1.4511787096659343
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,64,96,1,64,0,1,float16,fp8,0,1.4535679817199707
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,64,96,1,64,0,1,fp8,fp8,0,1.4916267395019531
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,64,96,2,64,0,1,float16,float16,0,1.489408016204834
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,64,96,2,64,0,1,float16,fp8,0,1.5150079727172852
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,64,96,2,64,0,1,fp8,fp8,0,1.5506772994995117
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,64,96,4,64,0,1,float16,float16,0,1.530197302500407
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,64,96,4,64,0,1,float16,fp8,0,1.5184213320414226
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,64,96,4,64,0,1,fp8,fp8,0,1.5904426574707031
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,64,96,8,64,0,1,float16,float16,0,1.6505173047383626
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,64,96,8,64,0,1,float16,fp8,0,1.6163840293884277
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,64,96,96,64,0,1,float16,float16,0,1.8164052963256836
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,64,96,1,64,0,1,float16,float16,0,0.6340266863505045
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,64,96,8,64,0,1,fp8,fp8,0,1.6356693903605144
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,64,96,96,64,0,1,float16,fp8,0,1.695573329925537
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,64,96,96,64,0,1,fp8,fp8,0,1.4532267252604167
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,64,96,1,64,0,1,float16,fp8,0,0.636245330174764
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,64,96,1,64,0,1,fp8,fp8,0,0.7650986512502035
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,64,96,2,64,0,1,float16,float16,0,0.6505813201268514
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,64,96,2,64,0,1,fp8,fp8,0,0.7893333435058594
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,64,96,2,64,0,1,float16,fp8,0,0.6444373528162638
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,64,96,4,64,0,1,float16,fp8,0,0.672597328821818
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,64,96,4,64,0,1,float16,float16,0,0.6947840054829916
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,64,96,4,64,0,1,fp8,fp8,0,0.7997439702351888
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,64,96,8,64,0,1,float16,float16,0,0.7350613276163737
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,64,96,8,64,0,1,float16,fp8,0,0.7219200134277344
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,64,96,8,64,0,1,fp8,fp8,0,0.816810687383016
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,64,96,96,64,0,1,float16,float16,0,0.8463359673817953
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,64,96,96,64,0,1,float16,fp8,0,0.772437334060669
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,64,96,1,64,0,1,float16,float16,0,0.18244266510009766
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,64,96,96,64,0,1,fp8,fp8,0,0.7255040009816488
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,64,96,1,64,0,1,float16,fp8,0,0.18210132916768393
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,64,96,1,64,0,1,fp8,fp8,0,0.3546453317006429
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,64,96,2,64,0,1,float16,float16,0,0.176639993985494
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,64,96,2,64,0,1,float16,fp8,0,0.1904639999071757
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,64,96,2,64,0,1,fp8,fp8,0,0.34969600041707355
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,64,96,4,64,0,1,float16,float16,0,0.2039466698964437
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,64,96,4,64,0,1,float16,fp8,0,0.18363734086354574
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,64,96,8,64,0,1,float16,float16,0,0.21640533208847046
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,64,96,4,64,0,1,fp8,fp8,0,0.3660800059636434
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,64,96,8,64,0,1,float16,fp8,0,0.21196800470352173
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,64,96,8,64,0,1,fp8,fp8,0,0.38707200686136883
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,64,96,96,64,0,1,float16,float16,0,0.25975465774536133
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,64,96,96,64,0,1,float16,fp8,0,0.19182932376861572
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,64,96,96,64,0,1,fp8,fp8,0,0.3628373146057129
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,64,96,1,64,0,1,float16,fp8,0,0.06911999980608623
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,64,96,1,64,0,1,float16,float16,0,0.0682666649421056
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,64,96,1,64,0,1,fp8,fp8,0,0.15308800339698792
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,64,96,2,64,0,1,float16,float16,0,0.08891733487447102
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,64,96,2,64,0,1,float16,fp8,0,0.06946133573849995
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,64,96,4,64,0,1,float16,float16,0,0.07167999943097432
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,64,96,2,64,0,1,fp8,fp8,0,0.15377066532770792
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,64,96,4,64,0,1,float16,fp8,0,0.07082666456699371
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,64,96,4,64,0,1,fp8,fp8,0,0.1532586713631948
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,64,96,8,64,0,1,float16,fp8,0,0.0718506673971812
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,64,96,8,64,0,1,float16,float16,0,0.07065600156784058
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,64,96,8,64,0,1,fp8,fp8,0,0.15530666708946228
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,64,96,96,64,0,1,float16,float16,0,0.04625066618124644
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,64,96,96,64,0,1,float16,fp8,0,0.045226668318112694
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,64,96,96,64,0,1,fp8,fp8,0,0.0981333355108897
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,64,96,1,64,0,1,float16,float16,0,0.040106666584809623
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,64,96,1,64,0,1,float16,fp8,0,0.039594667653242745
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,64,96,1,64,0,1,fp8,fp8,0,0.08482133348782857
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,64,96,2,64,0,1,float16,float16,0,0.040448000033696495
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,64,96,2,64,0,1,float16,fp8,0,0.040789333482583366
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,64,96,2,64,0,1,fp8,fp8,0,0.08550399541854858
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,64,96,4,64,0,1,float16,float16,0,0.04181333382924398
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,64,96,4,64,0,1,fp8,fp8,0,0.08584533135096233
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,64,96,4,64,0,1,float16,fp8,0,0.04147200038035711
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,64,96,8,64,0,1,float16,float16,0,0.04095999896526337
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,64,96,8,64,0,1,float16,fp8,0,0.04095999896526337
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,64,96,8,64,0,1,fp8,fp8,0,0.08550399541854858
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,64,96,96,64,0,1,float16,float16,0,0.0290133332212766
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,64,96,96,64,0,1,float16,fp8,0,0.028160000840822857
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,64,96,96,64,0,1,fp8,fp8,0,0.05205333232879639
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,64,96,1,64,0,1,float16,float16,0,0.025770666698614757
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,64,96,1,64,0,1,float16,fp8,0,0.025941332181294758
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,64,96,1,64,0,1,fp8,fp8,0,0.048298666874567665
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,64,96,2,64,0,1,float16,float16,0,0.02611200014750163
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,64,96,2,64,0,1,float16,fp8,0,0.025600001215934753
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,64,96,2,64,0,1,fp8,fp8,0,0.049322664737701416
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,64,96,4,64,0,1,float16,float16,0,0.025941332181294758
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,64,96,4,64,0,1,float16,fp8,0,0.02679466704527537
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,64,96,4,64,0,1,fp8,fp8,0,0.04966400067011515
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,64,96,8,64,0,1,float16,float16,0,0.025941332181294758
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,64,96,8,64,0,1,float16,fp8,0,0.025770666698614757
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,64,96,8,64,0,1,fp8,fp8,0,0.050517335534095764
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,64,96,96,64,0,1,float16,float16,0,0.01877333347996076
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,64,96,96,64,0,1,float16,fp8,0,0.018944000204404194
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,64,96,96,64,0,1,fp8,fp8,0,0.03362133353948593
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,64,96,1,64,0,1,float16,fp8,0,0.018602666755517323
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,64,96,1,64,0,1,float16,float16,0,0.01826133330663045
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,64,96,1,64,0,1,fp8,fp8,0,0.031914666295051575
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,64,96,2,64,0,1,float16,float16,0,0.018090666582187016
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,64,96,2,64,0,1,float16,fp8,0,0.018432000031073887
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,64,96,2,64,0,1,fp8,fp8,0,0.03328000009059906
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,64,96,4,64,0,1,float16,float16,0,0.018432000031073887
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,64,96,4,64,0,1,float16,fp8,0,0.01877333347996076
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,64,96,4,64,0,1,fp8,fp8,0,0.03276800115903219
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,64,96,8,64,0,1,float16,float16,0,0.018602666755517323
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,64,96,8,64,0,1,float16,fp8,0,0.01826133330663045
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,64,96,96,64,0,1,float16,float16,0,0.01228800043463707
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,64,96,8,64,0,1,fp8,fp8,0,0.03293866664171219
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,64,96,96,64,0,1,float16,fp8,0,0.01228800043463707
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,64,96,96,64,0,1,fp8,fp8,0,0.01911466692884763
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,64,96,1,64,0,1,float16,float16,0,0.012117333710193634
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,64,96,1,64,0,1,float16,fp8,0,0.012117333710193634
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,64,96,1,64,0,1,fp8,fp8,0,0.01911466692884763
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,64,96,2,64,0,1,float16,float16,0,0.011434666812419891
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,64,96,2,64,0,1,float16,fp8,0,0.011605333536863327
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,64,96,2,64,0,1,fp8,fp8,0,0.018602666755517323
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,64,96,4,64,0,1,float16,float16,0,0.011605333536863327
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,64,96,4,64,0,1,float16,fp8,0,0.011946666985750198
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,64,96,4,64,0,1,fp8,fp8,0,0.0194560003777345
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,64,96,8,64,0,1,float16,float16,0,0.011605333536863327
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,64,96,8,64,0,1,float16,fp8,0,0.011946666985750198
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,64,96,8,64,0,1,fp8,fp8,0,0.018944000204404194
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,64,96,96,64,0,1,float16,float16,0,0.00972800018886725
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,64,96,96,64,0,1,float16,fp8,0,0.009898666913310686
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,64,96,96,64,0,1,fp8,fp8,0,0.013994666437307993
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,64,96,1,64,0,1,float16,float16,0,0.00972800018886725
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,64,96,1,64,0,1,float16,fp8,0,0.009898666913310686
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,64,96,1,64,0,1,fp8,fp8,0,0.013994666437307993
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,64,96,2,64,0,1,float16,float16,0,0.009557333464423815
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,64,96,2,64,0,1,float16,fp8,0,0.00972800018886725
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,64,96,2,64,0,1,fp8,fp8,0,0.014165333161751429
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,64,96,4,64,0,1,float16,float16,0,0.00972800018886725
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,64,96,4,64,0,1,float16,fp8,0,0.009898666913310686
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,64,96,4,64,0,1,fp8,fp8,0,0.014165333161751429
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,64,96,8,64,0,1,float16,float16,0,0.00972800018886725
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,64,96,8,64,0,1,float16,fp8,0,0.009898666913310686
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,64,96,8,64,0,1,fp8,fp8,0,0.014165333161751429
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,32,96,1,64,0,1,float16,fp8,0,0.6104746659596761
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,32,96,1,64,0,1,float16,float16,0,0.6137173175811768
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,32,96,1,64,0,1,fp8,fp8,0,1.1438079675038655
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,32,96,2,64,0,1,float16,float16,0,0.618837316830953
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,32,96,2,64,0,1,float16,fp8,0,0.6159360011418661
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,32,96,2,64,0,1,fp8,fp8,0,1.155413309733073
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,32,96,4,64,0,1,float16,float16,0,0.6621866623560587
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,32,96,4,64,0,1,float16,fp8,0,0.6526293357213339
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,32,96,8,64,0,1,float16,float16,0,0.7210666338602701
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,32,96,4,64,0,1,fp8,fp8,0,1.1752106348673503
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,32,96,8,64,0,1,float16,fp8,0,0.7098026275634766
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,32,96,8,64,0,1,fp8,fp8,0,1.198421319325765
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,32,96,96,64,0,1,float16,float16,0,0.8376320203145345
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,32,96,96,64,0,1,float16,fp8,0,0.772266705830892
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,32,96,96,64,0,1,fp8,fp8,0,0.9084586302439371
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,32,96,1,64,0,1,float16,float16,0,0.16895999511082968
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,32,96,1,64,0,1,float16,fp8,0,0.16810667514801025
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,32,96,2,64,0,1,float16,float16,0,0.17510400215784708
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,32,96,1,64,0,1,fp8,fp8,0,0.5423786640167236
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,32,96,2,64,0,1,float16,fp8,0,0.1723733345667521
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,32,96,2,64,0,1,fp8,fp8,0,0.5437440077463785
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,32,96,4,64,0,1,float16,float16,0,0.18722132841746011
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,32,96,4,64,0,1,float16,fp8,0,0.183296004931132
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,32,96,4,64,0,1,fp8,fp8,0,0.5577386617660522
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,32,96,8,64,0,1,float16,float16,0,0.21248000860214233
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,32,96,8,64,0,1,float16,fp8,0,0.2044586737950643
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,32,96,8,64,0,1,fp8,fp8,0,0.5819733142852783
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,32,96,96,64,0,1,float16,float16,0,0.2604373296101888
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,32,96,96,64,0,1,float16,fp8,0,0.1889280080795288
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,32,96,96,64,0,1,fp8,fp8,0,0.4561920166015625
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,32,96,1,64,0,1,float16,float16,0,0.08106666803359985
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,32,96,1,64,0,1,float16,fp8,0,0.0773119976123174
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,32,96,2,64,0,1,float16,float16,0,0.07867733140786488
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,32,96,1,64,0,1,fp8,fp8,0,0.2515626748402913
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,32,96,2,64,0,1,float16,fp8,0,0.07918933530648549
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,32,96,2,64,0,1,fp8,fp8,0,0.2512213389078776
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,32,96,4,64,0,1,float16,float16,0,0.07935999830563863
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,32,96,4,64,0,1,float16,fp8,0,0.07935999830563863
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,32,96,4,64,0,1,fp8,fp8,0,0.252074658870697
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,32,96,8,64,0,1,float16,float16,0,0.07970133423805237
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,32,96,8,64,0,1,float16,fp8,0,0.08004266520341237
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,32,96,96,64,0,1,float16,float16,0,0.04778666794300079
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,32,96,8,64,0,1,fp8,fp8,0,0.2529279987017314
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,32,96,96,64,0,1,float16,fp8,0,0.04471466441949209
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,32,96,1,64,0,1,float16,float16,0,0.043178667624791466
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,32,96,96,64,0,1,fp8,fp8,0,0.14643200238545737
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,32,96,1,64,0,1,float16,fp8,0,0.043007999658584595
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,32,96,1,64,0,1,fp8,fp8,0,0.13380266229311624
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,32,96,2,64,0,1,float16,float16,0,0.043178667624791466
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,32,96,2,64,0,1,float16,fp8,0,0.043178667624791466
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,32,96,2,64,0,1,fp8,fp8,0,0.13414399822553
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,32,96,4,64,0,1,float16,float16,0,0.04386133452256521
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,32,96,4,64,0,1,float16,fp8,0,0.04437333345413208
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,32,96,4,64,0,1,fp8,fp8,0,0.13397333025932312
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,32,96,8,64,0,1,float16,float16,0,0.043178667624791466
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,32,96,8,64,0,1,float16,fp8,0,0.043178667624791466
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,32,96,8,64,0,1,fp8,fp8,0,0.13414399822553
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,32,96,96,64,0,1,float16,float16,0,0.029696000119050343
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,32,96,96,64,0,1,float16,fp8,0,0.02867199977238973
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,32,96,96,64,0,1,fp8,fp8,0,0.0769706666469574
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,32,96,1,64,0,1,float16,float16,0,0.027136000494162243
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,32,96,1,64,0,1,float16,fp8,0,0.027647999425729115
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,32,96,1,64,0,1,fp8,fp8,0,0.07628799974918365
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,32,96,2,64,0,1,float16,float16,0,0.027306665976842243
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,32,96,2,64,0,1,float16,fp8,0,0.027647999425729115
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,32,96,2,64,0,1,fp8,fp8,0,0.07611733178297679
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,32,96,4,64,0,1,float16,float16,0,0.027477333943049114
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,32,96,4,64,0,1,float16,fp8,0,0.027477333943049114
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,32,96,4,64,0,1,fp8,fp8,0,0.07714133461316426
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,32,96,8,64,0,1,float16,float16,0,0.027989332874615986
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,32,96,8,64,0,1,float16,fp8,0,0.027818667391935985
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,32,96,8,64,0,1,fp8,fp8,0,0.07645866771539052
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,32,96,96,64,0,1,float16,float16,0,0.018602666755517323
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,32,96,96,64,0,1,float16,fp8,0,0.01826133330663045
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,32,96,96,64,0,1,fp8,fp8,0,0.045226668318112694
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,32,96,1,64,0,1,float16,float16,0,0.01826133330663045
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,32,96,1,64,0,1,float16,fp8,0,0.018602666755517323
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,32,96,1,64,0,1,fp8,fp8,0,0.045226668318112694
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,32,96,2,64,0,1,float16,float16,0,0.018090666582187016
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,32,96,2,64,0,1,fp8,fp8,0,0.04539733131726583
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,32,96,2,64,0,1,float16,fp8,0,0.018432000031073887
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,32,96,4,64,0,1,float16,float16,0,0.018432000031073887
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,32,96,4,64,0,1,float16,fp8,0,0.018602666755517323
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,32,96,4,64,0,1,fp8,fp8,0,0.045226668318112694
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,32,96,8,64,0,1,float16,float16,0,0.018090666582187016
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,32,96,8,64,0,1,float16,fp8,0,0.018602666755517323
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,32,96,8,64,0,1,fp8,fp8,0,0.045909335215886436
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,32,96,96,64,0,1,float16,fp8,0,0.012970666090647379
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,32,96,96,64,0,1,float16,float16,0,0.012970666090647379
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,32,96,96,64,0,1,fp8,fp8,0,0.0290133332212766
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,32,96,1,64,0,1,float16,float16,0,0.012458667159080505
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,32,96,1,64,0,1,float16,fp8,0,0.012458667159080505
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,32,96,1,64,0,1,fp8,fp8,0,0.028501334289709728
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,32,96,2,64,0,1,float16,float16,0,0.012458667159080505
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,32,96,2,64,0,1,float16,fp8,0,0.012458667159080505
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,32,96,2,64,0,1,fp8,fp8,0,0.02918400118748347
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,32,96,4,64,0,1,float16,float16,0,0.012800000607967377
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,32,96,4,64,0,1,float16,fp8,0,0.012970666090647379
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,32,96,4,64,0,1,fp8,fp8,0,0.02867199977238973
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,32,96,8,64,0,1,float16,float16,0,0.012458667159080505
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,32,96,8,64,0,1,float16,fp8,0,0.012800000607967377
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,32,96,96,64,0,1,float16,float16,0,0.009557333464423815
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,32,96,8,64,0,1,fp8,fp8,0,0.02935466667016347
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,32,96,96,64,0,1,fp8,fp8,0,0.01757866640885671
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,32,96,96,64,0,1,float16,fp8,0,0.009898666913310686
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,32,96,1,64,0,1,float16,float16,0,0.00972800018886725
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,32,96,1,64,0,1,float16,fp8,0,0.009898666913310686
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,32,96,1,64,0,1,fp8,fp8,0,0.017749333133300144
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,32,96,2,64,0,1,float16,float16,0,0.009898666913310686
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,32,96,2,64,0,1,float16,fp8,0,0.009898666913310686
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,32,96,2,64,0,1,fp8,fp8,0,0.017749333133300144
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,32,96,4,64,0,1,float16,float16,0,0.00972800018886725
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,32,96,4,64,0,1,fp8,fp8,0,0.018090666582187016
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,32,96,4,64,0,1,float16,fp8,0,0.010410666465759277
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,32,96,8,64,0,1,float16,float16,0,0.00972800018886725
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,32,96,8,64,0,1,float16,fp8,0,0.010239999741315842
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,32,96,8,64,0,1,fp8,fp8,0,0.01791999985774358
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,32,96,96,64,0,1,float16,float16,0,0.008192000289758047
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,32,96,96,64,0,1,float16,fp8,0,0.008703999842206636
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,32,96,96,64,0,1,fp8,fp8,0,0.01331199953953425
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,32,96,1,64,0,1,float16,float16,0,0.008874666566650072
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,32,96,1,64,0,1,float16,fp8,0,0.008874666566650072
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,32,96,1,64,0,1,fp8,fp8,0,0.013653332988421122
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,32,96,2,64,0,1,float16,float16,0,0.009557333464423815
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,32,96,2,64,0,1,float16,fp8,0,0.009045333291093508
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,32,96,2,64,0,1,fp8,fp8,0,0.013653332988421122
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,32,96,4,64,0,1,float16,float16,0,0.008874666566650072
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,32,96,4,64,0,1,float16,fp8,0,0.009216000015536943
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,32,96,4,64,0,1,fp8,fp8,0,0.013823999712864557
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,32,96,8,64,0,1,float16,float16,0,0.0085333331177632
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,32,96,8,64,0,1,float16,fp8,0,0.009045333291093508
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,32,96,8,64,0,1,fp8,fp8,0,0.013653332988421122
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,16,96,1,64,0,1,float16,float16,0,0.23603200912475586
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,16,96,1,64,0,1,float16,fp8,0,0.23278933763504028
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,16,96,1,64,0,1,fp8,fp8,0,0.9405439694722494
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,16,96,2,64,0,1,float16,fp8,0,0.24115200837453207
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,16,96,2,64,0,1,float16,float16,0,0.24661332368850708
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,16,96,2,64,0,1,fp8,fp8,0,0.9477120240529379
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,16,96,4,64,0,1,float16,float16,0,0.2529279987017314
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,16,96,4,64,0,1,float16,fp8,0,0.252074658870697
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,16,96,4,64,0,1,fp8,fp8,0,0.9617066383361816
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,16,96,8,64,0,1,float16,float16,0,0.2744320034980774
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,16,96,8,64,0,1,float16,fp8,0,0.26794666051864624
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,16,96,96,64,0,1,float16,float16,0,0.25565866629282635
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,16,96,8,64,0,1,fp8,fp8,0,0.9852586587270101
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,16,96,96,64,0,1,float16,fp8,0,0.1930239995320638
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,16,96,1,64,0,1,float16,float16,0,0.11161599556605022
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,16,96,96,64,0,1,fp8,fp8,0,0.6580906709035238
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,16,96,1,64,0,1,float16,fp8,0,0.11110400160153706
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,16,96,1,64,0,1,fp8,fp8,0,0.4500480095545451
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,16,96,2,64,0,1,float16,float16,0,0.11332266529401143
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,16,96,2,64,0,1,float16,fp8,0,0.11332266529401143
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,16,96,2,64,0,1,fp8,fp8,0,0.45209598541259766
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,16,96,4,64,0,1,float16,float16,0,0.11315199732780457
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,16,96,4,64,0,1,float16,fp8,0,0.11315199732780457
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,16,96,4,64,0,1,fp8,fp8,0,0.4519253174463908
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,16,96,8,64,0,1,float16,float16,0,0.11366400122642517
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,16,96,8,64,0,1,float16,fp8,0,0.11349333326021831
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,16,96,8,64,0,1,fp8,fp8,0,0.4532906611760457
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,16,96,96,64,0,1,float16,float16,0,0.06263466676076253
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,16,96,96,64,0,1,float16,fp8,0,0.060415998101234436
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,16,96,96,64,0,1,fp8,fp8,0,0.24849067131678262
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,16,96,1,64,0,1,float16,float16,0,0.0602453351020813
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,16,96,1,64,0,1,float16,fp8,0,0.06007466713587443
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,16,96,1,64,0,1,fp8,fp8,0,0.23466666539510092
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,16,96,2,64,0,1,float16,float16,0,0.060415998101234436
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,16,96,2,64,0,1,float16,fp8,0,0.06058666606744131
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,16,96,2,64,0,1,fp8,fp8,0,0.23278933763504028
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,16,96,4,64,0,1,float16,float16,0,0.06109866499900818
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,16,96,4,64,0,1,float16,fp8,0,0.06109866499900818
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,16,96,4,64,0,1,fp8,fp8,0,0.233130673567454
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,16,96,8,64,0,1,float16,float16,0,0.06058666606744131
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,16,96,8,64,0,1,float16,fp8,0,0.06058666606744131
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,16,96,8,64,0,1,fp8,fp8,0,0.233130673567454
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,16,96,96,64,0,1,float16,float16,0,0.03583999971548716
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,16,96,96,64,0,1,float16,fp8,0,0.03498666733503342
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,16,96,96,64,0,1,fp8,fp8,0,0.1269760032494863
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,16,96,1,64,0,1,float16,fp8,0,0.03549866626660029
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,16,96,1,64,0,1,float16,float16,0,0.03532800078392029
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,16,96,2,64,0,1,float16,float16,0,0.03532800078392029
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,16,96,1,64,0,1,fp8,fp8,0,0.12492799758911133
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,16,96,2,64,0,1,float16,fp8,0,0.03532800078392029
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,16,96,2,64,0,1,fp8,fp8,0,0.12526933352152506
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,16,96,4,64,0,1,float16,float16,0,0.03566933423280716
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,16,96,4,64,0,1,float16,fp8,0,0.03601066768169403
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,16,96,4,64,0,1,fp8,fp8,0,0.12526933352152506
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,16,96,8,64,0,1,float16,fp8,0,0.03532800078392029
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,16,96,8,64,0,1,float16,float16,0,0.03515733281771342
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,16,96,8,64,0,1,fp8,fp8,0,0.12475732962290446
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,16,96,96,64,0,1,float16,float16,0,0.02184533327817917
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,16,96,96,64,0,1,float16,fp8,0,0.02218666672706604
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,16,96,96,64,0,1,fp8,fp8,0,0.07133866846561432
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,16,96,1,64,0,1,float16,float16,0,0.022698665658632915
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,16,96,1,64,0,1,fp8,fp8,0,0.07167999943097432
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,16,96,1,64,0,1,float16,fp8,0,0.022698665658632915
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,16,96,2,64,0,1,float16,float16,0,0.022698665658632915
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,16,96,2,64,0,1,float16,fp8,0,0.022698665658632915
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,16,96,2,64,0,1,fp8,fp8,0,0.07116800049940745
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,16,96,4,64,0,1,float16,float16,0,0.022698665658632915
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,16,96,4,64,0,1,fp8,fp8,0,0.0718506673971812
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,16,96,4,64,0,1,float16,fp8,0,0.023039999107519787
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,16,96,8,64,0,1,float16,float16,0,0.022698665658632915
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,16,96,8,64,0,1,float16,fp8,0,0.02252800017595291
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,16,96,8,64,0,1,fp8,fp8,0,0.07202133536338806
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,16,96,96,64,0,1,float16,float16,0,0.013823999712864557
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,16,96,96,64,0,1,float16,fp8,0,0.013653332988421122
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,16,96,96,64,0,1,fp8,fp8,0,0.040789333482583366
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,16,96,1,64,0,1,float16,float16,0,0.013823999712864557
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,16,96,1,64,0,1,float16,fp8,0,0.013994666437307993
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,16,96,1,64,0,1,fp8,fp8,0,0.04095999896526337
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,16,96,2,64,0,1,float16,float16,0,0.013823999712864557
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,16,96,2,64,0,1,float16,fp8,0,0.013823999712864557
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,16,96,2,64,0,1,fp8,fp8,0,0.040789333482583366
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,16,96,4,64,0,1,float16,float16,0,0.013994666437307993
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,16,96,4,64,0,1,float16,fp8,0,0.013994666437307993
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,16,96,4,64,0,1,fp8,fp8,0,0.04113066693147024
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,16,96,8,64,0,1,float16,float16,0,0.013653332988421122
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,16,96,8,64,0,1,float16,fp8,0,0.014165333161751429
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,16,96,8,64,0,1,fp8,fp8,0,0.04130133241415024
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,16,96,96,64,0,1,fp8,fp8,0,0.027477333943049114
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,16,96,96,64,0,1,float16,fp8,0,0.010751999914646149
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,16,96,96,64,0,1,float16,float16,0,0.010581333190202713
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,16,96,1,64,0,1,float16,float16,0,0.010922666639089584
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,16,96,1,64,0,1,float16,fp8,0,0.01109333336353302
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,16,96,2,64,0,1,float16,float16,0,0.01109333336353302
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,16,96,1,64,0,1,fp8,fp8,0,0.027306665976842243
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,16,96,2,64,0,1,float16,fp8,0,0.01109333336353302
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,16,96,2,64,0,1,fp8,fp8,0,0.027818667391935985
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,16,96,4,64,0,1,float16,float16,0,0.010751999914646149
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,16,96,4,64,0,1,float16,fp8,0,0.01109333336353302
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,16,96,4,64,0,1,fp8,fp8,0,0.027818667391935985
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,16,96,8,64,0,1,float16,float16,0,0.010922666639089584
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,16,96,8,64,0,1,float16,fp8,0,0.011264000087976456
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,16,96,8,64,0,1,fp8,fp8,0,0.027647999425729115
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,16,96,96,64,0,1,float16,float16,0,0.008362666393319765
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,16,96,96,64,0,1,float16,fp8,0,0.0085333331177632
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,16,96,96,64,0,1,fp8,fp8,0,0.01621333385507266
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,16,96,1,64,0,1,float16,float16,0,0.008703999842206636
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,16,96,1,64,0,1,float16,fp8,0,0.008874666566650072
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,16,96,1,64,0,1,fp8,fp8,0,0.01672533278663953
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,16,96,2,64,0,1,float16,float16,0,0.0085333331177632
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,16,96,2,64,0,1,float16,fp8,0,0.008874666566650072
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,16,96,2,64,0,1,fp8,fp8,0,0.016895999511082966
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,16,96,4,64,0,1,float16,float16,0,0.008874666566650072
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,16,96,4,64,0,1,float16,fp8,0,0.009216000015536943
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,16,96,4,64,0,1,fp8,fp8,0,0.017407999684413273
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,16,96,8,64,0,1,float16,float16,0,0.008362666393319765
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,16,96,8,64,0,1,fp8,fp8,0,0.016895999511082966
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,16,96,8,64,0,1,float16,fp8,0,0.008874666566650072
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,16,96,96,64,0,1,float16,float16,0,0.008703999842206636
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,16,96,96,64,0,1,float16,fp8,0,0.008362666393319765
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,16,96,96,64,0,1,fp8,fp8,0,0.013141332815090815
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,16,96,1,64,0,1,float16,float16,0,0.007850666840871176
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,16,96,1,64,0,1,float16,fp8,0,0.009045333291093508
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,16,96,1,64,0,1,fp8,fp8,0,0.013141332815090815
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,16,96,2,64,0,1,float16,float16,0,0.008703999842206636
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,16,96,2,64,0,1,float16,fp8,0,0.008703999842206636
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,16,96,2,64,0,1,fp8,fp8,0,0.012970666090647379
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,16,96,4,64,0,1,float16,float16,0,0.008703999842206636
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,16,96,4,64,0,1,float16,fp8,0,0.008192000289758047
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,16,96,4,64,0,1,fp8,fp8,0,0.012970666090647379
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,16,96,8,64,0,1,float16,float16,0,0.009216000015536943
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,16,96,8,64,0,1,float16,fp8,0,0.008362666393319765
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,16,96,8,64,0,1,fp8,fp8,0,0.01331199953953425
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,16384,64,1,64,0,1,float16,fp8,0,122.37687174479167
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,16384,64,2,64,0,1,float16,float16,0,121.3660176595052
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,16384,64,1,64,0,1,float16,float16,0,125.33811442057292
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,16384,64,2,64,0,1,float16,fp8,0,123.46982828776042
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,16384,64,4,64,0,1,float16,float16,0,120.27391560872395
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,16384,64,4,64,0,1,float16,fp8,0,119.68955485026042
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,16384,64,1,64,0,1,fp8,fp8,0,155.72684733072916
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,16384,64,2,64,0,1,fp8,fp8,0,158.540283203125
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,16384,64,64,64,0,1,float16,fp8,0,64.17373657226562
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,16384,64,64,64,0,1,float16,float16,0,63.79963684082031
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,16384,64,1,64,0,1,float16,float16,0,59.79920959472656
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,16384,64,64,64,0,1,fp8,fp8,0,82.14903259277344
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,16384,64,8,64,0,1,float16,float16,0,122.56392415364583
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,16384,64,8,64,0,1,float16,fp8,0,123.3822733561198
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,16384,64,1,64,0,1,float16,fp8,0,61.777069091796875
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,16384,64,4,64,0,1,fp8,fp8,0,159.80970255533853
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,16384,64,8,64,0,1,fp8,fp8,0,161.83381144205728
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,16384,64,1,64,0,1,fp8,fp8,0,78.15202331542969
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,16384,64,2,64,0,1,float16,float16,0,61.065216064453125
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,16384,64,2,64,0,1,float16,fp8,0,60.8692881266276
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,16384,64,4,64,0,1,float16,float16,0,61.90302022298177
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,16384,64,2,64,0,1,fp8,fp8,0,78.56008402506511
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,16384,64,4,64,0,1,float16,fp8,0,61.068115234375
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,16384,64,64,64,0,1,float16,float16,0,30.924458821614582
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,16384,64,8,64,0,1,float16,fp8,0,59.27611796061198
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,16384,64,8,64,0,1,float16,float16,0,61.54905700683594
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,16384,64,4,64,0,1,fp8,fp8,0,78.92497253417969
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,16384,64,64,64,0,1,float16,fp8,0,31.53595733642578
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,16384,64,8,64,0,1,fp8,fp8,0,79.06986490885417
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,16384,64,1,64,0,1,float16,float16,0,30.897664388020832
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,16384,64,64,64,0,1,fp8,fp8,0,40.530601501464844
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,16384,64,1,64,0,1,float16,fp8,0,30.373204549153645
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,16384,64,2,64,0,1,float16,float16,0,30.34282684326172
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,16384,64,1,64,0,1,fp8,fp8,0,38.47338612874349
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,16384,64,2,64,0,1,float16,fp8,0,30.24401092529297
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,16384,64,4,64,0,1,float16,float16,0,29.394602457682293
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,16384,64,2,64,0,1,fp8,fp8,0,39.15929667154948
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,16384,64,4,64,0,1,float16,fp8,0,29.535914103190105
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,16384,64,8,64,0,1,float16,float16,0,29.48863983154297
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,16384,64,64,64,0,1,float16,float16,0,15.666858673095703
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,16384,64,4,64,0,1,fp8,fp8,0,38.70958964029948
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,16384,64,8,64,0,1,float16,fp8,0,30.03016408284505
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,16384,64,64,64,0,1,float16,fp8,0,15.429803212483725
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,16384,64,1,64,0,1,float16,float16,0,15.443456013997396
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,16384,64,8,64,0,1,fp8,fp8,0,38.752766927083336
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,16384,64,64,64,0,1,fp8,fp8,0,20.561578114827473
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,16384,64,1,64,0,1,float16,fp8,0,15.662762959798178
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,16384,64,2,64,0,1,float16,float16,0,15.35982894897461
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,16384,64,1,64,0,1,fp8,fp8,0,19.745620727539062
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,16384,64,2,64,0,1,float16,fp8,0,15.53390884399414
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,16384,64,2,64,0,1,fp8,fp8,0,19.588778177897137
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,16384,64,4,64,0,1,float16,float16,0,15.161343892415365
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,16384,64,4,64,0,1,float16,fp8,0,15.165269215901693
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,16384,64,4,64,0,1,fp8,fp8,0,19.906901041666668
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,16384,64,8,64,0,1,float16,float16,0,15.135915120442709
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,16384,64,8,64,0,1,float16,fp8,0,15.109973907470703
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,16384,64,8,64,0,1,fp8,fp8,0,19.718655904134113
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,12288,64,1,64,0,1,float16,float16,0,74.08776346842448
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,12288,64,1,64,0,1,float16,fp8,0,71.52622985839844
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,12288,64,2,64,0,1,float16,float16,0,70.76625061035156
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,12288,64,2,64,0,1,float16,fp8,0,74.55709838867188
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,12288,64,4,64,0,1,float16,float16,0,71.47332255045573
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,12288,64,1,64,0,1,fp8,fp8,0,89.51978556315105
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,12288,64,4,64,0,1,float16,fp8,0,72.64358520507812
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,12288,64,2,64,0,1,fp8,fp8,0,89.65785725911458
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,12288,64,64,64,0,1,float16,float16,0,36.520790100097656
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,12288,64,64,64,0,1,float16,fp8,0,36.73770650227865
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,12288,64,1,64,0,1,float16,float16,0,34.9137929280599
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,12288,64,64,64,0,1,fp8,fp8,0,47.238484700520836
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,12288,64,8,64,0,1,float16,float16,0,70.6501973470052
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,12288,64,8,64,0,1,float16,fp8,0,70.81523132324219
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,12288,64,4,64,0,1,fp8,fp8,0,91.55037434895833
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,12288,64,1,64,0,1,float16,fp8,0,34.87726847330729
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,12288,64,8,64,0,1,fp8,fp8,0,92.17620849609375
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,12288,64,2,64,0,1,float16,float16,0,34.75746154785156
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,12288,64,1,64,0,1,fp8,fp8,0,43.95332336425781
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,12288,64,2,64,0,1,float16,fp8,0,34.88785044352213
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,12288,64,4,64,0,1,float16,float16,0,33.731241861979164
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,12288,64,2,64,0,1,fp8,fp8,0,44.28953552246094
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,12288,64,4,64,0,1,float16,fp8,0,34.848767598470054
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,12288,64,64,64,0,1,float16,float16,0,18.94041570027669
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,12288,64,8,64,0,1,float16,float16,0,35.05834706624349
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,12288,64,64,64,0,1,float16,fp8,0,17.93501917521159
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,12288,64,4,64,0,1,fp8,fp8,0,44.15095520019531
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,12288,64,8,64,0,1,float16,fp8,0,34.23914591471354
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,12288,64,1,64,0,1,float16,float16,0,18.05619176228841
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,12288,64,64,64,0,1,fp8,fp8,0,23.566678365071613
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,12288,64,8,64,0,1,fp8,fp8,0,44.50286865234375
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,12288,64,1,64,0,1,float16,fp8,0,17.883477528889973
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,12288,64,2,64,0,1,float16,float16,0,17.50766881306966
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,12288,64,1,64,0,1,fp8,fp8,0,22.37303415934245
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,12288,64,2,64,0,1,float16,fp8,0,17.311743418375652
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,12288,64,2,64,0,1,fp8,fp8,0,22.029141743977863
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,12288,64,4,64,0,1,float16,float16,0,17.62389373779297
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,12288,64,4,64,0,1,float16,fp8,0,17.149951934814453
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,12288,64,4,64,0,1,fp8,fp8,0,22.155776977539062
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,12288,64,64,64,0,1,float16,float16,0,9.652736028035482
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,12288,64,8,64,0,1,float16,float16,0,17.388373057047527
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,12288,64,64,64,0,1,float16,fp8,0,9.193642934163412
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,12288,64,8,64,0,1,float16,fp8,0,17.11667251586914
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,12288,64,1,64,0,1,float16,float16,0,9.24893887837728
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,12288,64,64,64,0,1,fp8,fp8,0,11.98199462890625
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,12288,64,8,64,0,1,fp8,fp8,0,22.63910420735677
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,12288,64,1,64,0,1,float16,fp8,0,9.037994384765625
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,12288,64,2,64,0,1,float16,float16,0,9.635157267252604
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,12288,64,1,64,0,1,fp8,fp8,0,11.234986623128256
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,12288,64,2,64,0,1,float16,fp8,0,9.387178421020508
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,12288,64,2,64,0,1,fp8,fp8,0,11.232426961263021
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,12288,64,4,64,0,1,float16,float16,0,9.164629618326822
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,12288,64,4,64,0,1,float16,fp8,0,9.243477503458658
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,12288,64,4,64,0,1,fp8,fp8,0,11.496106465657553
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,12288,64,8,64,0,1,float16,float16,0,9.27726936340332
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,12288,64,8,64,0,1,float16,fp8,0,9.097557067871094
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,12288,64,8,64,0,1,fp8,fp8,0,11.265535990397135
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,10240,64,1,64,0,1,float16,float16,0,49.470977783203125
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,10240,64,1,64,0,1,float16,fp8,0,48.446634928385414
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,10240,64,2,64,0,1,float16,float16,0,49.8696543375651
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,10240,64,2,64,0,1,float16,fp8,0,50.09851582845052
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,10240,64,1,64,0,1,fp8,fp8,0,62.23616027832031
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,10240,64,4,64,0,1,float16,float16,0,48.849405924479164
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,10240,64,4,64,0,1,float16,fp8,0,48.20445760091146
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,10240,64,2,64,0,1,fp8,fp8,0,62.53192647298177
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,10240,64,64,64,0,1,float16,float16,0,25.894058227539062
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,10240,64,64,64,0,1,float16,fp8,0,26.21576436360677
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,10240,64,1,64,0,1,float16,float16,0,24.72345479329427
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,10240,64,8,64,0,1,float16,float16,0,48.0875498453776
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,10240,64,64,64,0,1,fp8,fp8,0,33.352193196614586
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,10240,64,8,64,0,1,float16,fp8,0,49.45049540201823
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,10240,64,4,64,0,1,fp8,fp8,0,62.48943074544271
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,10240,64,8,64,0,1,fp8,fp8,0,64.21469624837239
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,10240,64,1,64,0,1,float16,fp8,0,24.45294952392578
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,10240,64,2,64,0,1,float16,float16,0,24.52991994222005
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,10240,64,1,64,0,1,fp8,fp8,0,30.950912475585938
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,10240,64,2,64,0,1,float16,fp8,0,24.2162348429362
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,10240,64,4,64,0,1,float16,float16,0,24.77038828531901
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,10240,64,2,64,0,1,fp8,fp8,0,30.970367431640625
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,10240,64,4,64,0,1,float16,fp8,0,24.51234181722005
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,10240,64,64,64,0,1,float16,float16,0,13.261482238769531
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,10240,64,8,64,0,1,float16,float16,0,24.198827107747395
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,10240,64,4,64,0,1,fp8,fp8,0,31.493631998697918
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,10240,64,64,64,0,1,float16,fp8,0,13.462357838948568
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,10240,64,8,64,0,1,float16,fp8,0,24.165374755859375
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,10240,64,1,64,0,1,float16,float16,0,12.606805165608725
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,10240,64,64,64,0,1,fp8,fp8,0,16.66167449951172
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,10240,64,8,64,0,1,fp8,fp8,0,31.32501220703125
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,10240,64,1,64,0,1,float16,fp8,0,12.866218566894531
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,10240,64,2,64,0,1,float16,float16,0,12.766549428304037
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,10240,64,1,64,0,1,fp8,fp8,0,15.560874938964844
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,10240,64,2,64,0,1,float16,fp8,0,12.992170969645182
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,10240,64,4,64,0,1,float16,float16,0,12.505940755208334
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,10240,64,2,64,0,1,fp8,fp8,0,15.569408416748047
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,10240,64,4,64,0,1,float16,fp8,0,12.428970336914062
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,10240,64,4,64,0,1,fp8,fp8,0,15.641600290934244
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,10240,64,64,64,0,1,float16,float16,0,6.941866556803386
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,10240,64,8,64,0,1,float16,float16,0,12.426923116048178
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,10240,64,8,64,0,1,float16,fp8,0,12.842496236165365
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,10240,64,64,64,0,1,float16,fp8,0,6.418432235717773
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,10240,64,1,64,0,1,float16,float16,0,5.901653289794922
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,10240,64,64,64,0,1,fp8,fp8,0,8.399701436360678
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,10240,64,8,64,0,1,fp8,fp8,0,15.781888326009115
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,10240,64,1,64,0,1,float16,fp8,0,6.557696024576823
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,10240,64,2,64,0,1,float16,float16,0,6.422698974609375
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,10240,64,2,64,0,1,float16,fp8,0,5.8883412679036455
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,10240,64,1,64,0,1,fp8,fp8,0,8.04147211710612
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,10240,64,4,64,0,1,float16,float16,0,6.232746760050456
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,10240,64,4,64,0,1,float16,fp8,0,6.162261327107747
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,10240,64,2,64,0,1,fp8,fp8,0,7.917738596598308
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,10240,64,4,64,0,1,fp8,fp8,0,8.00921630859375
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,10240,64,8,64,0,1,float16,float16,0,6.436010360717773
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,10240,64,8,64,0,1,float16,fp8,0,6.410581588745117
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,10240,64,8,64,0,1,fp8,fp8,0,8.006997426350912
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,8192,64,1,64,0,1,float16,fp8,0,67.27372741699219
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,8192,64,1,64,0,1,float16,float16,0,69.2650655110677
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,8192,64,2,64,0,1,float16,float16,0,68.3487548828125
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,8192,64,2,64,0,1,float16,fp8,0,66.72127787272136
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,8192,64,1,64,0,1,fp8,fp8,0,82.52262369791667
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,8192,64,4,64,0,1,float16,float16,0,66.61359151204427
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,8192,64,4,64,0,1,float16,fp8,0,69.04610188802083
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,8192,64,2,64,0,1,fp8,fp8,0,84.85854085286458
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,8192,64,64,64,0,1,float16,fp8,0,34.37841033935547
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,8192,64,64,64,0,1,float16,float16,0,34.99895477294922
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,8192,64,1,64,0,1,float16,float16,0,32.811177571614586
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,8192,64,64,64,0,1,fp8,fp8,0,45.05292765299479
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,8192,64,8,64,0,1,float16,float16,0,68.62865193684895
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,8192,64,8,64,0,1,float16,fp8,0,66.99400329589844
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,8192,64,4,64,0,1,fp8,fp8,0,85.08330790201823
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,8192,64,8,64,0,1,fp8,fp8,0,85.988525390625
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,8192,64,1,64,0,1,float16,fp8,0,32.29678853352865
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,8192,64,2,64,0,1,float16,float16,0,32.615081787109375
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,8192,64,1,64,0,1,fp8,fp8,0,40.04198455810547
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,8192,64,2,64,0,1,float16,fp8,0,33.18988800048828
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,8192,64,4,64,0,1,float16,float16,0,31.851178487141926
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,8192,64,2,64,0,1,fp8,fp8,0,40.68880971272787
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,8192,64,4,64,0,1,float16,fp8,0,31.863807678222656
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,8192,64,8,64,0,1,float16,float16,0,31.60644276936849
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,8192,64,64,64,0,1,float16,float16,0,18.13538106282552
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,8192,64,4,64,0,1,fp8,fp8,0,40.65245819091797
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,8192,64,64,64,0,1,float16,fp8,0,17.227434794108074
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,8192,64,8,64,0,1,float16,fp8,0,32.29969024658203
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,8192,64,8,64,0,1,fp8,fp8,0,40.947713216145836
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,8192,64,64,64,0,1,fp8,fp8,0,22.1842778523763
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,8192,64,1,64,0,1,float16,float16,0,16.435712178548176
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,8192,64,1,64,0,1,float16,fp8,0,16.559956868489582
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,8192,64,2,64,0,1,float16,float16,0,16.121344248453777
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,8192,64,2,64,0,1,float16,fp8,0,16.306176503499348
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,8192,64,1,64,0,1,fp8,fp8,0,20.30950419108073
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,8192,64,2,64,0,1,fp8,fp8,0,20.17740758260091
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,8192,64,4,64,0,1,float16,float16,0,16.34286880493164
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,8192,64,4,64,0,1,float16,fp8,0,16.487253824869793
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,8192,64,4,64,0,1,fp8,fp8,0,20.14190928141276
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,8192,64,64,64,0,1,float16,float16,0,8.9978879292806
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,8192,64,8,64,0,1,float16,float16,0,15.951189676920572
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,8192,64,8,64,0,1,float16,fp8,0,16.03106180826823
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,8192,64,64,64,0,1,float16,fp8,0,9.229653040568033
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,8192,64,1,64,0,1,float16,float16,0,8.50705083211263
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,8192,64,8,64,0,1,fp8,fp8,0,20.24823506673177
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,8192,64,64,64,0,1,fp8,fp8,0,11.087360382080078
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,8192,64,1,64,0,1,float16,fp8,0,8.370517094930014
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,8192,64,2,64,0,1,float16,float16,0,8.448341369628906
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,8192,64,1,64,0,1,fp8,fp8,0,10.288469314575195
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,8192,64,2,64,0,1,float16,fp8,0,7.803733189900716
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,8192,64,2,64,0,1,fp8,fp8,0,10.197504043579102
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,8192,64,4,64,0,1,float16,float16,0,8.633344014485678
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,8192,64,4,64,0,1,float16,fp8,0,7.960063934326172
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,8192,64,8,64,0,1,float16,float16,0,8.193706512451172
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,8192,64,4,64,0,1,fp8,fp8,0,10.246655782063803
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,8192,64,64,64,0,1,float16,float16,0,4.030975977579753
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,8192,64,8,64,0,1,float16,fp8,0,8.387242635091146
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,8192,64,64,64,0,1,float16,fp8,0,4.286975860595703
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,8192,64,8,64,0,1,fp8,fp8,0,10.477055867513021
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,8192,64,1,64,0,1,float16,float16,0,3.868330637613932
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,8192,64,64,64,0,1,fp8,fp8,0,5.595647811889648
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,8192,64,1,64,0,1,float16,fp8,0,3.4983253479003906
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,8192,64,1,64,0,1,fp8,fp8,0,5.163519859313965
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,8192,64,2,64,0,1,float16,float16,0,3.8562132517496743
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,8192,64,2,64,0,1,float16,fp8,0,3.452757199605306
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,8192,64,4,64,0,1,float16,float16,0,3.9480320612589517
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,8192,64,2,64,0,1,fp8,fp8,0,5.133994738260905
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,8192,64,4,64,0,1,float16,fp8,0,3.8594560623168945
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,8192,64,8,64,0,1,float16,float16,0,3.5130027135213218
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,8192,64,4,64,0,1,fp8,fp8,0,5.19594669342041
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,8192,64,8,64,0,1,float16,fp8,0,3.9115091959635415
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,8192,64,8,64,0,1,fp8,fp8,0,5.193045298258464
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,6144,64,1,64,0,1,float16,float16,0,37.89158376057943
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,6144,64,1,64,0,1,float16,fp8,0,38.6513926188151
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,6144,64,2,64,0,1,float16,fp8,0,38.16345723470052
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,6144,64,2,64,0,1,float16,float16,0,38.489428202311196
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,6144,64,4,64,0,1,float16,float16,0,37.93032582600912
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,6144,64,1,64,0,1,fp8,fp8,0,46.475433349609375
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,6144,64,2,64,0,1,fp8,fp8,0,47.46973673502604
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,6144,64,4,64,0,1,float16,fp8,0,38.1668701171875
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,6144,64,64,64,0,1,float16,float16,0,21.02476755777995
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,6144,64,64,64,0,1,float16,fp8,0,20.627967834472656
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,6144,64,1,64,0,1,float16,float16,0,19.02779769897461
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,6144,64,64,64,0,1,fp8,fp8,0,26.236073811848957
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,6144,64,8,64,0,1,float16,fp8,0,38.18734995524088
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,6144,64,8,64,0,1,float16,float16,0,38.74457550048828
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,6144,64,4,64,0,1,fp8,fp8,0,47.40556844075521
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,6144,64,8,64,0,1,fp8,fp8,0,47.99026997884115
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,6144,64,1,64,0,1,float16,fp8,0,19.003050486246746
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,6144,64,1,64,0,1,fp8,fp8,0,23.096661885579426
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,6144,64,2,64,0,1,float16,float16,0,18.459306081136067
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,6144,64,2,64,0,1,float16,fp8,0,19.42613347371419
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,6144,64,4,64,0,1,float16,float16,0,19.13326899210612
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,6144,64,2,64,0,1,fp8,fp8,0,23.180458068847656
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,6144,64,4,64,0,1,float16,fp8,0,18.94638951619466
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,6144,64,4,64,0,1,fp8,fp8,0,23.16595204671224
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,6144,64,8,64,0,1,float16,float16,0,19.06329600016276
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,6144,64,64,64,0,1,float16,float16,0,10.519893646240234
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,6144,64,8,64,0,1,float16,fp8,0,18.896724700927734
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,6144,64,64,64,0,1,float16,fp8,0,10.847573598225912
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,6144,64,1,64,0,1,float16,float16,0,9.999359766642252
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,6144,64,8,64,0,1,fp8,fp8,0,23.54363759358724
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,6144,64,64,64,0,1,fp8,fp8,0,12.930048624674479
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,6144,64,1,64,0,1,float16,fp8,0,9.72219721476237
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,6144,64,1,64,0,1,fp8,fp8,0,11.62990951538086
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,6144,64,2,64,0,1,float16,float16,0,9.81282107035319
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,6144,64,2,64,0,1,float16,fp8,0,9.641984303792318
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,6144,64,4,64,0,1,float16,float16,0,9.585152308146158
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,6144,64,2,64,0,1,fp8,fp8,0,11.72701899210612
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,6144,64,4,64,0,1,float16,fp8,0,9.690794626871744
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,6144,64,4,64,0,1,fp8,fp8,0,11.594581604003906
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,6144,64,8,64,0,1,float16,float16,0,9.879210789998373
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,6144,64,64,64,0,1,float16,float16,0,5.282986640930176
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,6144,64,8,64,0,1,float16,fp8,0,9.918293635050455
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,6144,64,64,64,0,1,float16,fp8,0,5.016234715779622
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,6144,64,1,64,0,1,float16,float16,0,4.558677355448405
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,6144,64,8,64,0,1,fp8,fp8,0,11.88113021850586
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,6144,64,64,64,0,1,fp8,fp8,0,6.572544097900391
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,6144,64,1,64,0,1,float16,fp8,0,4.663125356038411
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,6144,64,1,64,0,1,fp8,fp8,0,5.7113602956136065
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,6144,64,2,64,0,1,float16,fp8,0,4.038485209147136
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,6144,64,2,64,0,1,float16,float16,0,4.700160026550293
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,6144,64,4,64,0,1,float16,fp8,0,4.144469261169434
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,6144,64,4,64,0,1,float16,float16,0,4.5856428146362305
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,6144,64,2,64,0,1,fp8,fp8,0,5.811712265014648
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,6144,64,4,64,0,1,fp8,fp8,0,5.937664031982422
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,6144,64,8,64,0,1,float16,float16,0,4.6469119389851885
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,6144,64,8,64,0,1,float16,fp8,0,4.609194755554199
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,6144,64,64,64,0,1,float16,float16,0,2.478762626647949
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,6144,64,8,64,0,1,fp8,fp8,0,5.9403947194417315
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,6144,64,64,64,0,1,float16,fp8,0,2.474496046702067
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,6144,64,1,64,0,1,float16,float16,0,2.0968106587727866
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,6144,64,64,64,0,1,fp8,fp8,0,3.1487998962402344
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,6144,64,1,64,0,1,float16,fp8,0,2.090837319691976
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,6144,64,1,64,0,1,fp8,fp8,0,2.8654934565226235
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,6144,64,2,64,0,1,float16,float16,0,2.0215466817220054
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,6144,64,2,64,0,1,float16,fp8,0,2.07206392288208
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,6144,64,2,64,0,1,fp8,fp8,0,2.9122559229532876
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,6144,64,4,64,0,1,float16,float16,0,2.0753067334493003
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,6144,64,4,64,0,1,float16,fp8,0,2.086911996205648
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,6144,64,4,64,0,1,fp8,fp8,0,2.8349440892537436
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,6144,64,8,64,0,1,float16,float16,0,2.077354590098063
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,6144,64,8,64,0,1,float16,fp8,0,2.0887893040974936
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,6144,64,8,64,0,1,fp8,fp8,0,2.8745387395222983
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,4096,64,1,64,0,1,float16,float16,0,37.09388732910156
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,4096,64,1,64,0,1,float16,fp8,0,36.699989318847656
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,4096,64,2,64,0,1,float16,float16,0,37.47549947102865
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,4096,64,2,64,0,1,float16,fp8,0,37.46764882405599
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,4096,64,4,64,0,1,float16,float16,0,37.76870473225912
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,4096,64,1,64,0,1,fp8,fp8,0,44.34892781575521
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,4096,64,2,64,0,1,fp8,fp8,0,46.7606201171875
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,4096,64,4,64,0,1,float16,fp8,0,38.364672342936196
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,4096,64,64,64,0,1,float16,float16,0,21.600425720214844
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,4096,64,1,64,0,1,float16,float16,0,18.451285044352215
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,4096,64,64,64,0,1,float16,fp8,0,21.065728505452473
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,4096,64,64,64,0,1,fp8,fp8,0,26.09100850423177
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,4096,64,8,64,0,1,float16,float16,0,38.49267323811849
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,4096,64,8,64,0,1,float16,fp8,0,37.52994028727213
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,4096,64,4,64,0,1,fp8,fp8,0,46.756693522135414
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,4096,64,8,64,0,1,fp8,fp8,0,47.869954427083336
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,4096,64,1,64,0,1,float16,fp8,0,18.07086944580078
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,4096,64,2,64,0,1,float16,float16,0,18.79551951090495
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,4096,64,1,64,0,1,fp8,fp8,0,21.365931193033855
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,4096,64,2,64,0,1,float16,fp8,0,18.861056009928387
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,4096,64,4,64,0,1,float16,float16,0,17.931434631347656
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,4096,64,2,64,0,1,fp8,fp8,0,21.544278462727863
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,4096,64,4,64,0,1,float16,fp8,0,18.364757537841797
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,4096,64,4,64,0,1,fp8,fp8,0,21.833045959472656
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,4096,64,8,64,0,1,float16,float16,0,18.041173299153645
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,4096,64,64,64,0,1,float16,float16,0,10.591573079427084
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,4096,64,8,64,0,1,float16,fp8,0,18.48797861735026
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,4096,64,64,64,0,1,float16,fp8,0,10.049023946126303
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,4096,64,8,64,0,1,fp8,fp8,0,22.054229736328125
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,4096,64,1,64,0,1,float16,float16,0,9.369600296020508
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,4096,64,64,64,0,1,fp8,fp8,0,12.617557525634766
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,4096,64,1,64,0,1,float16,fp8,0,9.217023849487305
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,4096,64,1,64,0,1,fp8,fp8,0,10.699092864990234
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,4096,64,2,64,0,1,float16,float16,0,9.486165364583334
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,4096,64,2,64,0,1,float16,fp8,0,9.423360188802084
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,4096,64,2,64,0,1,fp8,fp8,0,10.754730224609375
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,4096,64,4,64,0,1,float16,fp8,0,9.451349258422852
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,4096,64,4,64,0,1,float16,float16,0,9.221973419189453
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,4096,64,4,64,0,1,fp8,fp8,0,10.796202341715494
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,4096,64,8,64,0,1,float16,float16,0,9.032533645629883
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,4096,64,64,64,0,1,float16,float16,0,5.205845197041829
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,4096,64,8,64,0,1,float16,fp8,0,8.842410405476889
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,4096,64,1,64,0,1,float16,float16,0,4.190378824869792
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,4096,64,64,64,0,1,float16,fp8,0,5.004117329915364
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,4096,64,8,64,0,1,fp8,fp8,0,11.073365529378256
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,4096,64,64,64,0,1,fp8,fp8,0,6.264149347941081
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,4096,64,1,64,0,1,float16,fp8,0,3.8481918970743814
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,4096,64,1,64,0,1,fp8,fp8,0,5.330944061279297
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,4096,64,2,64,0,1,float16,float16,0,4.221269289652507
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,4096,64,2,64,0,1,float16,fp8,0,4.269738515218099
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,4096,64,4,64,0,1,float16,float16,0,4.249087969462077
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,4096,64,4,64,0,1,float16,fp8,0,4.176213264465332
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,4096,64,2,64,0,1,fp8,fp8,0,5.326506614685059
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,4096,64,4,64,0,1,fp8,fp8,0,5.399381637573242
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,4096,64,8,64,0,1,float16,float16,0,4.319061279296875
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,4096,64,8,64,0,1,float16,fp8,0,4.313088099161784
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,4096,64,64,64,0,1,float16,float16,0,2.5118719736735025
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,4096,64,8,64,0,1,fp8,fp8,0,5.484885533650716
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,4096,64,64,64,0,1,float16,fp8,0,2.4260266621907554
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,4096,64,1,64,0,1,float16,float16,0,1.841493288675944
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,4096,64,1,64,0,1,float16,fp8,0,1.8950826327006023
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,4096,64,1,64,0,1,fp8,fp8,0,2.6033493677775064
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,4096,64,64,64,0,1,fp8,fp8,0,3.0830933252970376
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,4096,64,2,64,0,1,float16,float16,0,1.8327892621358235
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,4096,64,2,64,0,1,float16,fp8,0,1.8568533261617024
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,4096,64,2,64,0,1,fp8,fp8,0,2.634069283803304
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,4096,64,4,64,0,1,float16,float16,0,1.847808043162028
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,4096,64,4,64,0,1,float16,fp8,0,1.826133410135905
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,4096,64,8,64,0,1,float16,float16,0,1.8831359545389812
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,4096,64,4,64,0,1,fp8,fp8,0,2.6291200319925943
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,4096,64,8,64,0,1,float16,fp8,0,1.9000320434570312
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,4096,64,8,64,0,1,fp8,fp8,0,2.6499412854512534
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,4096,64,64,64,0,1,float16,float16,0,1.2175359725952148
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,4096,64,64,64,0,1,float16,fp8,0,1.1876693566640217
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,4096,64,64,64,0,1,fp8,fp8,0,1.5402666727701824
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,4096,64,1,64,0,1,float16,fp8,0,0.99618132909139
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,4096,64,1,64,0,1,float16,float16,0,0.9939626852671305
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,4096,64,2,64,0,1,float16,float16,0,0.9931093056996664
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,4096,64,1,64,0,1,fp8,fp8,0,1.3581652641296387
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,4096,64,2,64,0,1,float16,fp8,0,0.9886720180511475
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,4096,64,2,64,0,1,fp8,fp8,0,1.3429759343465169
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,4096,64,4,64,0,1,float16,float16,0,0.9919146696726481
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,4096,64,4,64,0,1,float16,fp8,0,0.9951573212941488
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,4096,64,4,64,0,1,fp8,fp8,0,1.344853401184082
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,4096,64,8,64,0,1,float16,float16,0,0.9738240242004395
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,4096,64,8,64,0,1,float16,fp8,0,0.9903786977132162
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,4096,64,8,64,0,1,fp8,fp8,0,1.3634559313456218
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,3072,64,1,64,0,1,float16,fp8,0,21.768704732259113
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,3072,64,1,64,0,1,float16,float16,0,21.912574768066406
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,3072,64,2,64,0,1,float16,float16,0,21.89124298095703
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,3072,64,2,64,0,1,float16,fp8,0,22.01104990641276
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,3072,64,4,64,0,1,float16,float16,0,22.087679545084637
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,3072,64,4,64,0,1,float16,fp8,0,22.084096272786457
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,3072,64,1,64,0,1,fp8,fp8,0,25.48479970296224
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,3072,64,2,64,0,1,fp8,fp8,0,25.878697713216145
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,3072,64,1,64,0,1,float16,float16,0,10.788351694742838
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,3072,64,64,64,0,1,float16,float16,0,13.552298227945963
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,3072,64,64,64,0,1,float16,fp8,0,13.139456431070963
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,3072,64,64,64,0,1,fp8,fp8,0,15.485951741536459
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,3072,64,8,64,0,1,float16,float16,0,20.524885813395183
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,3072,64,4,64,0,1,fp8,fp8,0,26.377215067545574
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,3072,64,8,64,0,1,float16,fp8,0,21.894826253255207
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,3072,64,8,64,0,1,fp8,fp8,0,27.149312337239582
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,3072,64,1,64,0,1,float16,fp8,0,10.840405782063803
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,3072,64,2,64,0,1,float16,float16,0,11.04571787516276
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,3072,64,1,64,0,1,fp8,fp8,0,12.624384562174479
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,3072,64,2,64,0,1,float16,fp8,0,11.104597727457682
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,3072,64,2,64,0,1,fp8,fp8,0,12.647764841715494
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,3072,64,4,64,0,1,float16,float16,0,10.80285898844401
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,3072,64,4,64,0,1,float16,fp8,0,10.751829783121744
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,3072,64,4,64,0,1,fp8,fp8,0,12.786688486735025
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,3072,64,64,64,0,1,float16,float16,0,6.742869059244792
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,3072,64,8,64,0,1,float16,float16,0,11.099989573160807
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,3072,64,8,64,0,1,float16,fp8,0,11.123199462890625
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,3072,64,1,64,0,1,float16,float16,0,5.190314610799153
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,3072,64,64,64,0,1,float16,fp8,0,6.349994659423828
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,3072,64,64,64,0,1,fp8,fp8,0,7.622655868530273
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,3072,64,8,64,0,1,fp8,fp8,0,13.10549290974935
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,3072,64,1,64,0,1,float16,fp8,0,5.1763200759887695
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,3072,64,2,64,0,1,float16,float16,0,4.744533220926921
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,3072,64,1,64,0,1,fp8,fp8,0,6.291626612345378
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,3072,64,2,64,0,1,float16,fp8,0,5.414229075113933
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,3072,64,4,64,0,1,float16,float16,0,5.190314610799153
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,3072,64,2,64,0,1,fp8,fp8,0,6.29145622253418
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,3072,64,4,64,0,1,float16,fp8,0,5.33077335357666
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,3072,64,4,64,0,1,fp8,fp8,0,6.198272069295247
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,3072,64,8,64,0,1,float16,float16,0,5.318997383117676
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,3072,64,64,64,0,1,float16,float16,0,3.2612692515055337
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,3072,64,1,64,0,1,float16,float16,0,2.3029759724934897
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,3072,64,64,64,0,1,float16,fp8,0,3.114154815673828
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,3072,64,8,64,0,1,float16,fp8,0,5.316949208577474
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,3072,64,64,64,0,1,fp8,fp8,0,3.710975964864095
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,3072,64,8,64,0,1,fp8,fp8,0,6.466901143391927
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,3072,64,1,64,0,1,float16,fp8,0,2.3437652587890625
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,3072,64,1,64,0,1,fp8,fp8,0,3.0011733373006186
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,3072,64,2,64,0,1,float16,fp8,0,2.3040000597635903
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,3072,64,2,64,0,1,float16,float16,0,2.3584426244099936
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,3072,64,4,64,0,1,float16,float16,0,2.3297707239786782
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,3072,64,4,64,0,1,float16,fp8,0,2.326357364654541
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,3072,64,2,64,0,1,fp8,fp8,0,3.0206292470296225
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,3072,64,4,64,0,1,fp8,fp8,0,3.056981404622396
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,3072,64,8,64,0,1,float16,float16,0,2.419882615407308
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,3072,64,8,64,0,1,float16,fp8,0,2.3534933725992837
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,3072,64,64,64,0,1,float16,float16,0,1.5779840151468914
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,3072,64,64,64,0,1,float16,fp8,0,1.4825812975565593
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,3072,64,8,64,0,1,fp8,fp8,0,3.110741297403971
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,3072,64,1,64,0,1,float16,float16,0,1.0801493326822917
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,3072,64,64,64,0,1,fp8,fp8,0,1.8583893775939941
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,3072,64,1,64,0,1,float16,fp8,0,1.1129173437754314
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,3072,64,1,64,0,1,fp8,fp8,0,1.5189332962036133
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,3072,64,2,64,0,1,float16,float16,0,1.0792960325876872
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,3072,64,2,64,0,1,float16,fp8,0,1.1047253608703613
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,3072,64,4,64,0,1,float16,float16,0,1.0791253248850505
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,3072,64,2,64,0,1,fp8,fp8,0,1.4972586631774902
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,3072,64,4,64,0,1,float16,fp8,0,1.0746880372365315
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,3072,64,8,64,0,1,float16,float16,0,1.1144533157348633
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,3072,64,4,64,0,1,fp8,fp8,0,1.5109119415283203
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,3072,64,8,64,0,1,float16,fp8,0,1.1083093484242756
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,3072,64,8,64,0,1,fp8,fp8,0,1.5467519760131836
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,3072,64,64,64,0,1,float16,float16,0,0.6720853646596273
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,3072,64,64,64,0,1,float16,fp8,0,0.6181546847025553
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,3072,64,1,64,0,1,float16,float16,0,0.6068906784057617
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,3072,64,64,64,0,1,fp8,fp8,0,0.9460053443908691
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,3072,64,1,64,0,1,float16,fp8,0,0.6048426628112793
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,3072,64,2,64,0,1,float16,float16,0,0.6048426628112793
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,3072,64,1,64,0,1,fp8,fp8,0,0.8101546764373779
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,3072,64,2,64,0,1,float16,fp8,0,0.6005760033925375
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,3072,64,2,64,0,1,fp8,fp8,0,0.8144213358561198
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,3072,64,4,64,0,1,float16,float16,0,0.602453351020813
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,3072,64,4,64,0,1,float16,fp8,0,0.6137173175811768
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,3072,64,8,64,0,1,float16,float16,0,0.6005760033925375
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,3072,64,4,64,0,1,fp8,fp8,0,0.8058880170186361
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,3072,64,8,64,0,1,float16,fp8,0,0.5990399916966757
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,3072,64,8,64,0,1,fp8,fp8,0,0.8099839687347412
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,2048,64,1,64,0,1,float16,float16,0,21.99603271484375
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,2048,64,1,64,0,1,float16,fp8,0,22.368939717610676
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,2048,64,2,64,0,1,float16,float16,0,22.374399820963543
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,2048,64,1,64,0,1,fp8,fp8,0,25.235796610514324
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,2048,64,2,64,0,1,float16,fp8,0,22.640981038411457
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,2048,64,4,64,0,1,float16,fp8,0,22.02367909749349
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,2048,64,4,64,0,1,float16,float16,0,23.25640614827474
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,2048,64,2,64,0,1,fp8,fp8,0,26.874366760253906
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,2048,64,1,64,0,1,float16,float16,0,10.656597137451172
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,2048,64,64,64,0,1,float16,fp8,0,14.086655934651693
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,2048,64,64,64,0,1,float16,float16,0,15.089834849039713
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,2048,64,64,64,0,1,fp8,fp8,0,16.629418690999348
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,2048,64,4,64,0,1,fp8,fp8,0,27.357012430826824
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,2048,64,8,64,0,1,float16,float16,0,22.835540771484375
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,2048,64,8,64,0,1,float16,fp8,0,22.791168212890625
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,2048,64,8,64,0,1,fp8,fp8,0,27.968340555826824
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,2048,64,1,64,0,1,float16,fp8,0,10.636287689208984
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,2048,64,2,64,0,1,float16,float16,0,10.970624287923178
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,2048,64,1,64,0,1,fp8,fp8,0,12.074666341145834
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,2048,64,2,64,0,1,float16,fp8,0,11.256832122802734
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,2048,64,2,64,0,1,fp8,fp8,0,12.317695617675781
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,2048,64,4,64,0,1,float16,float16,0,10.768213907877604
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,2048,64,4,64,0,1,float16,fp8,0,10.674176534016928
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,2048,64,4,64,0,1,fp8,fp8,0,12.40661366780599
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,2048,64,8,64,0,1,float16,float16,0,10.833919525146484
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,2048,64,64,64,0,1,float16,float16,0,7.396010716756185
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,2048,64,64,64,0,1,float16,fp8,0,6.908416112263997
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,2048,64,8,64,0,1,float16,fp8,0,10.90713628133138
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,2048,64,1,64,0,1,float16,float16,0,5.040298779805501
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,2048,64,64,64,0,1,fp8,fp8,0,7.865002950032552
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,2048,64,8,64,0,1,fp8,fp8,0,12.687530517578125
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,2048,64,1,64,0,1,float16,fp8,0,5.137920061747233
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,2048,64,1,64,0,1,fp8,fp8,0,5.926570892333984
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,2048,64,2,64,0,1,float16,float16,0,5.365930557250977
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,2048,64,2,64,0,1,float16,fp8,0,5.087573369344075
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,2048,64,2,64,0,1,fp8,fp8,0,6.066517512003581
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,2048,64,4,64,0,1,float16,float16,0,5.085866610209147
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,2048,64,4,64,0,1,float16,fp8,0,5.150378545125325
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,2048,64,4,64,0,1,fp8,fp8,0,6.074709574381511
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,2048,64,8,64,0,1,float16,float16,0,5.222912152608235
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,2048,64,64,64,0,1,float16,float16,0,3.522730509440104
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,2048,64,8,64,0,1,float16,fp8,0,4.8706560134887695
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,2048,64,64,64,0,1,float16,fp8,0,3.421525319417318
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,2048,64,1,64,0,1,float16,float16,0,2.3456427256266275
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,2048,64,8,64,0,1,fp8,fp8,0,6.305791854858398
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,2048,64,64,64,0,1,fp8,fp8,0,3.8355627059936523
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,2048,64,1,64,0,1,float16,fp8,0,2.2949546178181968
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,2048,64,1,64,0,1,fp8,fp8,0,2.850816090901693
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,2048,64,2,64,0,1,float16,float16,0,2.3159467379252114
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,2048,64,4,64,0,1,float16,float16,0,2.3985493977864585
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,2048,64,2,64,0,1,float16,fp8,0,2.2923946380615234
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,2048,64,2,64,0,1,fp8,fp8,0,2.90662415822347
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,2048,64,4,64,0,1,float16,fp8,0,2.3867732683817544
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,2048,64,8,64,0,1,float16,float16,0,2.4268800417582193
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,2048,64,4,64,0,1,fp8,fp8,0,2.9364906946818032
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,2048,64,8,64,0,1,float16,fp8,0,2.4104960759480796
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,2048,64,8,64,0,1,fp8,fp8,0,2.970794677734375
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,2048,64,64,64,0,1,float16,float16,0,1.7112746238708496
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,2048,64,1,64,0,1,float16,float16,0,1.0806612968444824
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,2048,64,64,64,0,1,float16,fp8,0,1.6008532842000325
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,2048,64,64,64,0,1,fp8,fp8,0,1.9128319422403972
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,2048,64,1,64,0,1,float16,fp8,0,1.0516479810078938
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,2048,64,1,64,0,1,fp8,fp8,0,1.4327467282613118
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,2048,64,2,64,0,1,float16,float16,0,1.06496000289917
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,2048,64,2,64,0,1,float16,fp8,0,1.0618879795074463
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,2048,64,2,64,0,1,fp8,fp8,0,1.4375252723693848
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,2048,64,4,64,0,1,float16,fp8,0,1.0670080184936523
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,2048,64,4,64,0,1,float16,float16,0,1.1018239657084148
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,2048,64,4,64,0,1,fp8,fp8,0,1.4559574127197266
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,2048,64,8,64,0,1,float16,float16,0,1.1122346719106038
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,2048,64,8,64,0,1,float16,fp8,0,1.0949973265329997
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,2048,64,8,64,0,1,fp8,fp8,0,1.4873600006103516
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,2048,64,64,64,0,1,float16,float16,0,0.7862613201141357
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,2048,64,64,64,0,1,float16,fp8,0,0.7408640384674072
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,2048,64,1,64,0,1,float16,fp8,0,0.5505706469217936
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,2048,64,64,64,0,1,fp8,fp8,0,0.9640959898630778
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,2048,64,1,64,0,1,fp8,fp8,0,0.7232853571573893
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,2048,64,2,64,0,1,float16,float16,0,0.5280426740646362
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,2048,64,1,64,0,1,float16,float16,0,0.5463039875030518
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,2048,64,2,64,0,1,float16,fp8,0,0.5309439897537231
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,2048,64,2,64,0,1,fp8,fp8,0,0.725162665049235
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,2048,64,4,64,0,1,float16,float16,0,0.5393066803614298
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,2048,64,4,64,0,1,float16,fp8,0,0.5440853436787924
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,2048,64,4,64,0,1,fp8,fp8,0,0.7193600336710612
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,2048,64,8,64,0,1,float16,float16,0,0.5253119866053263
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,2048,64,8,64,0,1,float16,fp8,0,0.5213866631189982
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,2048,64,8,64,0,1,fp8,fp8,0,0.7403519948323568
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,2048,64,64,64,0,1,float16,float16,0,0.31112533807754517
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,2048,64,64,64,0,1,float16,fp8,0,0.3078826665878296
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,2048,64,64,64,0,1,fp8,fp8,0,0.4536319971084595
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,2048,64,1,64,0,1,float16,float16,0,0.3107840021451314
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,2048,64,1,64,0,1,float16,fp8,0,0.3176106611887614
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,2048,64,1,64,0,1,fp8,fp8,0,0.4102826515833537
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,2048,64,2,64,0,1,float16,float16,0,0.32102400064468384
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,2048,64,2,64,0,1,float16,fp8,0,0.3165866732597351
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,2048,64,2,64,0,1,fp8,fp8,0,0.4097706476847331
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,2048,64,4,64,0,1,float16,float16,0,0.3189760049184163
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,2048,64,4,64,0,1,float16,fp8,0,0.320853332678477
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,2048,64,8,64,0,1,float16,float16,0,0.30617600679397583
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,2048,64,8,64,0,1,float16,fp8,0,0.31624533732732135
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,2048,64,4,64,0,1,fp8,fp8,0,0.41147732734680176
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,2048,64,8,64,0,1,fp8,fp8,0,0.4121599992116292
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,1536,64,1,64,0,1,float16,float16,0,13.492907206217447
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,1536,64,1,64,0,1,float16,fp8,0,13.198506673177084
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,1536,64,1,64,0,1,fp8,fp8,0,14.842538197835287
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,1536,64,2,64,0,1,float16,float16,0,13.701119740804037
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,1536,64,2,64,0,1,float16,fp8,0,13.67313003540039
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,1536,64,4,64,0,1,float16,fp8,0,13.243391672770182
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,1536,64,4,64,0,1,float16,float16,0,13.57107162475586
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,1536,64,2,64,0,1,fp8,fp8,0,15.378944396972656
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,1536,64,8,64,0,1,float16,float16,0,13.699925740559896
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,1536,64,4,64,0,1,fp8,fp8,0,15.956309000651041
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,1536,64,1,64,0,1,float16,float16,0,6.335488001505534
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,1536,64,64,64,0,1,float16,float16,0,9.725610733032227
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,1536,64,64,64,0,1,float16,fp8,0,9.221290588378906
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,1536,64,8,64,0,1,float16,fp8,0,13.804543813069662
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,1536,64,64,64,0,1,fp8,fp8,0,10.197162628173828
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,1536,64,8,64,0,1,fp8,fp8,0,16.363178253173828
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,1536,64,1,64,0,1,float16,fp8,0,6.301013310750325
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,1536,64,1,64,0,1,fp8,fp8,0,7.318527857462565
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,1536,64,2,64,0,1,float16,float16,0,6.291968027750651
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,1536,64,2,64,0,1,float16,fp8,0,6.015317281087239
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,1536,64,4,64,0,1,float16,float16,0,6.303232192993164
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,1536,64,2,64,0,1,fp8,fp8,0,7.336789449055989
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,1536,64,4,64,0,1,float16,fp8,0,6.28326416015625
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,1536,64,4,64,0,1,fp8,fp8,0,7.475882848103841
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,1536,64,8,64,0,1,float16,float16,0,6.507520039876302
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,1536,64,8,64,0,1,float16,fp8,0,6.52236811319987
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,1536,64,64,64,0,1,float16,float16,0,4.698453267415364
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,1536,64,1,64,0,1,float16,float16,0,3.024042765299479
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,1536,64,8,64,0,1,fp8,fp8,0,7.676245371500651
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,1536,64,64,64,0,1,float16,fp8,0,4.492287953694661
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,1536,64,64,64,0,1,fp8,fp8,0,4.944213231404622
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,1536,64,1,64,0,1,float16,fp8,0,3.008853276570638
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,1536,64,1,64,0,1,fp8,fp8,0,3.473408063252767
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,1536,64,2,64,0,1,float16,float16,0,3.0818986892700195
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,1536,64,4,64,0,1,float16,fp8,0,2.9938348134358725
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,1536,64,2,64,0,1,float16,fp8,0,2.946901321411133
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,1536,64,4,64,0,1,float16,float16,0,3.021653175354004
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,1536,64,2,64,0,1,fp8,fp8,0,3.561642646789551
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,1536,64,4,64,0,1,fp8,fp8,0,3.596799850463867
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,1536,64,8,64,0,1,float16,float16,0,3.195904095967611
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,1536,64,8,64,0,1,float16,fp8,0,3.134976069132487
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,1536,64,8,64,0,1,fp8,fp8,0,3.719168027242025
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,1536,64,64,64,0,1,float16,float16,0,2.2958079973856607
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,1536,64,1,64,0,1,float16,float16,0,1.4149972597757976
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,1536,64,64,64,0,1,float16,fp8,0,2.1601279576619468
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,1536,64,64,64,0,1,fp8,fp8,0,2.435925324757894
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,1536,64,1,64,0,1,float16,fp8,0,1.3716479937235515
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,1536,64,1,64,0,1,fp8,fp8,0,1.7184425989786785
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,1536,64,2,64,0,1,float16,float16,0,1.3967359860738118
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,1536,64,2,64,0,1,float16,fp8,0,1.4110719362894695
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,1536,64,2,64,0,1,fp8,fp8,0,1.7269760767618816
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,1536,64,4,64,0,1,float16,float16,0,1.4518613815307617
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,1536,64,4,64,0,1,float16,fp8,0,1.4064639409383137
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,1536,64,4,64,0,1,fp8,fp8,0,1.749674638112386
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,1536,64,8,64,0,1,float16,float16,0,1.4824105898539226
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,1536,64,8,64,0,1,float16,fp8,0,1.493674596150716
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,1536,64,8,64,0,1,fp8,fp8,0,1.804800033569336
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,1536,64,64,64,0,1,float16,float16,0,1.0876586437225342
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,1536,64,64,64,0,1,float16,fp8,0,1.0067626635233562
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,1536,64,64,64,0,1,fp8,fp8,0,1.228117307027181
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,1536,64,1,64,0,1,float16,float16,0,0.6202026605606079
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,1536,64,1,64,0,1,float16,fp8,0,0.6249813238779703
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,1536,64,1,64,0,1,fp8,fp8,0,0.8521386782328287
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,1536,64,2,64,0,1,float16,float16,0,0.6386346817016602
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,1536,64,2,64,0,1,float16,fp8,0,0.6283946832021078
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,1536,64,2,64,0,1,fp8,fp8,0,0.8673280080159506
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,1536,64,4,64,0,1,float16,float16,0,0.6220800081888834
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,1536,64,4,64,0,1,float16,fp8,0,0.6444373528162638
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,1536,64,4,64,0,1,fp8,fp8,0,0.8623786767323812
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,1536,64,8,64,0,1,float16,float16,0,0.6427306731541952
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,1536,64,8,64,0,1,float16,fp8,0,0.6297599871953329
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,1536,64,8,64,0,1,fp8,fp8,0,0.905898650487264
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,1536,64,64,64,0,1,float16,float16,0,0.442197322845459
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,1536,64,64,64,0,1,float16,fp8,0,0.3804159959157308
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,1536,64,64,64,0,1,fp8,fp8,0,0.6149119933446249
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,1536,64,1,64,0,1,float16,float16,0,0.3336533308029175
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,1536,64,1,64,0,1,float16,fp8,0,0.3213653365770976
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,1536,64,1,64,0,1,fp8,fp8,0,0.44441600640614826
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,1536,64,2,64,0,1,float16,float16,0,0.32204800844192505
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,1536,64,2,64,0,1,float16,fp8,0,0.32716800769170123
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,1536,64,2,64,0,1,fp8,fp8,0,0.4471466541290283
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,1536,64,4,64,0,1,float16,float16,0,0.32716800769170123
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,1536,64,4,64,0,1,float16,fp8,0,0.3232426643371582
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,1536,64,4,64,0,1,fp8,fp8,0,0.44868266582489014
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,1536,64,8,64,0,1,float16,float16,0,0.3252906600634257
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,1536,64,8,64,0,1,float16,fp8,0,0.3264853358268738
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,1536,64,8,64,0,1,fp8,fp8,0,0.4457813501358032
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,1536,64,64,64,0,1,float16,float16,0,0.19643733898798624
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,1536,64,64,64,0,1,float16,fp8,0,0.19114667177200317
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,1536,64,64,64,0,1,fp8,fp8,0,0.2691413362820943
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,1536,64,1,64,0,1,float16,float16,0,0.1986560026804606
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,1536,64,1,64,0,1,float16,fp8,0,0.1976319948832194
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,1536,64,1,64,0,1,fp8,fp8,0,0.2583893338839213
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,1536,64,2,64,0,1,float16,float16,0,0.19933867454528809
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,1536,64,2,64,0,1,float16,fp8,0,0.20241065820058188
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,1536,64,2,64,0,1,fp8,fp8,0,0.2568533420562744
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,1536,64,4,64,0,1,float16,float16,0,0.20104533433914185
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,1536,64,4,64,0,1,float16,fp8,0,0.20087466637293497
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,1536,64,4,64,0,1,fp8,fp8,0,0.2611200014750163
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,1536,64,8,64,0,1,float16,float16,0,0.20036266247431436
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,1536,64,8,64,0,1,float16,fp8,0,0.1930239995320638
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,1536,64,8,64,0,1,fp8,fp8,0,0.26026666164398193
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,1024,64,1,64,0,1,float16,float16,0,13.679786682128906
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,1024,64,1,64,0,1,float16,fp8,0,13.57977549235026
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,1024,64,1,64,0,1,fp8,fp8,0,15.094954172770182
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,1024,64,2,64,0,1,float16,float16,0,14.50103505452474
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,1024,64,2,64,0,1,float16,fp8,0,14.14894994099935
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,1024,64,4,64,0,1,float16,float16,0,13.975723266601562
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,1024,64,2,64,0,1,fp8,fp8,0,15.818410237630209
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,1024,64,4,64,0,1,float16,fp8,0,14.06003189086914
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,1024,64,8,64,0,1,float16,float16,0,14.425429026285807
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,1024,64,4,64,0,1,fp8,fp8,0,15.889748891194662
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,1024,64,1,64,0,1,float16,float16,0,6.514346440633138
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,1024,64,64,64,0,1,float16,float16,0,11.44098154703776
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,1024,64,8,64,0,1,float16,fp8,0,14.119424184163412
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,1024,64,64,64,0,1,float16,fp8,0,10.822315216064453
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,1024,64,64,64,0,1,fp8,fp8,0,11.48586654663086
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,1024,64,8,64,0,1,fp8,fp8,0,16.309589385986328
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,1024,64,1,64,0,1,float16,fp8,0,6.666069030761719
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,1024,64,1,64,0,1,fp8,fp8,0,7.32689094543457
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,1024,64,2,64,0,1,float16,float16,0,6.606335957845052
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,1024,64,2,64,0,1,float16,fp8,0,6.561279932657878
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,1024,64,4,64,0,1,float16,float16,0,6.688255945841472
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,1024,64,2,64,0,1,fp8,fp8,0,7.458133061726888
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,1024,64,4,64,0,1,float16,fp8,0,6.766933441162109
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,1024,64,4,64,0,1,fp8,fp8,0,7.744170506795247
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,1024,64,8,64,0,1,float16,float16,0,7.02293332417806
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,1024,64,8,64,0,1,float16,fp8,0,6.816256205240886
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,1024,64,8,64,0,1,fp8,fp8,0,7.865856170654297
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,1024,64,64,64,0,1,float16,float16,0,5.600426355997722
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,1024,64,1,64,0,1,float16,float16,0,3.214336077372233
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,1024,64,64,64,0,1,float16,fp8,0,5.2930558522542315
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,1024,64,64,64,0,1,fp8,fp8,0,5.436586380004883
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,1024,64,1,64,0,1,float16,fp8,0,3.204949378967285
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,1024,64,1,64,0,1,fp8,fp8,0,3.534847895304362
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,1024,64,2,64,0,1,float16,float16,0,3.1916373570760093
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,1024,64,2,64,0,1,float16,fp8,0,3.1865173975626626
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,1024,64,2,64,0,1,fp8,fp8,0,3.571711858113607
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,1024,64,4,64,0,1,float16,float16,0,3.2907946904500327
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,1024,64,4,64,0,1,float16,fp8,0,3.2040961583455405
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,1024,64,4,64,0,1,fp8,fp8,0,3.6394666035970054
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,1024,64,8,64,0,1,float16,float16,0,3.4315945307413735
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,1024,64,8,64,0,1,float16,fp8,0,3.3727146784464517
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,1024,64,8,64,0,1,fp8,fp8,0,3.739136060078939
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,1024,64,1,64,0,1,float16,float16,0,1.4892373085021973
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,1024,64,64,64,0,1,float16,fp8,0,2.582869370778402
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,1024,64,64,64,0,1,float16,float16,0,2.730496088663737
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,1024,64,1,64,0,1,float16,fp8,0,1.4976000785827637
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,1024,64,64,64,0,1,fp8,fp8,0,2.6975574493408203
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,1024,64,1,64,0,1,fp8,fp8,0,1.732437292734782
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,1024,64,2,64,0,1,float16,float16,0,1.531050682067871
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,1024,64,2,64,0,1,float16,fp8,0,1.4965759913126628
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,1024,64,4,64,0,1,float16,float16,0,1.547946612040202
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,1024,64,2,64,0,1,fp8,fp8,0,1.7368747393290203
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,1024,64,4,64,0,1,fp8,fp8,0,1.772714614868164
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,1024,64,4,64,0,1,float16,fp8,0,1.5312213897705078
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,1024,64,8,64,0,1,float16,float16,0,1.6566613515218098
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,1024,64,8,64,0,1,float16,fp8,0,1.6230400403340657
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,1024,64,8,64,0,1,fp8,fp8,0,1.842517375946045
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,1024,64,1,64,0,1,float16,float16,0,0.656554659207662
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,1024,64,64,64,0,1,float16,fp8,0,1.234773317972819
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,1024,64,64,64,0,1,float16,float16,0,1.3122560183207195
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,1024,64,1,64,0,1,float16,fp8,0,0.6526293357213339
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,1024,64,64,64,0,1,fp8,fp8,0,1.3257386684417725
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,1024,64,1,64,0,1,fp8,fp8,0,0.8645973205566406
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,1024,64,2,64,0,1,float16,float16,0,0.6780587037404379
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,1024,64,2,64,0,1,float16,fp8,0,0.673962672551473
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,1024,64,2,64,0,1,fp8,fp8,0,0.8654507001241049
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,1024,64,4,64,0,1,float16,float16,0,0.6773760318756104
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,1024,64,4,64,0,1,float16,fp8,0,0.6720853646596273
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,1024,64,4,64,0,1,fp8,fp8,0,0.8886613051096598
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,1024,64,8,64,0,1,float16,float16,0,0.7234559853871664
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,1024,64,8,64,0,1,float16,fp8,0,0.7226026852925619
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,1024,64,8,64,0,1,fp8,fp8,0,0.9318400224049886
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,1024,64,64,64,0,1,float16,float16,0,0.5806080102920532
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,1024,64,64,64,0,1,float16,fp8,0,0.5307733217875162
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,1024,64,64,64,0,1,fp8,fp8,0,0.6751573085784912
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,1024,64,1,64,0,1,float16,float16,0,0.31061333417892456
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,1024,64,1,64,0,1,float16,fp8,0,0.3170986572901408
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,1024,64,1,64,0,1,fp8,fp8,0,0.420693318049113
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,1024,64,2,64,0,1,float16,float16,0,0.3295573393503825
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,1024,64,2,64,0,1,float16,fp8,0,0.32307199637095135
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,1024,64,2,64,0,1,fp8,fp8,0,0.4237653414408366
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,1024,64,4,64,0,1,float16,float16,0,0.3160746693611145
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,1024,64,4,64,0,1,float16,fp8,0,0.30958932638168335
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,1024,64,8,64,0,1,float16,float16,0,0.31010133028030396
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,1024,64,4,64,0,1,fp8,fp8,0,0.4198400179545085
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,1024,64,8,64,0,1,float16,fp8,0,0.30856533845265705
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,1024,64,64,64,0,1,float16,float16,0,0.18039466937383017
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,1024,64,8,64,0,1,fp8,fp8,0,0.42444801330566406
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,1024,64,64,64,0,1,float16,fp8,0,0.1730560064315796
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,1024,64,1,64,0,1,float16,float16,0,0.16247466206550598
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,1024,64,64,64,0,1,fp8,fp8,0,0.2908160090446472
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,1024,64,1,64,0,1,float16,fp8,0,0.16025599837303162
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,1024,64,2,64,0,1,float16,fp8,0,0.1629866659641266
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,1024,64,1,64,0,1,fp8,fp8,0,0.2228906750679016
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,1024,64,2,64,0,1,float16,float16,0,0.1621333360671997
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,1024,64,2,64,0,1,fp8,fp8,0,0.2228906750679016
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,1024,64,4,64,0,1,float16,fp8,0,0.16179200013478598
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,1024,64,4,64,0,1,float16,float16,0,0.1764693260192871
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,1024,64,4,64,0,1,fp8,fp8,0,0.2182826598485311
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,1024,64,8,64,0,1,float16,float16,0,0.16315733393033346
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,1024,64,8,64,0,1,float16,fp8,0,0.16145066420237222
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,1024,64,64,64,0,1,float16,float16,0,0.09830400347709656
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,1024,64,64,64,0,1,float16,fp8,0,0.09915733337402344
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,1024,64,8,64,0,1,fp8,fp8,0,0.22357332706451416
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,1024,64,64,64,0,1,fp8,fp8,0,0.12748799721399942
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,1024,64,1,64,0,1,float16,float16,0,0.09659733374913533
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,1024,64,1,64,0,1,float16,fp8,0,0.09796266754468282
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,1024,64,1,64,0,1,fp8,fp8,0,0.12731732924779257
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,1024,64,2,64,0,1,float16,float16,0,0.09727999567985535
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,1024,64,2,64,0,1,float16,fp8,0,0.09642666578292847
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,1024,64,2,64,0,1,fp8,fp8,0,0.12714667121569315
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,1024,64,4,64,0,1,float16,float16,0,0.09727999567985535
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,1024,64,4,64,0,1,float16,fp8,0,0.09591466188430786
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,1024,64,4,64,0,1,fp8,fp8,0,0.12578133742014566
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,1024,64,8,64,0,1,float16,float16,0,0.09557333588600159
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,1024,64,8,64,0,1,float16,fp8,0,0.09540266791979472
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,1024,64,8,64,0,1,fp8,fp8,0,0.12714667121569315
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,512,64,1,64,0,1,float16,float16,0,10.125482559204102
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,512,64,1,64,0,1,float16,fp8,0,10.279253641764322
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,512,64,1,64,0,1,fp8,fp8,0,10.302293141682943
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,512,64,2,64,0,1,float16,float16,0,10.77572250366211
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,512,64,2,64,0,1,float16,fp8,0,10.910719553629557
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,512,64,2,64,0,1,fp8,fp8,0,10.91379165649414
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,512,64,4,64,0,1,float16,fp8,0,10.950655619303385
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,512,64,4,64,0,1,float16,float16,0,10.8221435546875
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,512,64,4,64,0,1,fp8,fp8,0,11.082239786783854
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,512,64,8,64,0,1,float16,float16,0,11.255636850992838
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,512,64,8,64,0,1,float16,fp8,0,11.00168482462565
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,512,64,1,64,0,1,float16,float16,0,4.723370552062988
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,512,64,64,64,0,1,float16,fp8,0,9.803775787353516
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,512,64,64,64,0,1,float16,float16,0,10.426538467407227
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,512,64,8,64,0,1,fp8,fp8,0,11.47869873046875
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,512,64,64,64,0,1,fp8,fp8,0,9.373525619506836
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,512,64,1,64,0,1,float16,fp8,0,4.737194697062175
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,512,64,1,64,0,1,fp8,fp8,0,4.928682645161946
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,512,64,2,64,0,1,float16,float16,0,4.884138743082683
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,512,64,2,64,0,1,float16,fp8,0,4.847615877787272
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,512,64,2,64,0,1,fp8,fp8,0,5.051904042561849
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,512,64,4,64,0,1,float16,float16,0,5.043541272481282
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,512,64,4,64,0,1,float16,fp8,0,4.930047988891602
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,512,64,4,64,0,1,fp8,fp8,0,5.274453481038411
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,512,64,8,64,0,1,float16,float16,0,5.316266695658366
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,512,64,8,64,0,1,float16,fp8,0,5.261311848958333
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,512,64,8,64,0,1,fp8,fp8,0,5.450069427490234
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,512,64,64,64,0,1,float16,float16,0,5.058901468912761
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,512,64,1,64,0,1,float16,float16,0,2.28983465830485
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,512,64,1,64,0,1,float16,fp8,0,2.3164587020874023
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,512,64,64,64,0,1,fp8,fp8,0,4.369749387105306
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,512,64,64,64,0,1,float16,fp8,0,4.791808128356934
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,512,64,1,64,0,1,fp8,fp8,0,2.3727787335713706
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,512,64,2,64,0,1,float16,float16,0,2.39411195119222
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,512,64,2,64,0,1,float16,fp8,0,2.3570772806803384
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,512,64,2,64,0,1,fp8,fp8,0,2.434901396433512
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,512,64,4,64,0,1,float16,float16,0,2.3891626993815103
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,512,64,4,64,0,1,float16,fp8,0,2.399402618408203
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,512,64,4,64,0,1,fp8,fp8,0,2.5270613034566245
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,512,64,8,64,0,1,float16,float16,0,2.6642773946126304
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,512,64,8,64,0,1,float16,fp8,0,2.551978588104248
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,512,64,64,64,0,1,float16,float16,0,2.4738133748372397
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,512,64,8,64,0,1,fp8,fp8,0,2.5983999570210776
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,512,64,64,64,0,1,float16,fp8,0,2.332159996032715
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,512,64,1,64,0,1,float16,float16,0,1.0820266405741374
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,512,64,64,64,0,1,fp8,fp8,0,2.1273600260416665
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,512,64,1,64,0,1,float16,fp8,0,1.0786133607228596
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,512,64,1,64,0,1,fp8,fp8,0,1.1608746846516926
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,512,64,2,64,0,1,float16,float16,0,1.0926079750061035
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,512,64,2,64,0,1,float16,fp8,0,1.084928035736084
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,512,64,2,64,0,1,fp8,fp8,0,1.193130652109782
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,512,64,4,64,0,1,float16,float16,0,1.140053351720174
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,512,64,4,64,0,1,float16,fp8,0,1.137664000193278
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,512,64,4,64,0,1,fp8,fp8,0,1.214634656906128
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,512,64,8,64,0,1,float16,float16,0,1.2526933352152507
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,512,64,8,64,0,1,float16,fp8,0,1.2284586429595947
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,512,64,8,64,0,1,fp8,fp8,0,1.2750506401062012
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,512,64,64,64,0,1,float16,float16,0,1.1695786317189534
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,512,64,64,64,0,1,float16,fp8,0,1.0856106281280518
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,512,64,64,64,0,1,fp8,fp8,0,1.04584534962972
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,512,64,1,64,0,1,float16,float16,0,0.45841066042582196
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,512,64,1,64,0,1,float16,fp8,0,0.4485119978586833
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,512,64,1,64,0,1,fp8,fp8,0,0.5806080102920532
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,512,64,2,64,0,1,float16,float16,0,0.4642133315404256
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,512,64,2,64,0,1,float16,fp8,0,0.4643839995066325
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,512,64,2,64,0,1,fp8,fp8,0,0.5884586572647095
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,512,64,4,64,0,1,float16,float16,0,0.48878931999206543
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,512,64,4,64,0,1,float16,fp8,0,0.4739413261413574
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,512,64,8,64,0,1,float16,float16,0,0.532480001449585
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,512,64,4,64,0,1,fp8,fp8,0,0.6143999894460043
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,512,64,8,64,0,1,float16,fp8,0,0.5196799834569296
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,512,64,8,64,0,1,fp8,fp8,0,0.6454613208770752
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,512,64,64,64,0,1,float16,float16,0,0.48418132464090985
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,512,64,64,64,0,1,float16,fp8,0,0.4375893274943034
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,512,64,64,64,0,1,fp8,fp8,0,0.532480001449585
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,512,64,1,64,0,1,float16,float16,0,0.21145600080490112
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,512,64,1,64,0,1,float16,fp8,0,0.211626668771108
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,512,64,1,64,0,1,fp8,fp8,0,0.26521599292755127
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,512,64,2,64,0,1,float16,fp8,0,0.21333332856496176
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,512,64,2,64,0,1,float16,float16,0,0.212991992632548
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,512,64,2,64,0,1,fp8,fp8,0,0.2648746569951375
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,512,64,4,64,0,1,float16,float16,0,0.20667733748753866
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,512,64,4,64,0,1,float16,fp8,0,0.2152106761932373
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,512,64,4,64,0,1,fp8,fp8,0,0.26948267221450806
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,512,64,8,64,0,1,float16,float16,0,0.20770132541656494
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,512,64,8,64,0,1,float16,fp8,0,0.21196800470352173
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,512,64,8,64,0,1,fp8,fp8,0,0.27153066794077557
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,512,64,64,64,0,1,float16,float16,0,0.12185600399971008
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,512,64,64,64,0,1,float16,fp8,0,0.11127466956774394
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,512,64,1,64,0,1,float16,float16,0,0.1109333336353302
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,512,64,64,64,0,1,fp8,fp8,0,0.2167466680208842
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,512,64,1,64,0,1,fp8,fp8,0,0.14131200313568115
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,512,64,1,64,0,1,float16,fp8,0,0.11212799946467082
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,512,64,2,64,0,1,float16,float16,0,0.11229866743087769
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,512,64,2,64,0,1,float16,fp8,0,0.1114453375339508
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,512,64,2,64,0,1,fp8,fp8,0,0.1430186629295349
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,512,64,4,64,0,1,float16,float16,0,0.11178666353225708
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,512,64,4,64,0,1,fp8,fp8,0,0.14404267072677612
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,512,64,4,64,0,1,float16,fp8,0,0.11110400160153706
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,512,64,8,64,0,1,float16,float16,0,0.10820266604423523
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,512,64,8,64,0,1,float16,fp8,0,0.10803199807802837
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,512,64,8,64,0,1,fp8,fp8,0,0.14131200313568115
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,512,64,64,64,0,1,float16,float16,0,0.06690133114655812
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,512,64,64,64,0,1,float16,fp8,0,0.06621866424878438
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,512,64,64,64,0,1,fp8,fp8,0,0.08379733562469482
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,512,64,1,64,0,1,float16,float16,0,0.06485333542029063
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,512,64,1,64,0,1,float16,fp8,0,0.06553600231806438
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,512,64,1,64,0,1,fp8,fp8,0,0.08311466872692108
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,512,64,2,64,0,1,float16,float16,0,0.06502399841944377
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,512,64,2,64,0,1,float16,fp8,0,0.06468266745408376
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,512,64,2,64,0,1,fp8,fp8,0,0.08140799899895985
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,512,64,4,64,0,1,float16,float16,0,0.06570666531721751
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,512,64,4,64,0,1,float16,fp8,0,0.06348800162474315
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,512,64,8,64,0,1,float16,fp8,0,0.06331733365853627
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,512,64,4,64,0,1,fp8,fp8,0,0.08243200182914734
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,512,64,8,64,0,1,float16,float16,0,0.06348800162474315
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,512,64,8,64,0,1,fp8,fp8,0,0.08123733103275299
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,512,64,64,64,0,1,float16,float16,0,0.039936001102129616
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,512,64,64,64,0,1,float16,fp8,0,0.038912000755469
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,512,64,64,64,0,1,fp8,fp8,0,0.04420266548792521
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,512,64,1,64,0,1,float16,float16,0,0.038058665891488395
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,512,64,1,64,0,1,float16,fp8,0,0.038058665891488395
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,512,64,1,64,0,1,fp8,fp8,0,0.04471466441949209
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,512,64,2,64,0,1,float16,float16,0,0.03857066730658213
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,512,64,2,64,0,1,fp8,fp8,0,0.04437333345413208
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,512,64,4,64,0,1,float16,float16,0,0.03839999934037527
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,512,64,2,64,0,1,float16,fp8,0,0.03908266623814901
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,512,64,4,64,0,1,float16,fp8,0,0.03874133278926214
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,512,64,4,64,0,1,fp8,fp8,0,0.04386133452256521
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,512,64,8,64,0,1,float16,float16,0,0.037717332442601524
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,512,64,8,64,0,1,float16,fp8,0,0.038912000755469
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,512,64,8,64,0,1,fp8,fp8,0,0.04454400142033895
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,256,64,1,64,0,1,float16,fp8,0,4.080128033955892
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,256,64,1,64,0,1,float16,float16,0,4.076544125874837
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,256,64,1,64,0,1,fp8,fp8,0,3.6416854858398438
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,256,64,2,64,0,1,float16,float16,0,4.38374392191569
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,256,64,2,64,0,1,fp8,fp8,0,3.862527847290039
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,256,64,2,64,0,1,float16,fp8,0,4.341077486673991
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,256,64,4,64,0,1,float16,float16,0,4.465322812398274
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,256,64,4,64,0,1,float16,fp8,0,4.411391894022624
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,256,64,4,64,0,1,fp8,fp8,0,4.035072008768718
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,256,64,8,64,0,1,float16,float16,0,4.780543963114421
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,256,64,8,64,0,1,float16,fp8,0,4.672682762145996
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,256,64,8,64,0,1,fp8,fp8,0,4.289706548055013
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,256,64,1,64,0,1,float16,float16,0,1.9618132909138997
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,256,64,64,64,0,1,float16,fp8,0,4.702720006306966
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,256,64,64,64,0,1,float16,float16,0,5.023232142130534
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,256,64,64,64,0,1,fp8,fp8,0,3.8707199096679688
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,256,64,1,64,0,1,float16,fp8,0,1.9531092643737793
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,256,64,1,64,0,1,fp8,fp8,0,1.779029369354248
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,256,64,2,64,0,1,float16,float16,0,2.0677973429361978
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,256,64,2,64,0,1,float16,fp8,0,2.0609706242879233
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,256,64,2,64,0,1,fp8,fp8,0,1.8472960789998372
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,256,64,4,64,0,1,float16,float16,0,2.1505707105000815
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,256,64,4,64,0,1,float16,fp8,0,2.128213405609131
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,256,64,4,64,0,1,fp8,fp8,0,1.9234132766723633
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,256,64,8,64,0,1,float16,float16,0,2.313386599222819
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,256,64,8,64,0,1,float16,fp8,0,2.2780586878458657
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,256,64,8,64,0,1,fp8,fp8,0,2.0200106302897134
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,256,64,64,64,0,1,float16,float16,0,2.4388267199198403
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,256,64,1,64,0,1,float16,float16,0,0.936789353688558
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,256,64,64,64,0,1,float16,fp8,0,2.3198720614115396
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,256,64,64,64,0,1,fp8,fp8,0,1.881941318511963
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,256,64,1,64,0,1,float16,fp8,0,0.9361066818237305
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,256,64,1,64,0,1,fp8,fp8,0,0.9111893177032471
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,256,64,2,64,0,1,float16,float16,0,0.98798934618632
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,256,64,2,64,0,1,float16,fp8,0,0.9832106431325277
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,256,64,2,64,0,1,fp8,fp8,0,0.9376426537831625
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,256,64,4,64,0,1,float16,float16,0,1.0064213275909424
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,256,64,4,64,0,1,float16,fp8,0,1.0275839964548747
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,256,64,4,64,0,1,fp8,fp8,0,0.9656319618225098
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,256,64,8,64,0,1,float16,float16,0,1.1021653016408284
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,256,64,8,64,0,1,float16,fp8,0,1.0852693716684978
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,256,64,8,64,0,1,fp8,fp8,0,1.0076159636179607
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,256,64,64,64,0,1,float16,float16,0,1.1584853331247966
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,256,64,64,64,0,1,float16,fp8,0,1.0762240091959636
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,256,64,64,64,0,1,fp8,fp8,0,0.9245013395945231
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,256,64,1,64,0,1,float16,float16,0,0.3601066668828328
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,256,64,1,64,0,1,float16,fp8,0,0.35839998722076416
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,256,64,1,64,0,1,fp8,fp8,0,0.44066135088602704
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,256,64,2,64,0,1,float16,float16,0,0.3763200044631958
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,256,64,2,64,0,1,float16,fp8,0,0.3676160176595052
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,256,64,2,64,0,1,fp8,fp8,0,0.44646398226420086
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,256,64,4,64,0,1,float16,float16,0,0.39765334129333496
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,256,64,4,64,0,1,float16,fp8,0,0.3935573498408
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,256,64,4,64,0,1,fp8,fp8,0,0.4689919948577881
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,256,64,8,64,0,1,float16,float16,0,0.4500480095545451
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,256,64,8,64,0,1,float16,fp8,0,0.4357120196024577
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,256,64,8,64,0,1,fp8,fp8,0,0.5024426778157552
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,256,64,64,64,0,1,float16,float16,0,0.4915200074513753
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,256,64,64,64,0,1,float16,fp8,0,0.4333226680755615
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,256,64,64,64,0,1,fp8,fp8,0,0.4633599917093913
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,256,64,1,64,0,1,float16,float16,0,0.1474560002485911
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,256,64,1,64,0,1,float16,fp8,0,0.1532586713631948
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,256,64,1,64,0,1,fp8,fp8,0,0.18466132879257202
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,256,64,2,64,0,1,float16,float16,0,0.1469439963499705
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,256,64,2,64,0,1,float16,fp8,0,0.15172266960144043
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,256,64,2,64,0,1,fp8,fp8,0,0.18824533621470133
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,256,64,4,64,0,1,float16,float16,0,0.15018666783968607
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,256,64,4,64,0,1,float16,fp8,0,0.1539413332939148
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,256,64,4,64,0,1,fp8,fp8,0,0.202239990234375
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,256,64,8,64,0,1,float16,float16,0,0.1546239952246348
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,256,64,8,64,0,1,float16,fp8,0,0.15172266960144043
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,256,64,8,64,0,1,fp8,fp8,0,0.1919999917348226
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,256,64,64,64,0,1,float16,float16,0,0.09198932846387227
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,256,64,64,64,0,1,float16,fp8,0,0.0846506655216217
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,256,64,64,64,0,1,fp8,fp8,0,0.17971199750900269
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,256,64,1,64,0,1,float16,float16,0,0.08038400113582611
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,256,64,1,64,0,1,float16,fp8,0,0.07918933530648549
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,256,64,1,64,0,1,fp8,fp8,0,0.10086400310198466
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,256,64,2,64,0,1,float16,float16,0,0.07884799937407176
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,256,64,2,64,0,1,float16,fp8,0,0.08072533210118611
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,256,64,2,64,0,1,fp8,fp8,0,0.10018133123715718
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,256,64,4,64,0,1,float16,float16,0,0.08260266482830048
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,256,64,4,64,0,1,float16,fp8,0,0.08123733103275299
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,256,64,4,64,0,1,fp8,fp8,0,0.10291199882825215
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,256,64,8,64,0,1,float16,float16,0,0.07935999830563863
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,256,64,8,64,0,1,float16,fp8,0,0.08089600006739299
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,256,64,8,64,0,1,fp8,fp8,0,0.10001066327095032
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,256,64,64,64,0,1,float16,fp8,0,0.04915200173854828
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,256,64,64,64,0,1,float16,float16,0,0.05000533163547516
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,256,64,64,64,0,1,fp8,fp8,0,0.06178133189678192
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,256,64,1,64,0,1,float16,float16,0,0.04539733131726583
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,256,64,1,64,0,1,float16,fp8,0,0.04659200211366018
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,256,64,1,64,0,1,fp8,fp8,0,0.05905066430568695
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,256,64,2,64,0,1,float16,float16,0,0.04625066618124644
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,256,64,2,64,0,1,float16,fp8,0,0.045738667249679565
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,256,64,2,64,0,1,fp8,fp8,0,0.05870933334032694
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,256,64,4,64,0,1,float16,float16,0,0.04539733131726583
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,256,64,4,64,0,1,float16,fp8,0,0.04693333307902018
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,256,64,4,64,0,1,fp8,fp8,0,0.05905066430568695
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,256,64,8,64,0,1,float16,float16,0,0.04642133414745331
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,256,64,8,64,0,1,float16,fp8,0,0.04505600035190582
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,256,64,8,64,0,1,fp8,fp8,0,0.05905066430568695
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,256,64,64,64,0,1,float16,float16,0,0.028501334289709728
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,256,64,64,64,0,1,float16,fp8,0,0.028330666323502857
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,256,64,64,64,0,1,fp8,fp8,0,0.0341333324710528
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,256,64,1,64,0,1,float16,float16,0,0.027647999425729115
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,256,64,1,64,0,1,float16,fp8,0,0.027647999425729115
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,256,64,1,64,0,1,fp8,fp8,0,0.03345066557327906
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,256,64,2,64,0,1,float16,float16,0,0.027818667391935985
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,256,64,2,64,0,1,float16,fp8,0,0.028160000840822857
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,256,64,2,64,0,1,fp8,fp8,0,0.03328000009059906
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,256,64,4,64,0,1,float16,float16,0,0.027818667391935985
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,256,64,4,64,0,1,float16,fp8,0,0.027818667391935985
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,256,64,4,64,0,1,fp8,fp8,0,0.03293866664171219
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,256,64,8,64,0,1,float16,float16,0,0.027477333943049114
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,256,64,8,64,0,1,float16,fp8,0,0.027306665976842243
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,256,64,8,64,0,1,fp8,fp8,0,0.03345066557327906
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,256,64,64,64,0,1,float16,float16,0,0.01826133330663045
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,256,64,64,64,0,1,fp8,fp8,0,0.02252800017595291
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,256,64,64,64,0,1,float16,fp8,0,0.018090666582187016
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,256,64,1,64,0,1,float16,float16,0,0.01757866640885671
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,256,64,1,64,0,1,float16,fp8,0,0.017749333133300144
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,256,64,1,64,0,1,fp8,fp8,0,0.02218666672706604
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,256,64,2,64,0,1,float16,float16,0,0.01757866640885671
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,256,64,2,64,0,1,float16,fp8,0,0.018090666582187016
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,256,64,2,64,0,1,fp8,fp8,0,0.022015998760859173
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,256,64,4,64,0,1,float16,float16,0,0.017407999684413273
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,256,64,4,64,0,1,float16,fp8,0,0.017407999684413273
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,256,64,8,64,0,1,float16,float16,0,0.01757866640885671
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,256,64,4,64,0,1,fp8,fp8,0,0.022015998760859173
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,256,64,8,64,0,1,float16,fp8,0,0.017407999684413273
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,256,64,8,64,0,1,fp8,fp8,0,0.022015998760859173
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,128,64,1,64,0,1,float16,float16,0,1.9590826034545898
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,128,64,1,64,0,1,fp8,fp8,0,1.5322453180948894
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,128,64,1,64,0,1,float16,fp8,0,1.950208028157552
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,128,64,2,64,0,1,float16,float16,0,2.0392959912618003
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,128,64,2,64,0,1,fp8,fp8,0,1.5778133074442546
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,128,64,2,64,0,1,float16,fp8,0,2.0266666412353516
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,128,64,4,64,0,1,float16,float16,0,2.14408540725708
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,128,64,4,64,0,1,float16,fp8,0,2.1113173166910806
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,128,64,4,64,0,1,fp8,fp8,0,1.6580266952514648
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,128,64,8,64,0,1,float16,float16,0,2.334719975789388
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,128,64,8,64,0,1,fp8,fp8,0,1.7582079569498699
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,128,64,8,64,0,1,float16,fp8,0,2.298367977142334
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,128,64,64,64,0,1,float16,fp8,0,2.3123626708984375
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,128,64,64,64,0,1,float16,float16,0,2.4796160062154136
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,128,64,1,64,0,1,float16,float16,0,0.9084586302439371
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,128,64,64,64,0,1,fp8,fp8,0,1.8527572949727376
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,128,64,1,64,0,1,float16,fp8,0,0.9122133255004883
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,128,64,1,64,0,1,fp8,fp8,0,0.7703893184661865
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,128,64,2,64,0,1,float16,float16,0,0.935424009958903
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,128,64,2,64,0,1,float16,fp8,0,0.9402026335398356
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,128,64,2,64,0,1,fp8,fp8,0,0.7855786482493082
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,128,64,4,64,0,1,float16,float16,0,1.0023252964019775
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,128,64,4,64,0,1,float16,fp8,0,0.9833813508351644
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,128,64,4,64,0,1,fp8,fp8,0,0.8106666405995687
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,128,64,8,64,0,1,float16,float16,0,1.1165013313293457
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,128,64,8,64,0,1,float16,fp8,0,1.094655990600586
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,128,64,8,64,0,1,fp8,fp8,0,0.8712533315022787
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,128,64,64,64,0,1,float16,float16,0,1.1752106348673503
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,128,64,64,64,0,1,float16,fp8,0,1.0956799983978271
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,128,64,64,64,0,1,fp8,fp8,0,0.9106773535410563
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,128,64,1,64,0,1,float16,fp8,0,0.34201598167419434
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,128,64,1,64,0,1,float16,float16,0,0.34508800506591797
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,128,64,1,64,0,1,fp8,fp8,0,0.38997332255045575
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,128,64,2,64,0,1,float16,float16,0,0.36505599816640216
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,128,64,2,64,0,1,float16,fp8,0,0.36181334654490155
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,128,64,2,64,0,1,fp8,fp8,0,0.39082666238149005
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,128,64,4,64,0,1,float16,float16,0,0.3968000014623006
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,128,64,4,64,0,1,float16,fp8,0,0.39458131790161133
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,128,64,4,64,0,1,fp8,fp8,0,0.40089599291483563
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,128,64,8,64,0,1,float16,float16,0,0.4551680088043213
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,128,64,8,64,0,1,float16,fp8,0,0.4394666751225789
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,128,64,8,64,0,1,fp8,fp8,0,0.4384426673253377
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,128,64,64,64,0,1,float16,float16,0,0.501418670018514
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,128,64,64,64,0,1,float16,fp8,0,0.43383467197418213
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,128,64,64,64,0,1,fp8,fp8,0,0.44629331429799396
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,128,64,1,64,0,1,float16,float16,0,0.11281067132949829
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,128,64,1,64,0,1,float16,fp8,0,0.11349333326021831
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,128,64,1,64,0,1,fp8,fp8,0,0.14421332875887552
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,128,64,2,64,0,1,float16,float16,0,0.11878400047620137
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,128,64,2,64,0,1,float16,fp8,0,0.11485866705576579
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,128,64,2,64,0,1,fp8,fp8,0,0.14353066682815552
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,128,64,4,64,0,1,float16,float16,0,0.1264639993508657
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,128,64,4,64,0,1,float16,fp8,0,0.12458667159080505
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,128,64,4,64,0,1,fp8,fp8,0,0.144896000623703
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,128,64,8,64,0,1,float16,float16,0,0.1281706690788269
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,128,64,8,64,0,1,float16,fp8,0,0.12322133779525757
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,128,64,8,64,0,1,fp8,fp8,0,0.15069866180419922
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,128,64,64,64,0,1,float16,float16,0,0.08157866696516673
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,128,64,64,64,0,1,float16,fp8,0,0.06724266707897186
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,128,64,1,64,0,1,float16,float16,0,0.06229333579540253
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,128,64,64,64,0,1,fp8,fp8,0,0.15957333644231161
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,128,64,1,64,0,1,float16,fp8,0,0.05973333120346069
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,128,64,1,64,0,1,fp8,fp8,0,0.07935999830563863
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,128,64,2,64,0,1,float16,fp8,0,0.0602453351020813
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,128,64,2,64,0,1,float16,float16,0,0.06109866499900818
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,128,64,2,64,0,1,fp8,fp8,0,0.07918933530648549
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,128,64,4,64,0,1,float16,float16,0,0.059903999169667564
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,128,64,4,64,0,1,float16,fp8,0,0.0631466656923294
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,128,64,4,64,0,1,fp8,fp8,0,0.08021333316961925
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,128,64,8,64,0,1,float16,float16,0,0.06092800199985504
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,128,64,8,64,0,1,fp8,fp8,0,0.07799466451009114
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,128,64,8,64,0,1,float16,fp8,0,0.060415998101234436
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,128,64,64,64,0,1,float16,float16,0,0.040448000033696495
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,128,64,64,64,0,1,float16,fp8,0,0.03942399968703588
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,128,64,64,64,0,1,fp8,fp8,0,0.0481279989083608
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,128,64,1,64,0,1,float16,float16,0,0.03583999971548716
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,128,64,1,64,0,1,float16,fp8,0,0.03618133316437403
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,128,64,1,64,0,1,fp8,fp8,0,0.04471466441949209
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,128,64,2,64,0,1,float16,float16,0,0.036864000062147774
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,128,64,2,64,0,1,float16,fp8,0,0.036864000062147774
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,128,64,2,64,0,1,fp8,fp8,0,0.04505600035190582
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,128,64,4,64,0,1,float16,float16,0,0.03669333209594091
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,128,64,4,64,0,1,float16,fp8,0,0.0363520011305809
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,128,64,4,64,0,1,fp8,fp8,0,0.045567999283472695
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,128,64,8,64,0,1,float16,float16,0,0.03669333209594091
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,128,64,8,64,0,1,float16,fp8,0,0.0365226666132609
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,128,64,8,64,0,1,fp8,fp8,0,0.045567999283472695
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,128,64,64,64,0,1,float16,float16,0,0.023381332556406658
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,128,64,64,64,0,1,float16,fp8,0,0.023381332556406658
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,128,64,64,64,0,1,fp8,fp8,0,0.02679466704527537
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,128,64,1,64,0,1,float16,float16,0,0.02218666672706604
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,128,64,1,64,0,1,float16,fp8,0,0.022357332209746044
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,128,64,1,64,0,1,fp8,fp8,0,0.025941332181294758
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,128,64,2,64,0,1,float16,float16,0,0.022869333624839783
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,128,64,2,64,0,1,float16,fp8,0,0.022698665658632915
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,128,64,2,64,0,1,fp8,fp8,0,0.02628266563018163
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,128,64,4,64,0,1,float16,float16,0,0.02252800017595291
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,128,64,4,64,0,1,float16,fp8,0,0.02252800017595291
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,128,64,4,64,0,1,fp8,fp8,0,0.02628266563018163
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,128,64,8,64,0,1,float16,float16,0,0.022357332209746044
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,128,64,8,64,0,1,float16,fp8,0,0.022869333624839783
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,128,64,8,64,0,1,fp8,fp8,0,0.0264533335963885
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,128,64,64,64,0,1,float16,float16,0,0.015872000406185787
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,128,64,64,64,0,1,float16,fp8,0,0.015530666957298914
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,128,64,64,64,0,1,fp8,fp8,0,0.01791999985774358
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,128,64,1,64,0,1,float16,float16,0,0.014848000059525171
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,128,64,1,64,0,1,float16,fp8,0,0.015018666783968607
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,128,64,1,64,0,1,fp8,fp8,0,0.017749333133300144
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,128,64,2,64,0,1,float16,float16,0,0.015018666783968607
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,128,64,2,64,0,1,float16,fp8,0,0.015360000232855478
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,128,64,2,64,0,1,fp8,fp8,0,0.01791999985774358
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,128,64,4,64,0,1,float16,float16,0,0.014848000059525171
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,128,64,4,64,0,1,float16,fp8,0,0.014848000059525171
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,128,64,4,64,0,1,fp8,fp8,0,0.017407999684413273
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,128,64,8,64,0,1,float16,float16,0,0.014677333335081736
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,128,64,8,64,0,1,float16,fp8,0,0.015018666783968607
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,128,64,8,64,0,1,fp8,fp8,0,0.017749333133300144
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,128,64,64,64,0,1,float16,float16,0,0.011605333536863327
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,128,64,64,64,0,1,float16,fp8,0,0.011946666985750198
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,128,64,64,64,0,1,fp8,fp8,0,0.014335999886194864
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,128,64,1,64,0,1,float16,float16,0,0.011776000261306763
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,128,64,1,64,0,1,float16,fp8,0,0.011776000261306763
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,128,64,1,64,0,1,fp8,fp8,0,0.014335999886194864
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,128,64,2,64,0,1,float16,float16,0,0.011776000261306763
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,128,64,2,64,0,1,float16,fp8,0,0.011776000261306763
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,128,64,2,64,0,1,fp8,fp8,0,0.014165333161751429
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,128,64,4,64,0,1,float16,float16,0,0.011605333536863327
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,128,64,4,64,0,1,float16,fp8,0,0.011434666812419891
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,128,64,4,64,0,1,fp8,fp8,0,0.014165333161751429
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,128,64,8,64,0,1,float16,float16,0,0.011264000087976456
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,128,64,8,64,0,1,fp8,fp8,0,0.013994666437307993
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,128,64,8,64,0,1,float16,fp8,0,0.011434666812419891
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,64,64,1,64,0,1,float16,float16,0,0.9188693364461263
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,64,64,1,64,0,1,float16,fp8,0,0.9146026770273844
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,64,64,1,64,0,1,fp8,fp8,0,1.001301368077596
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,64,64,2,64,0,1,float16,float16,0,0.951637347539266
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,64,64,2,64,0,1,fp8,fp8,0,1.0292906761169434
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,64,64,2,64,0,1,float16,fp8,0,0.9562453428904215
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,64,64,4,64,0,1,float16,float16,0,1.0112000306447346
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,64,64,4,64,0,1,float16,fp8,0,0.9983999729156494
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,64,64,4,64,0,1,fp8,fp8,0,1.074005365371704
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,64,64,8,64,0,1,float16,fp8,0,1.108138640721639
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,64,64,8,64,0,1,float16,float16,0,1.1246933142344158
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,64,64,64,64,0,1,float16,float16,0,1.2028586864471436
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,64,64,8,64,0,1,fp8,fp8,0,1.1335679690043132
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,64,64,64,64,0,1,float16,fp8,0,1.1059199968973796
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,64,64,64,64,0,1,fp8,fp8,0,0.966655969619751
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,64,64,1,64,0,1,float16,float16,0,0.34628268082936603
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,64,64,1,64,0,1,float16,fp8,0,0.33740798632303876
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,64,64,1,64,0,1,fp8,fp8,0,0.49851731459299725
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,64,64,2,64,0,1,float16,float16,0,0.3657386700312297
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,64,64,2,64,0,1,float16,fp8,0,0.36078933874766034
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,64,64,4,64,0,1,float16,float16,0,0.3930453459421794
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,64,64,2,64,0,1,fp8,fp8,0,0.5036373138427734
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,64,64,4,64,0,1,float16,fp8,0,0.3862186670303345
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,64,64,4,64,0,1,fp8,fp8,0,0.5275306701660156
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,64,64,8,64,0,1,float16,float16,0,0.4626773198445638
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,64,64,8,64,0,1,float16,fp8,0,0.4427093267440796
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,64,64,8,64,0,1,fp8,fp8,0,0.5625173250834147
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,64,64,64,64,0,1,float16,float16,0,0.5039786497751871
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,64,64,64,64,0,1,float16,fp8,0,0.4363946517308553
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,64,64,64,64,0,1,fp8,fp8,0,0.4848639965057373
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,64,64,1,64,0,1,float16,float16,0,0.09301333626111348
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,64,64,1,64,0,1,float16,fp8,0,0.09045333663622539
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,64,64,1,64,0,1,fp8,fp8,0,0.20070399840672812
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,64,64,2,64,0,1,float16,float16,0,0.09335466225941975
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,64,64,2,64,0,1,float16,fp8,0,0.09318400422732036
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,64,64,2,64,0,1,fp8,fp8,0,0.20053333044052124
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,64,64,4,64,0,1,float16,float16,0,0.09454933802286784
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,64,64,4,64,0,1,float16,fp8,0,0.09386666615804036
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,64,64,4,64,0,1,fp8,fp8,0,0.2032639980316162
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,64,64,8,64,0,1,float16,float16,0,0.10240000486373901
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,64,64,8,64,0,1,float16,fp8,0,0.0962559978167216
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,64,64,8,64,0,1,fp8,fp8,0,0.211626668771108
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,64,64,64,64,0,1,float16,fp8,0,0.05495466788609823
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,64,64,64,64,0,1,float16,float16,0,0.08721066514650981
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,64,64,64,64,0,1,fp8,fp8,0,0.1925119956334432
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,64,64,1,64,0,1,float16,float16,0,0.048810665806134544
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,64,64,1,64,0,1,float16,fp8,0,0.04949333270390829
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,64,64,1,64,0,1,fp8,fp8,0,0.1063253382841746
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,64,64,2,64,0,1,float16,float16,0,0.05085866649945577
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,64,64,2,64,0,1,float16,fp8,0,0.04983466863632202
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,64,64,2,64,0,1,fp8,fp8,0,0.1063253382841746
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,64,64,4,64,0,1,float16,float16,0,0.04983466863632202
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,64,64,4,64,0,1,float16,fp8,0,0.05000533163547516
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,64,64,4,64,0,1,fp8,fp8,0,0.10752000411351521
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,64,64,8,64,0,1,float16,float16,0,0.05120000243186951
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,64,64,8,64,0,1,float16,fp8,0,0.04983466863632202
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,64,64,64,64,0,1,float16,float16,0,0.0341333324710528
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,64,64,8,64,0,1,fp8,fp8,0,0.10752000411351521
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,64,64,64,64,0,1,float16,fp8,0,0.03276800115903219
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,64,64,64,64,0,1,fp8,fp8,0,0.06468266745408376
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,64,64,1,64,0,1,float16,float16,0,0.030378667016824085
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,64,64,1,64,0,1,float16,fp8,0,0.030207999050617218
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,64,64,1,64,0,1,fp8,fp8,0,0.061610668897628784
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,64,64,2,64,0,1,float16,float16,0,0.03054933249950409
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,64,64,2,64,0,1,float16,fp8,0,0.03054933249950409
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,64,64,2,64,0,1,fp8,fp8,0,0.0628053347269694
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,64,64,4,64,0,1,float16,float16,0,0.030207999050617218
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,64,64,4,64,0,1,float16,fp8,0,0.03054933249950409
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,64,64,4,64,0,1,fp8,fp8,0,0.062122667829195656
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,64,64,8,64,0,1,float16,float16,0,0.031231999397277832
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,64,64,8,64,0,1,float16,fp8,0,0.03089066594839096
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,64,64,8,64,0,1,fp8,fp8,0,0.0628053347269694
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,64,64,64,64,0,1,float16,float16,0,0.020821332931518555
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,64,64,64,64,0,1,float16,fp8,0,0.020821332931518555
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,64,64,64,64,0,1,fp8,fp8,0,0.036864000062147774
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,64,64,1,64,0,1,float16,float16,0,0.01911466692884763
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,64,64,1,64,0,1,float16,fp8,0,0.01911466692884763
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,64,64,1,64,0,1,fp8,fp8,0,0.03549866626660029
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,64,64,2,64,0,1,float16,float16,0,0.019626667102177937
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,64,64,2,64,0,1,float16,fp8,0,0.019626667102177937
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,64,64,2,64,0,1,fp8,fp8,0,0.0363520011305809
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,64,64,4,64,0,1,float16,float16,0,0.019797333826621372
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,64,64,4,64,0,1,float16,fp8,0,0.019626667102177937
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,64,64,4,64,0,1,fp8,fp8,0,0.03618133316437403
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,64,64,8,64,0,1,float16,float16,0,0.019626667102177937
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,64,64,8,64,0,1,float16,fp8,0,0.019626667102177937
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,64,64,8,64,0,1,fp8,fp8,0,0.03669333209594091
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,64,64,64,64,0,1,float16,float16,0,0.013994666437307993
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,64,64,64,64,0,1,float16,fp8,0,0.013994666437307993
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,64,64,64,64,0,1,fp8,fp8,0,0.021674667795499165
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,64,64,1,64,0,1,float16,float16,0,0.012800000607967377
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,64,64,1,64,0,1,float16,fp8,0,0.012970666090647379
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,64,64,1,64,0,1,fp8,fp8,0,0.021503999829292297
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,64,64,2,64,0,1,float16,float16,0,0.012800000607967377
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,64,64,2,64,0,1,float16,fp8,0,0.01331199953953425
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,64,64,2,64,0,1,fp8,fp8,0,0.021503999829292297
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,64,64,4,64,0,1,float16,float16,0,0.013141332815090815
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,64,64,4,64,0,1,float16,fp8,0,0.012970666090647379
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,64,64,4,64,0,1,fp8,fp8,0,0.021333334346612293
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,64,64,8,64,0,1,float16,float16,0,0.012970666090647379
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,64,64,8,64,0,1,float16,fp8,0,0.012970666090647379
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,64,64,8,64,0,1,fp8,fp8,0,0.021674667795499165
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,64,64,64,64,0,1,float16,float16,0,0.010410666465759277
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,64,64,64,64,0,1,float16,fp8,0,0.010410666465759277
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,64,64,64,64,0,1,fp8,fp8,0,0.015360000232855478
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,64,64,1,64,0,1,float16,float16,0,0.010239999741315842
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,64,64,1,64,0,1,float16,fp8,0,0.010239999741315842
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,64,64,1,64,0,1,fp8,fp8,0,0.015189333508412043
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,64,64,2,64,0,1,float16,float16,0,0.010239999741315842
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,64,64,2,64,0,1,float16,fp8,0,0.010410666465759277
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,64,64,2,64,0,1,fp8,fp8,0,0.015189333508412043
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,64,64,4,64,0,1,float16,float16,0,0.010239999741315842
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,64,64,4,64,0,1,float16,fp8,0,0.010581333190202713
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,64,64,4,64,0,1,fp8,fp8,0,0.015360000232855478
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,64,64,8,64,0,1,float16,float16,0,0.010069333637754122
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,64,64,8,64,0,1,float16,fp8,0,0.010239999741315842
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,64,64,8,64,0,1,fp8,fp8,0,0.015189333508412043
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,64,64,64,64,0,1,float16,float16,0,0.009216000015536943
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,64,64,64,64,0,1,float16,fp8,0,0.009216000015536943
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,64,64,64,64,0,1,fp8,fp8,0,0.012800000607967377
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,64,64,1,64,0,1,float16,float16,0,0.00938666673998038
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,64,64,1,64,0,1,float16,fp8,0,0.009216000015536943
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,64,64,1,64,0,1,fp8,fp8,0,0.01331199953953425
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,64,64,2,64,0,1,float16,float16,0,0.009045333291093508
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,64,64,2,64,0,1,float16,fp8,0,0.00938666673998038
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,64,64,2,64,0,1,fp8,fp8,0,0.012970666090647379
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,64,64,4,64,0,1,float16,float16,0,0.009045333291093508
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,64,64,4,64,0,1,fp8,fp8,0,0.013141332815090815
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,64,64,4,64,0,1,float16,fp8,0,0.00938666673998038
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,64,64,8,64,0,1,float16,float16,0,0.009045333291093508
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,64,64,8,64,0,1,float16,fp8,0,0.00938666673998038
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,64,64,8,64,0,1,fp8,fp8,0,0.013141332815090815
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,32,64,1,64,0,1,float16,float16,0,0.3179519971211751
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,32,64,1,64,0,1,float16,fp8,0,0.3094186584154765
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,32,64,1,64,0,1,fp8,fp8,0,0.7579306761423746
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,32,64,2,64,0,1,float16,float16,0,0.3346773386001587
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,32,64,2,64,0,1,float16,fp8,0,0.3295573393503825
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,32,64,4,64,0,1,float16,float16,0,0.36454399426778156
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,32,64,2,64,0,1,fp8,fp8,0,0.7654399871826172
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,32,64,4,64,0,1,float16,fp8,0,0.3590826590855916
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,32,64,4,64,0,1,fp8,fp8,0,0.787285327911377
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,32,64,8,64,0,1,float16,float16,0,0.4478293259938558
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,32,64,8,64,0,1,float16,fp8,0,0.42444801330566406
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,32,64,64,64,0,1,float16,float16,0,0.505514661471049
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,32,64,8,64,0,1,fp8,fp8,0,0.8217600186665853
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,32,64,64,64,0,1,float16,fp8,0,0.44390400250752765
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,32,64,64,64,0,1,fp8,fp8,0,0.6132053136825562
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,32,64,1,64,0,1,float16,float16,0,0.10188800096511841
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,32,64,1,64,0,1,float16,fp8,0,0.10240000486373901
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,32,64,1,64,0,1,fp8,fp8,0,0.33126399914423627
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,32,64,2,64,0,1,float16,float16,0,0.106495996316274
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,32,64,2,64,0,1,float16,fp8,0,0.10478933652242024
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,32,64,2,64,0,1,fp8,fp8,0,0.3309226632118225
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,32,64,4,64,0,1,float16,float16,0,0.10461866855621338
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,32,64,4,64,0,1,float16,fp8,0,0.10700800021489461
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,32,64,4,64,0,1,fp8,fp8,0,0.3326293428738912
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,32,64,8,64,0,1,float16,float16,0,0.10854400197664897
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,32,64,8,64,0,1,float16,fp8,0,0.10547199845314026
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,32,64,8,64,0,1,fp8,fp8,0,0.3426986535390218
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,32,64,64,64,0,1,float16,fp8,0,0.0602453351020813
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,32,64,64,64,0,1,float16,float16,0,0.08328533172607422
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,32,64,64,64,0,1,fp8,fp8,0,0.2629973292350769
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,32,64,1,64,0,1,float16,float16,0,0.053930665055910744
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,32,64,1,64,0,1,float16,fp8,0,0.054272000988324486
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,32,64,1,64,0,1,fp8,fp8,0,0.1723733345667521
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,32,64,2,64,0,1,float16,fp8,0,0.05461333195368449
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,32,64,2,64,0,1,float16,float16,0,0.05478399991989136
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,32,64,2,64,0,1,fp8,fp8,0,0.17339734236399332
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,32,64,4,64,0,1,float16,float16,0,0.05478399991989136
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,32,64,4,64,0,1,float16,fp8,0,0.054101333022117615
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,32,64,4,64,0,1,fp8,fp8,0,0.17322667439778647
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,32,64,8,64,0,1,float16,float16,0,0.05495466788609823
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,32,64,8,64,0,1,float16,fp8,0,0.05478399991989136
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,32,64,8,64,0,1,fp8,fp8,0,0.17407999436060587
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,32,64,64,64,0,1,float16,float16,0,0.03532800078392029
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,32,64,64,64,0,1,float16,fp8,0,0.0341333324710528
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,32,64,64,64,0,1,fp8,fp8,0,0.09762133161226909
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,32,64,1,64,0,1,float16,float16,0,0.032255999743938446
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,32,64,1,64,0,1,float16,fp8,0,0.03242666771014532
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,32,64,1,64,0,1,fp8,fp8,0,0.09454933802286784
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,32,64,2,64,0,1,float16,float16,0,0.03276800115903219
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,32,64,2,64,0,1,float16,fp8,0,0.03259733319282532
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,32,64,2,64,0,1,fp8,fp8,0,0.09471999605496724
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,32,64,4,64,0,1,float16,float16,0,0.03259733319282532
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,32,64,4,64,0,1,float16,fp8,0,0.03276800115903219
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,32,64,4,64,0,1,fp8,fp8,0,0.09523199995358785
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,32,64,8,64,0,1,float16,float16,0,0.03310933212439219
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,32,64,8,64,0,1,float16,fp8,0,0.03242666771014532
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,32,64,64,64,0,1,float16,float16,0,0.021333334346612293
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,32,64,8,64,0,1,fp8,fp8,0,0.09506133198738098
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,32,64,64,64,0,1,fp8,fp8,0,0.05649066468079885
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,32,64,64,64,0,1,float16,fp8,0,0.020992000897725422
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,32,64,1,64,0,1,float16,float16,0,0.020479999482631683
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,32,64,1,64,0,1,float16,fp8,0,0.02065066620707512
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,32,64,1,64,0,1,fp8,fp8,0,0.05529599885145823
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,32,64,2,64,0,1,float16,float16,0,0.020821332931518555
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,32,64,2,64,0,1,float16,fp8,0,0.020992000897725422
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,32,64,4,64,0,1,float16,float16,0,0.020992000897725422
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,32,64,2,64,0,1,fp8,fp8,0,0.0554666668176651
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,32,64,4,64,0,1,float16,fp8,0,0.020992000897725422
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,32,64,4,64,0,1,fp8,fp8,0,0.05563733478387197
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,32,64,8,64,0,1,float16,float16,0,0.020479999482631683
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,32,64,8,64,0,1,float16,fp8,0,0.020821332931518555
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,32,64,8,64,0,1,fp8,fp8,0,0.05614933371543884
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,32,64,64,64,0,1,float16,float16,0,0.0145066666106383
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,32,64,64,64,0,1,float16,fp8,0,0.014165333161751429
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,32,64,64,64,0,1,fp8,fp8,0,0.03242666771014532
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,32,64,1,64,0,1,float16,float16,0,0.013823999712864557
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,32,64,1,64,0,1,float16,fp8,0,0.013482666263977686
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,32,64,1,64,0,1,fp8,fp8,0,0.031914666295051575
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,32,64,2,64,0,1,float16,float16,0,0.013653332988421122
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,32,64,2,64,0,1,float16,fp8,0,0.013823999712864557
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,32,64,2,64,0,1,fp8,fp8,0,0.032085334261258446
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,32,64,4,64,0,1,float16,float16,0,0.013653332988421122
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,32,64,4,64,0,1,float16,fp8,0,0.013653332988421122
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,32,64,4,64,0,1,fp8,fp8,0,0.032255999743938446
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,32,64,8,64,0,1,float16,float16,0,0.013482666263977686
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,32,64,8,64,0,1,float16,fp8,0,0.013482666263977686
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,32,64,8,64,0,1,fp8,fp8,0,0.032085334261258446
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,32,64,64,64,0,1,float16,float16,0,0.010410666465759277
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,32,64,64,64,0,1,float16,fp8,0,0.010751999914646149
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,32,64,64,64,0,1,fp8,fp8,0,0.019797333826621372
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,32,64,1,64,0,1,float16,float16,0,0.010581333190202713
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,32,64,1,64,0,1,float16,fp8,0,0.010922666639089584
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,32,64,2,64,0,1,float16,float16,0,0.010239999741315842
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,32,64,1,64,0,1,fp8,fp8,0,0.019626667102177937
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,32,64,2,64,0,1,float16,fp8,0,0.010410666465759277
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,32,64,2,64,0,1,fp8,fp8,0,0.019626667102177937
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,32,64,4,64,0,1,float16,float16,0,0.010410666465759277
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,32,64,4,64,0,1,float16,fp8,0,0.010751999914646149
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,32,64,4,64,0,1,fp8,fp8,0,0.019968000551064808
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,32,64,8,64,0,1,float16,float16,0,0.010410666465759277
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,32,64,8,64,0,1,float16,fp8,0,0.010239999741315842
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,32,64,8,64,0,1,fp8,fp8,0,0.019797333826621372
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,32,64,64,64,0,1,float16,float16,0,0.008703999842206636
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,32,64,64,64,0,1,fp8,fp8,0,0.014677333335081736
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,32,64,64,64,0,1,float16,fp8,0,0.009045333291093508
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,32,64,1,64,0,1,float16,float16,0,0.009045333291093508
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,32,64,1,64,0,1,float16,fp8,0,0.009216000015536943
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,32,64,1,64,0,1,fp8,fp8,0,0.0145066666106383
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,32,64,2,64,0,1,float16,float16,0,0.009045333291093508
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,32,64,2,64,0,1,float16,fp8,0,0.009216000015536943
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,32,64,2,64,0,1,fp8,fp8,0,0.014848000059525171
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,32,64,4,64,0,1,float16,float16,0,0.009216000015536943
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,32,64,4,64,0,1,float16,fp8,0,0.009216000015536943
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,32,64,4,64,0,1,fp8,fp8,0,0.015018666783968607
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,32,64,8,64,0,1,fp8,fp8,0,0.0145066666106383
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,32,64,8,64,0,1,float16,float16,0,0.009045333291093508
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,32,64,8,64,0,1,float16,fp8,0,0.010069333637754122
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,32,64,64,64,0,1,float16,float16,0,0.009162666896979014
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,32,64,64,64,0,1,float16,fp8,0,0.0085333331177632
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,32,64,64,64,0,1,fp8,fp8,0,0.012458667159080505
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,32,64,1,64,0,1,float16,float16,0,0.008192000289758047
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,32,64,1,64,0,1,float16,fp8,0,0.008703999842206636
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,32,64,1,64,0,1,fp8,fp8,0,0.012629333883523941
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,32,64,2,64,0,1,float16,float16,0,0.0085333331177632
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,32,64,2,64,0,1,float16,fp8,0,0.008362666393319765
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,32,64,2,64,0,1,fp8,fp8,0,0.012800000607967377
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,32,64,4,64,0,1,float16,float16,0,0.008021333565314611
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,32,64,4,64,0,1,float16,fp8,0,0.008874666566650072
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,32,64,8,64,0,1,float16,float16,0,0.008362666393319765
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,32,64,4,64,0,1,fp8,fp8,0,0.012629333883523941
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,32,64,8,64,0,1,float16,fp8,0,0.008874666566650072
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,32,64,8,64,0,1,fp8,fp8,0,0.012458667159080505
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,16,64,1,64,0,1,float16,fp8,0,0.15103999773661295
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,16,64,1,64,0,1,float16,float16,0,0.15018666783968607
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,16,64,1,64,0,1,fp8,fp8,0,0.5990399916966757
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,16,64,2,64,0,1,float16,float16,0,0.15615999698638916
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,16,64,2,64,0,1,float16,fp8,0,0.15496533115704855
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,16,64,2,64,0,1,fp8,fp8,0,0.5995519955952963
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,16,64,4,64,0,1,float16,float16,0,0.15633066495259604
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,16,64,4,64,0,1,float16,fp8,0,0.15786666671435037
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,16,64,8,64,0,1,float16,float16,0,0.15667200088500977
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,16,64,4,64,0,1,fp8,fp8,0,0.6036479870478312
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,16,64,8,64,0,1,float16,fp8,0,0.16196266810099283
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,16,64,64,64,0,1,float16,float16,0,0.10410666465759277
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,16,64,8,64,0,1,fp8,fp8,0,0.6109866698582967
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,16,64,64,64,0,1,float16,fp8,0,0.08072533210118611
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,16,64,64,64,0,1,fp8,fp8,0,0.3974826733271281
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,16,64,1,64,0,1,float16,float16,0,0.0773119976123174
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,16,64,1,64,0,1,float16,fp8,0,0.07748266557852428
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,16,64,1,64,0,1,fp8,fp8,0,0.30532266696294147
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,16,64,2,64,0,1,float16,float16,0,0.07816533247629802
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,16,64,2,64,0,1,float16,fp8,0,0.07833600044250488
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,16,64,2,64,0,1,fp8,fp8,0,0.3068586587905884
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,16,64,4,64,0,1,float16,float16,0,0.07816533247629802
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,16,64,4,64,0,1,float16,fp8,0,0.07799466451009114
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,16,64,4,64,0,1,fp8,fp8,0,0.30617600679397583
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,16,64,8,64,0,1,float16,float16,0,0.07884799937407176
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,16,64,8,64,0,1,float16,fp8,0,0.07799466451009114
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,16,64,8,64,0,1,fp8,fp8,0,0.30668799082438153
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,16,64,64,64,0,1,float16,float16,0,0.04437333345413208
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,16,64,64,64,0,1,float16,fp8,0,0.04334933559099833
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,16,64,1,64,0,1,float16,float16,0,0.04369066655635834
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,16,64,64,64,0,1,fp8,fp8,0,0.16366933782895407
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,16,64,1,64,0,1,float16,fp8,0,0.044031997521718345
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,16,64,1,64,0,1,fp8,fp8,0,0.1609386702378591
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,16,64,2,64,0,1,float16,float16,0,0.04437333345413208
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,16,64,2,64,0,1,float16,fp8,0,0.04420266548792521
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,16,64,2,64,0,1,fp8,fp8,0,0.16127999623616537
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,16,64,4,64,0,1,float16,float16,0,0.044031997521718345
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,16,64,4,64,0,1,float16,fp8,0,0.04386133452256521
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,16,64,4,64,0,1,fp8,fp8,0,0.16196266810099283
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,16,64,8,64,0,1,float16,float16,0,0.044031997521718345
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,16,64,8,64,0,1,float16,fp8,0,0.04437333345413208
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,16,64,8,64,0,1,fp8,fp8,0,0.16110933820406595
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,16,64,64,64,0,1,float16,float16,0,0.025770666698614757
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,16,64,64,64,0,1,float16,fp8,0,0.025941332181294758
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,16,64,64,64,0,1,fp8,fp8,0,0.08891733487447102
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,16,64,1,64,0,1,float16,float16,0,0.02611200014750163
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,16,64,1,64,0,1,float16,fp8,0,0.0266239990790685
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,16,64,1,64,0,1,fp8,fp8,0,0.08772266904513042
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,16,64,2,64,0,1,float16,float16,0,0.0266239990790685
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,16,64,2,64,0,1,float16,fp8,0,0.02679466704527537
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,16,64,2,64,0,1,fp8,fp8,0,0.08772266904513042
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,16,64,4,64,0,1,float16,float16,0,0.02679466704527537
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,16,64,4,64,0,1,float16,fp8,0,0.026965332527955372
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,16,64,4,64,0,1,fp8,fp8,0,0.08823466300964355
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,16,64,8,64,0,1,float16,float16,0,0.0266239990790685
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,16,64,8,64,0,1,float16,fp8,0,0.02679466704527537
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,16,64,8,64,0,1,fp8,fp8,0,0.08874666690826416
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,16,64,64,64,0,1,float16,float16,0,0.01672533278663953
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,16,64,64,64,0,1,float16,fp8,0,0.016384000579516094
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,16,64,64,64,0,1,fp8,fp8,0,0.051029334465662636
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,16,64,1,64,0,1,float16,float16,0,0.016384000579516094
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,16,64,1,64,0,1,float16,fp8,0,0.01621333385507266
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,16,64,1,64,0,1,fp8,fp8,0,0.051370665431022644
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,16,64,2,64,0,1,float16,float16,0,0.016384000579516094
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,16,64,2,64,0,1,float16,fp8,0,0.016554666062196095
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,16,64,2,64,0,1,fp8,fp8,0,0.051370665431022644
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,16,64,4,64,0,1,float16,float16,0,0.016384000579516094
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,16,64,4,64,0,1,float16,fp8,0,0.016384000579516094
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,16,64,8,64,0,1,float16,float16,0,0.01621333385507266
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,16,64,4,64,0,1,fp8,fp8,0,0.051882664362589516
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,16,64,8,64,0,1,float16,fp8,0,0.016384000579516094
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,16,64,8,64,0,1,fp8,fp8,0,0.051370665431022644
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,16,64,64,64,0,1,float16,float16,0,0.011434666812419891
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,16,64,64,64,0,1,float16,fp8,0,0.011946666985750198
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,16,64,64,64,0,1,fp8,fp8,0,0.03054933249950409
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,16,64,1,64,0,1,float16,float16,0,0.011605333536863327
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,16,64,1,64,0,1,float16,fp8,0,0.011605333536863327
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,16,64,1,64,0,1,fp8,fp8,0,0.030378667016824085
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,16,64,2,64,0,1,float16,float16,0,0.011605333536863327
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,16,64,2,64,0,1,float16,fp8,0,0.011776000261306763
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,16,64,2,64,0,1,fp8,fp8,0,0.030378667016824085
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,16,64,4,64,0,1,float16,float16,0,0.011434666812419891
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,16,64,4,64,0,1,fp8,fp8,0,0.03054933249950409
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,16,64,4,64,0,1,float16,fp8,0,0.011776000261306763
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,16,64,8,64,0,1,float16,float16,0,0.011434666812419891
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,16,64,8,64,0,1,float16,fp8,0,0.011776000261306763
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,16,64,8,64,0,1,fp8,fp8,0,0.030378667016824085
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,16,64,64,64,0,1,float16,float16,0,0.00938666673998038
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,16,64,64,64,0,1,float16,fp8,0,0.009045333291093508
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,16,64,64,64,0,1,fp8,fp8,0,0.01877333347996076
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,16,64,1,64,0,1,float16,float16,0,0.008874666566650072
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,16,64,1,64,0,1,float16,fp8,0,0.009045333291093508
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,16,64,1,64,0,1,fp8,fp8,0,0.01877333347996076
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,16,64,2,64,0,1,float16,float16,0,0.00938666673998038
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,16,64,2,64,0,1,float16,fp8,0,0.009216000015536943
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,16,64,2,64,0,1,fp8,fp8,0,0.018602666755517323
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,16,64,4,64,0,1,float16,float16,0,0.008703999842206636
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,16,64,4,64,0,1,float16,fp8,0,0.009045333291093508
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,16,64,4,64,0,1,fp8,fp8,0,0.01877333347996076
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,16,64,8,64,0,1,float16,float16,0,0.008874666566650072
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,16,64,8,64,0,1,fp8,fp8,0,0.01911466692884763
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,16,64,8,64,0,1,float16,fp8,0,0.009045333291093508
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,16,64,64,64,0,1,float16,float16,0,0.008021333565314611
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,16,64,64,64,0,1,float16,fp8,0,0.0085333331177632
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,16,64,64,64,0,1,fp8,fp8,0,0.013653332988421122
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,16,64,1,64,0,1,float16,float16,0,0.008192000289758047
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,16,64,1,64,0,1,float16,fp8,0,0.0085333331177632
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,16,64,1,64,0,1,fp8,fp8,0,0.013994666437307993
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,16,64,2,64,0,1,float16,float16,0,0.009045333291093508
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,16,64,2,64,0,1,float16,fp8,0,0.008362666393319765
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,16,64,2,64,0,1,fp8,fp8,0,0.014165333161751429
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,16,64,4,64,0,1,float16,float16,0,0.0085333331177632
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,16,64,4,64,0,1,float16,fp8,0,0.008362666393319765
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,16,64,4,64,0,1,fp8,fp8,0,0.013823999712864557
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,16,64,8,64,0,1,float16,float16,0,0.008703999842206636
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,16,64,8,64,0,1,float16,fp8,0,0.008362666393319765
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,16,64,8,64,0,1,fp8,fp8,0,0.013994666437307993
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,16,64,64,64,0,1,float16,float16,0,0.007850666840871176
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,16,64,64,64,0,1,float16,fp8,0,0.008021333565314611
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,16,64,64,64,0,1,fp8,fp8,0,0.012117333710193634
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,16,64,1,64,0,1,float16,float16,0,0.008362666393319765
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,16,64,1,64,0,1,float16,fp8,0,0.008362666393319765
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,16,64,1,64,0,1,fp8,fp8,0,0.012458667159080505
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,16,64,2,64,0,1,float16,float16,0,0.0085333331177632
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,16,64,2,64,0,1,float16,fp8,0,0.009216000015536943
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,16,64,2,64,0,1,fp8,fp8,0,0.01228800043463707
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,16,64,4,64,0,1,float16,float16,0,0.007850666840871176
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,16,64,4,64,0,1,float16,fp8,0,0.008362666393319765
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,16,64,4,64,0,1,fp8,fp8,0,0.01228800043463707
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,16,64,8,64,0,1,float16,float16,0,0.0085333331177632
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,16,64,8,64,0,1,float16,fp8,0,0.008703999842206636
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,16,64,8,64,0,1,fp8,fp8,0,0.01228800043463707
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,16384,48,1,64,0,1,float16,fp8,0,95.2596435546875
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,16384,48,1,64,0,1,float16,float16,0,96.11246744791667
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,16384,48,2,64,0,1,float16,fp8,0,92.25062052408855
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,16384,48,2,64,0,1,float16,float16,0,94.5587158203125
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,16384,48,4,64,0,1,float16,float16,0,93.16505940755208
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,16384,48,4,64,0,1,float16,fp8,0,93.03262329101562
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,16384,48,1,64,0,1,fp8,fp8,0,117.1763203938802
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,16384,48,2,64,0,1,fp8,fp8,0,118.86984252929688
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,16384,48,48,64,0,1,float16,float16,0,46.915924072265625
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,16384,48,48,64,0,1,float16,fp8,0,46.754302978515625
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,16384,48,1,64,0,1,float16,float16,0,45.08330790201823
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,16384,48,48,64,0,1,fp8,fp8,0,61.46235656738281
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,16384,48,8,64,0,1,float16,fp8,0,91.93198649088542
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,16384,48,8,64,0,1,float16,float16,0,95.09325154622395
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,16384,48,4,64,0,1,fp8,fp8,0,119.29395548502605
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,16384,48,1,64,0,1,float16,fp8,0,45.45877583821615
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,16384,48,8,64,0,1,fp8,fp8,0,120.48793538411458
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,16384,48,1,64,0,1,fp8,fp8,0,58.691243489583336
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,16384,48,2,64,0,1,float16,float16,0,44.5837656656901
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,16384,48,2,64,0,1,float16,fp8,0,44.00144958496094
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,16384,48,4,64,0,1,float16,float16,0,45.38829040527344
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,16384,48,2,64,0,1,fp8,fp8,0,58.06438191731771
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,16384,48,4,64,0,1,float16,fp8,0,44.992513020833336
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,16384,48,48,64,0,1,float16,float16,0,22.688425699869793
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,16384,48,8,64,0,1,float16,float16,0,44.38050333658854
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,16384,48,4,64,0,1,fp8,fp8,0,58.531840006510414
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,16384,48,8,64,0,1,float16,fp8,0,44.73497517903646
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,16384,48,48,64,0,1,float16,fp8,0,23.634432474772137
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,16384,48,8,64,0,1,fp8,fp8,0,58.649943033854164
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,16384,48,48,64,0,1,fp8,fp8,0,30.488576253255207
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,16384,48,1,64,0,1,float16,float16,0,23.074305216471355
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,16384,48,1,64,0,1,float16,fp8,0,22.448811848958332
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,16384,48,2,64,0,1,float16,float16,0,22.783658345540363
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,16384,48,1,64,0,1,fp8,fp8,0,29.134506225585938
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,16384,48,2,64,0,1,float16,fp8,0,23.039830525716145
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,16384,48,2,64,0,1,fp8,fp8,0,29.481470743815105
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,16384,48,4,64,0,1,float16,float16,0,22.378496805826824
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,16384,48,4,64,0,1,float16,fp8,0,22.022486368815105
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,16384,48,48,64,0,1,float16,float16,0,12.239189147949219
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,16384,48,8,64,0,1,float16,float16,0,21.948758443196613
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,16384,48,4,64,0,1,fp8,fp8,0,29.117268880208332
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,16384,48,8,64,0,1,float16,fp8,0,22.070782979329426
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,16384,48,48,64,0,1,float16,fp8,0,12.17962646484375
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,16384,48,8,64,0,1,fp8,fp8,0,29.30005391438802
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,16384,48,1,64,0,1,float16,float16,0,11.714900970458984
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,16384,48,48,64,0,1,fp8,fp8,0,15.226027170817057
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,16384,48,1,64,0,1,float16,fp8,0,11.986090342203775
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,16384,48,2,64,0,1,float16,float16,0,11.666943868001303
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,16384,48,1,64,0,1,fp8,fp8,0,14.816767374674479
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,16384,48,2,64,0,1,float16,fp8,0,11.749205271402994
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,16384,48,2,64,0,1,fp8,fp8,0,15.032831827799479
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,16384,48,4,64,0,1,float16,float16,0,11.652437845865885
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,16384,48,4,64,0,1,float16,fp8,0,11.48245366414388
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,16384,48,4,64,0,1,fp8,fp8,0,14.74013900756836
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,16384,48,8,64,0,1,float16,float16,0,11.76473617553711
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,16384,48,8,64,0,1,float16,fp8,0,11.74630355834961
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,16384,48,8,64,0,1,fp8,fp8,0,14.896469116210938
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,12288,48,1,64,0,1,float16,float16,0,51.73486836751302
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,12288,48,1,64,0,1,float16,fp8,0,53.772969563802086
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,12288,48,2,64,0,1,float16,float16,0,51.32868448893229
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,12288,48,2,64,0,1,float16,fp8,0,51.96851094563802
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,12288,48,4,64,0,1,float16,float16,0,51.35052998860677
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,12288,48,1,64,0,1,fp8,fp8,0,67.05151875813802
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,12288,48,4,64,0,1,float16,fp8,0,50.603861490885414
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,12288,48,2,64,0,1,fp8,fp8,0,68.37759908040364
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,12288,48,48,64,0,1,float16,float16,0,27.16552480061849
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,12288,48,48,64,0,1,float16,fp8,0,27.251370747884113
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,12288,48,1,64,0,1,float16,float16,0,26.766507466634113
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,12288,48,48,64,0,1,fp8,fp8,0,35.34899139404297
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,12288,48,8,64,0,1,float16,fp8,0,50.877950032552086
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,12288,48,8,64,0,1,float16,float16,0,53.192362467447914
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,12288,48,4,64,0,1,fp8,fp8,0,67.76934305826823
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,12288,48,8,64,0,1,fp8,fp8,0,68.22878011067708
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,12288,48,1,64,0,1,float16,fp8,0,26.2476806640625
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,12288,48,1,64,0,1,fp8,fp8,0,33.18988800048828
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,12288,48,2,64,0,1,float16,float16,0,26.443435668945312
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,12288,48,2,64,0,1,float16,fp8,0,25.640106201171875
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,12288,48,4,64,0,1,float16,float16,0,26.503509521484375
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,12288,48,4,64,0,1,float16,fp8,0,25.538047790527344
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,12288,48,2,64,0,1,fp8,fp8,0,33.00591023763021
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,12288,48,48,64,0,1,float16,float16,0,13.955242156982422
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,12288,48,8,64,0,1,float16,float16,0,25.575764973958332
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,12288,48,4,64,0,1,fp8,fp8,0,33.040384928385414
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,12288,48,48,64,0,1,float16,fp8,0,14.036309560139975
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,12288,48,1,64,0,1,float16,float16,0,13.419007619222006
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,12288,48,8,64,0,1,float16,fp8,0,25.985877990722656
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,12288,48,48,64,0,1,fp8,fp8,0,17.53873062133789
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,12288,48,8,64,0,1,fp8,fp8,0,33.80036163330078
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,12288,48,1,64,0,1,float16,fp8,0,13.431296030680338
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,12288,48,1,64,0,1,fp8,fp8,0,16.58555730183919
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,12288,48,2,64,0,1,float16,float16,0,13.321216583251953
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,12288,48,2,64,0,1,float16,fp8,0,13.696341196695963
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,12288,48,4,64,0,1,float16,float16,0,13.347840627034506
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,12288,48,4,64,0,1,float16,fp8,0,13.21011225382487
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,12288,48,2,64,0,1,fp8,fp8,0,16.532821655273438
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,12288,48,4,64,0,1,fp8,fp8,0,16.85930633544922
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,12288,48,48,64,0,1,float16,float16,0,7.375189463297526
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,12288,48,8,64,0,1,float16,float16,0,13.28042729695638
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,12288,48,8,64,0,1,float16,fp8,0,13.077674865722656
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,12288,48,48,64,0,1,float16,fp8,0,7.12823486328125
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,12288,48,1,64,0,1,float16,float16,0,6.376448313395183
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,12288,48,48,64,0,1,fp8,fp8,0,8.91426150004069
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,12288,48,8,64,0,1,fp8,fp8,0,16.66389338175456
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,12288,48,1,64,0,1,float16,fp8,0,6.596778869628906
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,12288,48,2,64,0,1,float16,float16,0,6.777173360188802
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,12288,48,1,64,0,1,fp8,fp8,0,8.655871709187826
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,12288,48,2,64,0,1,float16,fp8,0,6.555989583333333
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,12288,48,4,64,0,1,float16,float16,0,6.225066502888997
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,12288,48,4,64,0,1,float16,fp8,0,7.002623875935872
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,12288,48,2,64,0,1,fp8,fp8,0,8.572757085164389
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,12288,48,4,64,0,1,fp8,fp8,0,8.425130844116211
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,12288,48,8,64,0,1,float16,float16,0,6.599679946899414
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,12288,48,8,64,0,1,float16,fp8,0,6.650197347005208
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,12288,48,8,64,0,1,fp8,fp8,0,8.658432006835938
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,10240,48,1,64,0,1,float16,float16,0,36.709716796875
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,10240,48,1,64,0,1,float16,fp8,0,36.49706776936849
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,10240,48,2,64,0,1,float16,float16,0,37.05582936604818
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,10240,48,2,64,0,1,float16,fp8,0,36.97459157307943
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,10240,48,1,64,0,1,fp8,fp8,0,46.46092732747396
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,10240,48,4,64,0,1,float16,float16,0,36.16767883300781
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,10240,48,2,64,0,1,fp8,fp8,0,47.19974263509115
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,10240,48,4,64,0,1,float16,fp8,0,37.589332580566406
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,10240,48,48,64,0,1,float16,float16,0,18.727935791015625
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,10240,48,48,64,0,1,float16,fp8,0,18.6059087117513
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,10240,48,8,64,0,1,float16,float16,0,36.07824961344401
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,10240,48,1,64,0,1,float16,float16,0,19.481258392333984
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,10240,48,8,64,0,1,float16,fp8,0,36.75050608317057
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,10240,48,48,64,0,1,fp8,fp8,0,25.12401072184245
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,10240,48,4,64,0,1,fp8,fp8,0,46.88281758626302
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,10240,48,8,64,0,1,fp8,fp8,0,47.28712463378906
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,10240,48,1,64,0,1,float16,fp8,0,18.847403208414715
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,10240,48,2,64,0,1,float16,float16,0,18.41595713297526
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,10240,48,1,64,0,1,fp8,fp8,0,23.217152913411457
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,10240,48,2,64,0,1,float16,fp8,0,18.997931162516277
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,10240,48,4,64,0,1,float16,fp8,0,18.441898345947266
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,10240,48,2,64,0,1,fp8,fp8,0,23.309824625651043
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,10240,48,4,64,0,1,float16,float16,0,18.77179718017578
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,10240,48,4,64,0,1,fp8,fp8,0,23.269887288411457
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,10240,48,8,64,0,1,float16,float16,0,18.410837809244793
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,10240,48,48,64,0,1,float16,float16,0,10.335914611816406
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,10240,48,1,64,0,1,float16,float16,0,9.758549372355143
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,10240,48,48,64,0,1,float16,fp8,0,10.110122680664062
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,10240,48,8,64,0,1,float16,fp8,0,18.276351928710938
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,10240,48,48,64,0,1,fp8,fp8,0,12.646570841471354
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,10240,48,8,64,0,1,fp8,fp8,0,23.514623006184895
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,10240,48,1,64,0,1,float16,fp8,0,9.643690745035807
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,10240,48,2,64,0,1,float16,float16,0,9.603925069173178
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,10240,48,1,64,0,1,fp8,fp8,0,11.781973520914713
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,10240,48,4,64,0,1,float16,float16,0,9.577130635579428
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,10240,48,2,64,0,1,float16,fp8,0,10.074965159098307
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,10240,48,2,64,0,1,fp8,fp8,0,11.774293263753256
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,10240,48,4,64,0,1,float16,fp8,0,9.586175918579102
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,10240,48,4,64,0,1,fp8,fp8,0,11.744085947672525
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,10240,48,8,64,0,1,float16,float16,0,9.410048166910807
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,10240,48,8,64,0,1,float16,fp8,0,10.182826360066732
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,10240,48,48,64,0,1,float16,float16,0,4.802389462788899
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,10240,48,48,64,0,1,float16,fp8,0,4.588543891906738
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,10240,48,8,64,0,1,fp8,fp8,0,11.842730204264322
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,10240,48,1,64,0,1,float16,float16,0,4.648277282714844
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,10240,48,48,64,0,1,fp8,fp8,0,6.339071909586589
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,10240,48,1,64,0,1,float16,fp8,0,4.509866714477539
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,10240,48,1,64,0,1,fp8,fp8,0,5.878271738688151
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,10240,48,2,64,0,1,float16,float16,0,4.241920153299968
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,10240,48,2,64,0,1,float16,fp8,0,4.528469403584798
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,10240,48,2,64,0,1,fp8,fp8,0,5.974698384602864
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,10240,48,4,64,0,1,float16,float16,0,4.616703987121582
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,10240,48,4,64,0,1,float16,fp8,0,4.584277470906575
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,10240,48,4,64,0,1,fp8,fp8,0,5.961386362711589
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,10240,48,8,64,0,1,float16,float16,0,4.520277341206868
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,10240,48,8,64,0,1,float16,fp8,0,3.9041706720987954
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,10240,48,8,64,0,1,fp8,fp8,0,6.102869033813477
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,8192,48,1,64,0,1,float16,float16,0,48.20667521158854
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,8192,48,1,64,0,1,float16,fp8,0,49.77766418457031
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,8192,48,2,64,0,1,float16,float16,0,49.96983337402344
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,8192,48,2,64,0,1,float16,fp8,0,49.05181884765625
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,8192,48,4,64,0,1,float16,float16,0,49.08527119954427
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,8192,48,1,64,0,1,fp8,fp8,0,61.56748962402344
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,8192,48,4,64,0,1,float16,fp8,0,50.04100036621094
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,8192,48,2,64,0,1,fp8,fp8,0,63.62982177734375
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,8192,48,48,64,0,1,float16,float16,0,26.41868845621745
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,8192,48,48,64,0,1,float16,fp8,0,26.1748046875
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,8192,48,1,64,0,1,float16,float16,0,23.975252787272137
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,8192,48,8,64,0,1,float16,float16,0,49.90446980794271
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,8192,48,48,64,0,1,fp8,fp8,0,33.83978780110677
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,8192,48,8,64,0,1,float16,fp8,0,49.41619364420573
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,8192,48,4,64,0,1,fp8,fp8,0,63.46427917480469
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,8192,48,8,64,0,1,fp8,fp8,0,64.44134521484375
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,8192,48,1,64,0,1,float16,fp8,0,24.543744405110676
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,8192,48,1,64,0,1,fp8,fp8,0,30.15099843343099
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,8192,48,2,64,0,1,float16,float16,0,23.8745600382487
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,8192,48,2,64,0,1,float16,fp8,0,24.61713155110677
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,8192,48,4,64,0,1,float16,float16,0,24.48230489095052
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,8192,48,2,64,0,1,fp8,fp8,0,30.642857869466145
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,8192,48,4,64,0,1,float16,fp8,0,23.74383036295573
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,8192,48,4,64,0,1,fp8,fp8,0,30.505813598632812
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,8192,48,8,64,0,1,float16,float16,0,24.181930541992188
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,8192,48,48,64,0,1,float16,float16,0,13.3481814066569
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,8192,48,48,64,0,1,float16,fp8,0,13.160959879557291
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,8192,48,8,64,0,1,float16,fp8,0,24.005633036295574
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,8192,48,1,64,0,1,float16,float16,0,12.413951873779297
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,8192,48,48,64,0,1,fp8,fp8,0,16.784042358398438
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,8192,48,8,64,0,1,fp8,fp8,0,30.879231770833332
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,8192,48,1,64,0,1,float16,fp8,0,12.580010732014975
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,8192,48,1,64,0,1,fp8,fp8,0,15.044778188069662
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,8192,48,2,64,0,1,float16,float16,0,12.69589360555013
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,8192,48,2,64,0,1,float16,fp8,0,12.803583780924479
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,8192,48,2,64,0,1,fp8,fp8,0,15.21664047241211
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,8192,48,4,64,0,1,float16,float16,0,12.256768544514975
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,8192,48,4,64,0,1,float16,fp8,0,12.624212900797525
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,8192,48,4,64,0,1,fp8,fp8,0,15.242239634195963
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,8192,48,8,64,0,1,float16,float16,0,12.409343719482422
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,8192,48,48,64,0,1,float16,float16,0,6.8795731862386065
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,8192,48,48,64,0,1,float16,fp8,0,6.637738545735677
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,8192,48,8,64,0,1,float16,fp8,0,12.471125284830729
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,8192,48,1,64,0,1,float16,float16,0,5.893119812011719
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,8192,48,48,64,0,1,fp8,fp8,0,8.262997309366861
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,8192,48,8,64,0,1,fp8,fp8,0,15.419904073079428
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,8192,48,1,64,0,1,float16,fp8,0,5.7963517506917315
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,8192,48,1,64,0,1,fp8,fp8,0,7.639381408691406
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,8192,48,2,64,0,1,float16,float16,0,6.2820695241292315
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,8192,48,2,64,0,1,float16,fp8,0,6.355797449747722
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,8192,48,4,64,0,1,float16,float16,0,5.623978932698567
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,8192,48,2,64,0,1,fp8,fp8,0,7.605589548746745
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,8192,48,4,64,0,1,float16,fp8,0,6.045354843139648
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,8192,48,4,64,0,1,fp8,fp8,0,7.7361494700113935
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,8192,48,8,64,0,1,float16,float16,0,5.835605621337891
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,8192,48,8,64,0,1,float16,fp8,0,6.227285385131836
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,8192,48,48,64,0,1,float16,float16,0,3.0382080078125
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,8192,48,8,64,0,1,fp8,fp8,0,7.717205047607422
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,8192,48,48,64,0,1,float16,fp8,0,3.0986239115397134
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,8192,48,1,64,0,1,float16,float16,0,2.7253761291503906
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,8192,48,48,64,0,1,fp8,fp8,0,4.075349489847819
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,8192,48,1,64,0,1,float16,fp8,0,2.8475732803344727
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,8192,48,1,64,0,1,fp8,fp8,0,3.8167893091837564
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,8192,48,2,64,0,1,float16,float16,0,2.6465279261271157
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,8192,48,2,64,0,1,float16,fp8,0,2.6489173571268716
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,8192,48,4,64,0,1,float16,float16,0,2.8136107126871743
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,8192,48,2,64,0,1,fp8,fp8,0,3.817813237508138
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,8192,48,4,64,0,1,float16,fp8,0,2.5983999570210776
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,8192,48,4,64,0,1,fp8,fp8,0,3.7799253463745117
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,8192,48,8,64,0,1,float16,float16,0,2.6728105545043945
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,8192,48,8,64,0,1,float16,fp8,0,2.7475627263387046
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,8192,48,8,64,0,1,fp8,fp8,0,3.813546816507975
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,6144,48,1,64,0,1,float16,float16,0,28.56908925374349
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,6144,48,1,64,0,1,float16,fp8,0,29.29100799560547
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,6144,48,2,64,0,1,float16,float16,0,28.48290252685547
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,6144,48,2,64,0,1,float16,fp8,0,28.895060221354168
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,6144,48,1,64,0,1,fp8,fp8,0,34.91003672281901
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,6144,48,4,64,0,1,float16,float16,0,28.144126892089844
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,6144,48,2,64,0,1,fp8,fp8,0,35.95246887207031
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,6144,48,4,64,0,1,float16,fp8,0,28.396886189778645
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,6144,48,48,64,0,1,float16,fp8,0,15.273813883463541
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,6144,48,48,64,0,1,float16,float16,0,15.885140736897787
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,6144,48,1,64,0,1,float16,float16,0,14.618111928304037
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,6144,48,8,64,0,1,float16,fp8,0,28.087979634602863
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,6144,48,48,64,0,1,fp8,fp8,0,19.662335713704426
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,6144,48,8,64,0,1,float16,float16,0,29.359786987304688
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,6144,48,4,64,0,1,fp8,fp8,0,35.8099619547526
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,6144,48,8,64,0,1,fp8,fp8,0,36.3325449625651
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,6144,48,1,64,0,1,float16,fp8,0,14.471338907877604
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,6144,48,2,64,0,1,float16,float16,0,14.491135915120443
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,6144,48,1,64,0,1,fp8,fp8,0,17.336490631103516
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,6144,48,2,64,0,1,float16,fp8,0,14.222506205240885
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,6144,48,4,64,0,1,float16,float16,0,14.504960378011068
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,6144,48,4,64,0,1,float16,fp8,0,14.45580800374349
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,6144,48,2,64,0,1,fp8,fp8,0,17.532416025797527
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,6144,48,4,64,0,1,fp8,fp8,0,17.41926447550456
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,6144,48,8,64,0,1,float16,float16,0,14.579029083251953
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,6144,48,48,64,0,1,float16,float16,0,7.924906412760417
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,6144,48,8,64,0,1,float16,fp8,0,14.331562042236328
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,6144,48,48,64,0,1,float16,fp8,0,7.885994593302409
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,6144,48,1,64,0,1,float16,float16,0,7.543296178181966
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,6144,48,48,64,0,1,fp8,fp8,0,9.764352162679037
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,6144,48,8,64,0,1,fp8,fp8,0,17.74131139119466
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,6144,48,1,64,0,1,float16,fp8,0,6.956885019938151
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,6144,48,1,64,0,1,fp8,fp8,0,8.731989542643229
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,6144,48,2,64,0,1,float16,float16,0,7.245482762654622
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,6144,48,2,64,0,1,float16,fp8,0,6.486186981201172
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,6144,48,4,64,0,1,float16,float16,0,7.358293533325195
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,6144,48,2,64,0,1,fp8,fp8,0,8.868864059448242
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,6144,48,4,64,0,1,float16,fp8,0,7.521280288696289
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,6144,48,4,64,0,1,fp8,fp8,0,8.758101145426432
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,6144,48,8,64,0,1,float16,float16,0,6.963029225667317
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,6144,48,48,64,0,1,float16,float16,0,3.7360639572143555
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,6144,48,48,64,0,1,float16,fp8,0,3.71012274424235
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,6144,48,1,64,0,1,float16,float16,0,3.105109214782715
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,6144,48,8,64,0,1,float16,fp8,0,7.312896092732747
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,6144,48,48,64,0,1,fp8,fp8,0,4.879530588785808
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,6144,48,8,64,0,1,fp8,fp8,0,8.82090695699056
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,6144,48,1,64,0,1,float16,fp8,0,3.012096087137858
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,6144,48,1,64,0,1,fp8,fp8,0,4.338517189025879
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,6144,48,2,64,0,1,float16,float16,0,3.197781244913737
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,6144,48,2,64,0,1,float16,fp8,0,3.279189427693685
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,6144,48,2,64,0,1,fp8,fp8,0,4.340053240458171
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,6144,48,4,64,0,1,float16,float16,0,3.037525177001953
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,6144,48,4,64,0,1,float16,fp8,0,3.0185813903808594
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,6144,48,8,64,0,1,float16,float16,0,3.0757548014322915
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,6144,48,4,64,0,1,fp8,fp8,0,4.3458560307820635
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,6144,48,8,64,0,1,float16,fp8,0,3.1476052602132163
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,6144,48,8,64,0,1,fp8,fp8,0,4.375551859537761
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,6144,48,48,64,0,1,float16,float16,0,1.8491733868916829
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,6144,48,48,64,0,1,float16,fp8,0,1.7897814114888508
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,6144,48,1,64,0,1,float16,float16,0,1.58515199025472
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,6144,48,48,64,0,1,fp8,fp8,0,2.3673173586527505
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,6144,48,1,64,0,1,float16,fp8,0,1.5807147026062012
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,6144,48,1,64,0,1,fp8,fp8,0,2.164053281148275
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,6144,48,2,64,0,1,float16,float16,0,1.5389013290405273
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,6144,48,2,64,0,1,float16,fp8,0,1.5435093243916829
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,6144,48,4,64,0,1,float16,float16,0,1.5685973167419434
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,6144,48,2,64,0,1,fp8,fp8,0,2.1655893325805664
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,6144,48,4,64,0,1,float16,fp8,0,1.5578452746073406
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,6144,48,4,64,0,1,fp8,fp8,0,2.152106602986654
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,6144,48,8,64,0,1,float16,float16,0,1.5443627039591472
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,6144,48,8,64,0,1,float16,fp8,0,1.580885410308838
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,6144,48,8,64,0,1,fp8,fp8,0,2.152618726094564
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,4096,48,1,64,0,1,float16,float16,0,27.739476521809895
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,4096,48,1,64,0,1,float16,fp8,0,27.417770385742188
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,4096,48,2,64,0,1,float16,float16,0,28.816383361816406
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,4096,48,2,64,0,1,float16,fp8,0,27.728897094726562
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,4096,48,4,64,0,1,float16,float16,0,27.988479614257812
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,4096,48,1,64,0,1,fp8,fp8,0,33.40509796142578
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,4096,48,4,64,0,1,float16,fp8,0,27.733505249023438
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,4096,48,2,64,0,1,fp8,fp8,0,35.39660898844401
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,4096,48,48,64,0,1,float16,fp8,0,15.688021341959635
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,4096,48,1,64,0,1,float16,float16,0,13.995007832845053
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,4096,48,48,64,0,1,float16,float16,0,15.988564809163412
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,4096,48,48,64,0,1,fp8,fp8,0,19.26963170369466
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,4096,48,8,64,0,1,float16,float16,0,28.40063985188802
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,4096,48,8,64,0,1,float16,fp8,0,28.963839213053387
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,4096,48,4,64,0,1,fp8,fp8,0,35.09435780843099
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,4096,48,8,64,0,1,fp8,fp8,0,36.25830332438151
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,4096,48,1,64,0,1,float16,fp8,0,14.423892974853516
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,4096,48,2,64,0,1,float16,float16,0,13.761706034342447
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,4096,48,1,64,0,1,fp8,fp8,0,16.21777089436849
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,4096,48,2,64,0,1,float16,fp8,0,13.734911600748697
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,4096,48,2,64,0,1,fp8,fp8,0,16.32921600341797
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,4096,48,4,64,0,1,float16,float16,0,14.179327646891275
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,4096,48,4,64,0,1,float16,fp8,0,13.485567728678385
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,4096,48,4,64,0,1,fp8,fp8,0,16.398848215738933
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,4096,48,48,64,0,1,float16,float16,0,7.728127797444661
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,4096,48,8,64,0,1,float16,fp8,0,13.667157491048178
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,4096,48,8,64,0,1,float16,float16,0,14.019583384195963
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,4096,48,48,64,0,1,float16,fp8,0,7.4810028076171875
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,4096,48,1,64,0,1,float16,float16,0,6.5111039479573565
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,4096,48,8,64,0,1,fp8,fp8,0,16.809300740559895
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,4096,48,48,64,0,1,fp8,fp8,0,9.526613235473633
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,4096,48,1,64,0,1,float16,fp8,0,6.601727803548177
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,4096,48,2,64,0,1,float16,float16,0,6.600362777709961
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,4096,48,1,64,0,1,fp8,fp8,0,8.021162668863932
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,4096,48,2,64,0,1,float16,fp8,0,7.08403205871582
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,4096,48,4,64,0,1,float16,float16,0,6.225066502888997
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,4096,48,2,64,0,1,fp8,fp8,0,8.078506469726562
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,4096,48,4,64,0,1,float16,fp8,0,7.093077341715495
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,4096,48,4,64,0,1,fp8,fp8,0,8.282453536987305
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,4096,48,8,64,0,1,float16,float16,0,6.653952280680339
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,4096,48,48,64,0,1,float16,float16,0,3.932330767313639
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,4096,48,48,64,0,1,float16,fp8,0,3.733674685160319
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,4096,48,8,64,0,1,float16,fp8,0,6.246570587158203
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,4096,48,1,64,0,1,float16,float16,0,3.053567886352539
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,4096,48,48,64,0,1,fp8,fp8,0,4.62557856241862
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,4096,48,8,64,0,1,fp8,fp8,0,8.201728185017904
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,4096,48,1,64,0,1,float16,fp8,0,3.03872013092041
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,4096,48,1,64,0,1,fp8,fp8,0,3.9669758478800454
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,4096,48,2,64,0,1,float16,float16,0,3.0607360204060874
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,4096,48,2,64,0,1,float16,fp8,0,3.0069761276245117
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,4096,48,4,64,0,1,float16,float16,0,3.0020265579223633
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,4096,48,4,64,0,1,float16,fp8,0,3.010218620300293
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,4096,48,2,64,0,1,fp8,fp8,0,3.9640747706095376
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,4096,48,8,64,0,1,float16,float16,0,3.0638081232706704
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,4096,48,4,64,0,1,fp8,fp8,0,3.94871457417806
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,4096,48,8,64,0,1,float16,fp8,0,2.985472043355306
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,4096,48,48,64,0,1,float16,float16,0,1.8459307352701824
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,4096,48,48,64,0,1,float16,fp8,0,1.8085546493530273
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,4096,48,8,64,0,1,fp8,fp8,0,4.085247993469238
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,4096,48,1,64,0,1,float16,float16,0,1.3796693483988445
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,4096,48,48,64,0,1,fp8,fp8,0,2.2848854064941406
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,4096,48,1,64,0,1,float16,fp8,0,1.4146560033162434
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,4096,48,1,64,0,1,fp8,fp8,0,1.9208532969156902
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,4096,48,2,64,0,1,float16,float16,0,1.361237366994222
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,4096,48,2,64,0,1,float16,fp8,0,1.3791573842366536
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,4096,48,2,64,0,1,fp8,fp8,0,1.9416747093200684
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,4096,48,4,64,0,1,float16,float16,0,1.4120960235595703
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,4096,48,4,64,0,1,float16,fp8,0,1.3597013155619304
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,4096,48,4,64,0,1,fp8,fp8,0,1.940992037455241
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,4096,48,8,64,0,1,float16,float16,0,1.423360029856364
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,4096,48,8,64,0,1,float16,fp8,0,1.3649919827779133
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,4096,48,8,64,0,1,fp8,fp8,0,1.970688025156657
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,4096,48,48,64,0,1,float16,float16,0,0.8055466810862223
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,4096,48,48,64,0,1,float16,fp8,0,0.7780693372090658
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,4096,48,1,64,0,1,float16,fp8,0,0.7519573370615641
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,4096,48,48,64,0,1,fp8,fp8,0,1.1741866270701091
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,4096,48,1,64,0,1,float16,float16,0,0.7768747011820475
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,4096,48,1,64,0,1,fp8,fp8,0,1.04584534962972
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,4096,48,2,64,0,1,float16,float16,0,0.746837298075358
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,4096,48,2,64,0,1,float16,fp8,0,0.787285327911377
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,4096,48,2,64,0,1,fp8,fp8,0,1.0292906761169434
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,4096,48,4,64,0,1,float16,float16,0,0.749567985534668
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,4096,48,4,64,0,1,float16,fp8,0,0.7615146636962891
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,4096,48,4,64,0,1,fp8,fp8,0,1.0347519715627034
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,4096,48,8,64,0,1,float16,float16,0,0.7517866293589274
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,4096,48,8,64,0,1,float16,fp8,0,0.7488853136698405
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,4096,48,8,64,0,1,fp8,fp8,0,1.0210986932118733
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,3072,48,1,64,0,1,float16,float16,0,16.037376403808594
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,3072,48,1,64,0,1,float16,fp8,0,16.418304443359375
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,3072,48,2,64,0,1,float16,float16,0,16.71748224894206
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,3072,48,2,64,0,1,float16,fp8,0,16.80349858601888
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,3072,48,1,64,0,1,fp8,fp8,0,19.07797368367513
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,3072,48,4,64,0,1,float16,float16,0,17.02092742919922
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,3072,48,4,64,0,1,float16,fp8,0,16.42751948038737
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,3072,48,2,64,0,1,fp8,fp8,0,19.53331247965495
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,3072,48,1,64,0,1,float16,float16,0,7.892480214436849
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,3072,48,48,64,0,1,float16,fp8,0,9.791317621866861
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,3072,48,48,64,0,1,float16,float16,0,10.195797602335611
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,3072,48,4,64,0,1,fp8,fp8,0,20.067328135172527
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,3072,48,48,64,0,1,fp8,fp8,0,11.581780751546225
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,3072,48,8,64,0,1,float16,float16,0,16.54852294921875
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,3072,48,8,64,0,1,float16,fp8,0,16.321706136067707
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,3072,48,8,64,0,1,fp8,fp8,0,20.9442138671875
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,3072,48,1,64,0,1,float16,fp8,0,7.688191731770833
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,3072,48,2,64,0,1,float16,fp8,0,7.735125223795573
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,3072,48,2,64,0,1,float16,float16,0,8.390143712361654
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,3072,48,1,64,0,1,fp8,fp8,0,9.532074610392252
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,3072,48,4,64,0,1,float16,float16,0,8.074581146240234
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,3072,48,2,64,0,1,fp8,fp8,0,9.483776092529297
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,3072,48,4,64,0,1,float16,fp8,0,8.458922704060873
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,3072,48,4,64,0,1,fp8,fp8,0,9.725269317626953
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,3072,48,48,64,0,1,float16,float16,0,4.989439964294434
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,3072,48,8,64,0,1,float16,float16,0,7.984810511271159
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,3072,48,48,64,0,1,float16,fp8,0,4.755626678466797
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,3072,48,1,64,0,1,float16,float16,0,3.7533013025919595
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,3072,48,8,64,0,1,float16,fp8,0,8.554154713948568
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,3072,48,48,64,0,1,fp8,fp8,0,5.665280024210612
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,3072,48,8,64,0,1,fp8,fp8,0,9.803605397542318
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,3072,48,1,64,0,1,float16,fp8,0,3.6877654393514
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,3072,48,1,64,0,1,fp8,fp8,0,4.720981280008952
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,3072,48,2,64,0,1,float16,fp8,0,3.6920318603515625
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,3072,48,2,64,0,1,float16,float16,0,3.713024139404297
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,3072,48,4,64,0,1,float16,float16,0,3.723093350728353
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,3072,48,2,64,0,1,fp8,fp8,0,4.684800148010254
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,3072,48,4,64,0,1,float16,fp8,0,3.726506551106771
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,3072,48,4,64,0,1,fp8,fp8,0,4.741120020548503
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,3072,48,8,64,0,1,float16,float16,0,3.8423894246419272
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,3072,48,48,64,0,1,float16,float16,0,2.3557119369506836
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,3072,48,48,64,0,1,float16,fp8,0,2.262869358062744
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,3072,48,8,64,0,1,float16,fp8,0,3.866623878479004
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,3072,48,48,64,0,1,fp8,fp8,0,2.7721385955810547
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,3072,48,8,64,0,1,fp8,fp8,0,4.789077440897624
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,3072,48,1,64,0,1,float16,float16,0,1.6493226687113445
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,3072,48,1,64,0,1,float16,fp8,0,1.6785066922505696
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,3072,48,1,64,0,1,fp8,fp8,0,2.268330732981364
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,3072,48,2,64,0,1,float16,float16,0,1.6592213312784831
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,3072,48,2,64,0,1,float16,fp8,0,1.6465919812520344
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,3072,48,4,64,0,1,float16,float16,0,1.6853334108988445
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,3072,48,4,64,0,1,float16,fp8,0,1.6744106610616047
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,3072,48,2,64,0,1,fp8,fp8,0,2.269866625467936
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,3072,48,4,64,0,1,fp8,fp8,0,2.2551892598470054
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,3072,48,8,64,0,1,float16,float16,0,1.7824427286783855
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,3072,48,8,64,0,1,float16,fp8,0,1.7769813537597656
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,3072,48,48,64,0,1,float16,fp8,0,1.0705920060475667
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,3072,48,48,64,0,1,float16,float16,0,1.1272532939910889
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,3072,48,8,64,0,1,fp8,fp8,0,2.3273812929789224
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,3072,48,1,64,0,1,float16,float16,0,0.8453120390574137
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,3072,48,48,64,0,1,fp8,fp8,0,1.4098773002624512
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,3072,48,1,64,0,1,float16,fp8,0,0.8198826313018799
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,3072,48,1,64,0,1,fp8,fp8,0,1.1455146471659343
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,3072,48,2,64,0,1,float16,float16,0,0.8405333360036215
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,3072,48,2,64,0,1,float16,fp8,0,0.8137386639912924
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,3072,48,2,64,0,1,fp8,fp8,0,1.136128028233846
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,3072,48,4,64,0,1,float16,float16,0,0.8057173093159994
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,3072,48,4,64,0,1,float16,fp8,0,0.8320000171661377
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,3072,48,8,64,0,1,float16,float16,0,0.8046933015187582
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,3072,48,8,64,0,1,float16,fp8,0,0.8219306468963623
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,3072,48,4,64,0,1,fp8,fp8,0,1.1351040204366047
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,3072,48,8,64,0,1,fp8,fp8,0,1.1735040346781414
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,3072,48,48,64,0,1,float16,float16,0,0.4580693244934082
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,3072,48,48,64,0,1,float16,fp8,0,0.45892266432444256
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,3072,48,1,64,0,1,float16,float16,0,0.4739413261413574
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,3072,48,48,64,0,1,fp8,fp8,0,0.7000746726989746
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,3072,48,1,64,0,1,float16,fp8,0,0.4671146472295125
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,3072,48,1,64,0,1,fp8,fp8,0,0.6282240152359009
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,3072,48,2,64,0,1,float16,float16,0,0.4800853331883748
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,3072,48,2,64,0,1,float16,fp8,0,0.47650134563446045
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,3072,48,2,64,0,1,fp8,fp8,0,0.6253226598103842
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,3072,48,4,64,0,1,float16,float16,0,0.4896426598230998
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,3072,48,4,64,0,1,float16,fp8,0,0.4804266691207886
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,3072,48,4,64,0,1,fp8,fp8,0,0.6239573160807291
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,3072,48,8,64,0,1,float16,fp8,0,0.46609067916870117
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,3072,48,8,64,0,1,float16,float16,0,0.4575573205947876
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,3072,48,8,64,0,1,fp8,fp8,0,0.6215680042902628
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,2048,48,1,64,0,1,float16,float16,0,16.460458119710285
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,2048,48,1,64,0,1,float16,fp8,0,16.546133677164715
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,2048,48,1,64,0,1,fp8,fp8,0,18.932907104492188
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,2048,48,2,64,0,1,float16,fp8,0,16.498004913330078
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,2048,48,2,64,0,1,float16,float16,0,17.47268295288086
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,2048,48,4,64,0,1,float16,fp8,0,16.704341888427734
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,2048,48,4,64,0,1,float16,float16,0,17.021099090576172
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,2048,48,2,64,0,1,fp8,fp8,0,20.485802968343098
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,2048,48,1,64,0,1,float16,float16,0,7.916202545166016
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,2048,48,48,64,0,1,float16,fp8,0,10.653525034586588
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,2048,48,48,64,0,1,float16,float16,0,11.476309458414713
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,2048,48,4,64,0,1,fp8,fp8,0,20.59434636433919
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,2048,48,8,64,0,1,float16,float16,0,17.620992024739582
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,2048,48,48,64,0,1,fp8,fp8,0,12.254207611083984
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,2048,48,8,64,0,1,float16,fp8,0,17.594027201334637
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,2048,48,8,64,0,1,fp8,fp8,0,21.2140375773112
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,2048,48,1,64,0,1,float16,fp8,0,7.683584213256836
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,2048,48,1,64,0,1,fp8,fp8,0,9.105578740437826
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,2048,48,2,64,0,1,float16,fp8,0,7.805952072143555
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,2048,48,2,64,0,1,float16,float16,0,8.479573567708334
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,2048,48,4,64,0,1,float16,float16,0,8.119295756022135
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,2048,48,2,64,0,1,fp8,fp8,0,9.277952194213867
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,2048,48,4,64,0,1,float16,fp8,0,7.728469212849935
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,2048,48,4,64,0,1,fp8,fp8,0,9.339733123779297
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,2048,48,8,64,0,1,float16,float16,0,8.32358423868815
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,2048,48,48,64,0,1,float16,float16,0,5.469866434733073
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,2048,48,1,64,0,1,float16,float16,0,3.622229258219401
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,2048,48,48,64,0,1,float16,fp8,0,5.107541402180989
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,2048,48,48,64,0,1,fp8,fp8,0,5.8286081949869795
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,2048,48,8,64,0,1,float16,fp8,0,8.122026443481445
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,2048,48,8,64,0,1,fp8,fp8,0,9.967274983723959
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,2048,48,1,64,0,1,float16,fp8,0,3.630250612894694
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,2048,48,2,64,0,1,float16,float16,0,3.6998828252156577
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,2048,48,1,64,0,1,fp8,fp8,0,4.450474739074707
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,2048,48,2,64,0,1,float16,fp8,0,3.5396267573038735
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,2048,48,4,64,0,1,float16,float16,0,3.6954453786214194
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,2048,48,4,64,0,1,float16,fp8,0,3.7263358434041343
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,2048,48,2,64,0,1,fp8,fp8,0,4.486485481262207
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,2048,48,4,64,0,1,fp8,fp8,0,4.600319862365723
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,2048,48,8,64,0,1,float16,float16,0,3.7901652654012046
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,2048,48,48,64,0,1,float16,fp8,0,2.468010743459066
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,2048,48,1,64,0,1,float16,float16,0,1.6807252566019695
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,2048,48,8,64,0,1,float16,fp8,0,3.859114646911621
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,2048,48,48,64,0,1,float16,float16,0,2.5779199600219727
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,2048,48,48,64,0,1,fp8,fp8,0,2.860714594523112
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,2048,48,8,64,0,1,fp8,fp8,0,4.657493273417155
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,2048,48,1,64,0,1,float16,fp8,0,1.7271466255187988
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,2048,48,1,64,0,1,fp8,fp8,0,2.1401599248250327
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,2048,48,2,64,0,1,float16,float16,0,1.7092266082763672
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,2048,48,2,64,0,1,float16,fp8,0,1.6848212877909343
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,2048,48,2,64,0,1,fp8,fp8,0,2.174976030985514
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,2048,48,4,64,0,1,float16,fp8,0,1.723903973897298
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,2048,48,4,64,0,1,float16,float16,0,1.7278292973836262
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,2048,48,4,64,0,1,fp8,fp8,0,2.1935787200927734
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,2048,48,8,64,0,1,float16,float16,0,1.885525385538737
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,2048,48,8,64,0,1,float16,fp8,0,1.7933653195699055
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,2048,48,48,64,0,1,float16,float16,0,1.215488036473592
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,2048,48,8,64,0,1,fp8,fp8,0,2.2512639363606772
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,2048,48,48,64,0,1,float16,fp8,0,1.168554703394572
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,2048,48,48,64,0,1,fp8,fp8,0,1.4375252723693848
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,2048,48,1,64,0,1,float16,float16,0,0.7748266855875651
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,2048,48,1,64,0,1,float16,fp8,0,0.7901866436004639
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,2048,48,1,64,0,1,fp8,fp8,0,1.0810026327768962
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,2048,48,2,64,0,1,float16,float16,0,0.7676586310068766
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,2048,48,2,64,0,1,float16,fp8,0,0.7681706746419271
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,2048,48,2,64,0,1,fp8,fp8,0,1.074005365371704
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,2048,48,4,64,0,1,float16,fp8,0,0.7767039934794108
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,2048,48,4,64,0,1,float16,float16,0,0.7949653466542562
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,2048,48,8,64,0,1,float16,float16,0,0.816810687383016
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,2048,48,4,64,0,1,fp8,fp8,0,1.0881706873575847
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,2048,48,8,64,0,1,float16,fp8,0,0.8115200201670328
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,2048,48,8,64,0,1,fp8,fp8,0,1.1246933142344158
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,2048,48,48,64,0,1,float16,float16,0,0.5108053286870321
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,2048,48,48,64,0,1,float16,fp8,0,0.44356266657511395
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,2048,48,1,64,0,1,float16,float16,0,0.407039999961853
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,2048,48,48,64,0,1,fp8,fp8,0,0.7289173603057861
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,2048,48,1,64,0,1,float16,fp8,0,0.4087466796239217
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,2048,48,1,64,0,1,fp8,fp8,0,0.562175989151001
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,2048,48,2,64,0,1,float16,float16,0,0.4089173475901286
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,2048,48,2,64,0,1,float16,fp8,0,0.40004265308380127
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,2048,48,4,64,0,1,float16,float16,0,0.4020906686782837
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,2048,48,2,64,0,1,fp8,fp8,0,0.5570559899012247
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,2048,48,4,64,0,1,float16,fp8,0,0.4036266803741455
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,2048,48,4,64,0,1,fp8,fp8,0,0.5577386617660522
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,2048,48,8,64,0,1,float16,float16,0,0.408405343691508
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,2048,48,8,64,0,1,float16,fp8,0,0.3952639897664388
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,2048,48,8,64,0,1,fp8,fp8,0,0.567466656366984
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,2048,48,48,64,0,1,float16,float16,0,0.23944532871246338
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,2048,48,48,64,0,1,float16,fp8,0,0.23586134115854898
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,2048,48,48,64,0,1,fp8,fp8,0,0.3331413269042969
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,2048,48,1,64,0,1,float16,float16,0,0.23859200874964395
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,2048,48,1,64,0,1,float16,fp8,0,0.23569067319234213
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,2048,48,1,64,0,1,fp8,fp8,0,0.3218773404757182
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,2048,48,2,64,0,1,float16,float16,0,0.24149332443873087
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,2048,48,2,64,0,1,float16,fp8,0,0.23449599742889404
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,2048,48,2,64,0,1,fp8,fp8,0,0.31914667288462323
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,2048,48,4,64,0,1,float16,float16,0,0.24251733223597208
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,2048,48,4,64,0,1,float16,fp8,0,0.23825067281723022
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,2048,48,4,64,0,1,fp8,fp8,0,0.3188053369522095
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,2048,48,8,64,0,1,float16,fp8,0,0.2302293380101522
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,2048,48,8,64,0,1,float16,float16,0,0.23244800170262656
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,2048,48,8,64,0,1,fp8,fp8,0,0.31829333305358887
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,1536,48,1,64,0,1,float16,float16,0,9.786879857381185
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,1536,48,1,64,0,1,float16,fp8,0,9.467903772989908
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,1536,48,2,64,0,1,float16,float16,0,9.84337043762207
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,1536,48,1,64,0,1,fp8,fp8,0,11.130026499430338
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,1536,48,2,64,0,1,float16,fp8,0,9.959423700968424
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,1536,48,4,64,0,1,float16,float16,0,10.143914540608725
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,1536,48,4,64,0,1,float16,fp8,0,9.93450673421224
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,1536,48,2,64,0,1,fp8,fp8,0,11.780095418294271
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,1536,48,4,64,0,1,fp8,fp8,0,12.02892812093099
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,1536,48,8,64,0,1,float16,float16,0,10.497365315755209
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,1536,48,1,64,0,1,float16,float16,0,4.654080073038737
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,1536,48,48,64,0,1,float16,float16,0,7.4548905690511065
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,1536,48,48,64,0,1,float16,fp8,0,6.896298726399739
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,1536,48,48,64,0,1,fp8,fp8,0,7.666858673095703
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,1536,48,8,64,0,1,float16,fp8,0,10.486613591512045
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,1536,48,8,64,0,1,fp8,fp8,0,12.697940826416016
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,1536,48,1,64,0,1,float16,fp8,0,4.666538556416829
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,1536,48,1,64,0,1,fp8,fp8,0,5.383850733439128
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,1536,48,2,64,0,1,float16,float16,0,4.672341346740723
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,1536,48,2,64,0,1,float16,fp8,0,4.6783145268758135
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,1536,48,2,64,0,1,fp8,fp8,0,5.4869333902994795
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,1536,48,4,64,0,1,float16,float16,0,4.65885861714681
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,1536,48,4,64,0,1,float16,fp8,0,4.611754735310872
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,1536,48,4,64,0,1,fp8,fp8,0,5.556735992431641
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,1536,48,8,64,0,1,float16,float16,0,5.0664107004801435
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,1536,48,8,64,0,1,float16,fp8,0,4.8571732838948565
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,1536,48,8,64,0,1,fp8,fp8,0,5.82912000020345
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,1536,48,48,64,0,1,float16,float16,0,3.5321172078450522
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,1536,48,1,64,0,1,float16,float16,0,2.1806079546610513
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,1536,48,48,64,0,1,float16,fp8,0,3.3360214233398438
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,1536,48,48,64,0,1,fp8,fp8,0,3.6780373255411782
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,1536,48,1,64,0,1,float16,fp8,0,2.174293359120687
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,1536,48,1,64,0,1,fp8,fp8,0,2.6168319384256997
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,1536,48,2,64,0,1,float16,float16,0,2.1913599967956543
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,1536,48,2,64,0,1,float16,fp8,0,2.2398293813069663
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,1536,48,4,64,0,1,float16,float16,0,2.258432070414225
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,1536,48,2,64,0,1,fp8,fp8,0,2.671104113260905
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,1536,48,4,64,0,1,float16,fp8,0,2.2381226221720376
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,1536,48,4,64,0,1,fp8,fp8,0,2.675882657368978
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,1536,48,8,64,0,1,float16,float16,0,2.392063935597738
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,1536,48,8,64,0,1,float16,fp8,0,2.4072532653808594
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,1536,48,8,64,0,1,fp8,fp8,0,2.756437301635742
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,1536,48,48,64,0,1,float16,float16,0,1.6865280469258626
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,1536,48,48,64,0,1,float16,fp8,0,1.5793493588765461
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,1536,48,1,64,0,1,float16,float16,0,0.9965226650238037
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,1536,48,48,64,0,1,fp8,fp8,0,1.837567965189616
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,1536,48,1,64,0,1,float16,fp8,0,0.9821866353352865
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,1536,48,1,64,0,1,fp8,fp8,0,1.300650676091512
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,1536,48,2,64,0,1,float16,float16,0,1.0240000089009602
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,1536,48,2,64,0,1,float16,fp8,0,0.9968640009562174
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,1536,48,4,64,0,1,float16,float16,0,1.0222933292388916
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,1536,48,2,64,0,1,fp8,fp8,0,1.3096960385640461
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,1536,48,4,64,0,1,float16,fp8,0,1.0156373182932537
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,1536,48,4,64,0,1,fp8,fp8,0,1.3431466420491536
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,1536,48,8,64,0,1,float16,float16,0,1.0919253031412761
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,1536,48,8,64,0,1,float16,fp8,0,1.0733226935068767
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,1536,48,8,64,0,1,fp8,fp8,0,1.3875199953715007
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,1536,48,48,64,0,1,float16,float16,0,0.7620266278584799
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,1536,48,48,64,0,1,float16,fp8,0,0.7365972995758057
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,1536,48,1,64,0,1,float16,float16,0,0.47359999020894367
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,1536,48,48,64,0,1,fp8,fp8,0,0.928938627243042
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,1536,48,1,64,0,1,float16,fp8,0,0.4790613253911336
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,1536,48,2,64,0,1,float16,float16,0,0.47018667062123615
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,1536,48,1,64,0,1,fp8,fp8,0,0.6370986700057983
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,1536,48,2,64,0,1,float16,fp8,0,0.48452266057332355
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,1536,48,2,64,0,1,fp8,fp8,0,0.6394879817962646
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,1536,48,4,64,0,1,float16,float16,0,0.4601173400878906
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,1536,48,4,64,0,1,float16,fp8,0,0.4642133315404256
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,1536,48,4,64,0,1,fp8,fp8,0,0.6471680005391439
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,1536,48,8,64,0,1,float16,float16,0,0.4742826620737712
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,1536,48,8,64,0,1,float16,fp8,0,0.4642133315404256
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,1536,48,8,64,0,1,fp8,fp8,0,0.6635520060857137
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,1536,48,48,64,0,1,float16,float16,0,0.276309331258138
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,1536,48,48,64,0,1,float16,fp8,0,0.2624853253364563
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,1536,48,48,64,0,1,fp8,fp8,0,0.442197322845459
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,1536,48,1,64,0,1,float16,float16,0,0.23893332481384277
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,1536,48,1,64,0,1,float16,fp8,0,0.2387626568476359
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,1536,48,1,64,0,1,fp8,fp8,0,0.3426986535390218
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,1536,48,2,64,0,1,float16,float16,0,0.23603200912475586
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,1536,48,2,64,0,1,float16,fp8,0,0.2409813404083252
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,1536,48,2,64,0,1,fp8,fp8,0,0.34201598167419434
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,1536,48,4,64,0,1,float16,fp8,0,0.23995733261108398
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,1536,48,4,64,0,1,float16,float16,0,0.24678399165471396
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,1536,48,4,64,0,1,fp8,fp8,0,0.3408213456471761
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,1536,48,8,64,0,1,float16,float16,0,0.24900267521540323
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,1536,48,8,64,0,1,float16,fp8,0,0.24576000372568765
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,1536,48,8,64,0,1,fp8,fp8,0,0.34303998947143555
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,1536,48,48,64,0,1,float16,float16,0,0.14967466394106546
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,1536,48,48,64,0,1,float16,fp8,0,0.15035733580589294
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,1536,48,48,64,0,1,fp8,fp8,0,0.21145600080490112
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,1536,48,1,64,0,1,float16,float16,0,0.15377066532770792
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,1536,48,1,64,0,1,float16,fp8,0,0.1539413332939148
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,1536,48,1,64,0,1,fp8,fp8,0,0.2063360015551249
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,1536,48,2,64,0,1,float16,float16,0,0.15530666708946228
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,1536,48,2,64,0,1,float16,fp8,0,0.15240533153216043
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,1536,48,2,64,0,1,fp8,fp8,0,0.20821332931518555
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,1536,48,4,64,0,1,float16,float16,0,0.155648003021876
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,1536,48,4,64,0,1,float16,fp8,0,0.1534293293952942
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,1536,48,8,64,0,1,float16,float16,0,0.15121066570281982
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,1536,48,4,64,0,1,fp8,fp8,0,0.2065066695213318
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,1536,48,8,64,0,1,float16,fp8,0,0.15223466356595358
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,1536,48,8,64,0,1,fp8,fp8,0,0.2058239976565043
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,1024,48,1,64,0,1,float16,float16,0,10.178901036580404
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,1024,48,1,64,0,1,float16,fp8,0,10.016085306803385
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,1024,48,2,64,0,1,float16,float16,0,10.539007822672525
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,1024,48,2,64,0,1,float16,fp8,0,10.6702512105306
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,1024,48,1,64,0,1,fp8,fp8,0,11.362815856933594
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,1024,48,4,64,0,1,float16,float16,0,10.991104125976562
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,1024,48,4,64,0,1,float16,fp8,0,10.905770619710287
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,1024,48,2,64,0,1,fp8,fp8,0,11.756202697753906
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,1024,48,4,64,0,1,fp8,fp8,0,12.124159495035807
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,1024,48,8,64,0,1,float16,float16,0,11.391658782958984
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,1024,48,1,64,0,1,float16,float16,0,4.815360069274902
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,1024,48,48,64,0,1,float16,fp8,0,8.088234583536783
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,1024,48,48,64,0,1,float16,float16,0,8.545450846354166
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,1024,48,48,64,0,1,fp8,fp8,0,8.632149378458658
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,1024,48,8,64,0,1,float16,fp8,0,11.115178426106771
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,1024,48,8,64,0,1,fp8,fp8,0,12.650154113769531
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,1024,48,1,64,0,1,float16,fp8,0,4.823040008544922
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,1024,48,1,64,0,1,fp8,fp8,0,5.476010640462239
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,1024,48,2,64,0,1,float16,float16,0,4.963328043619792
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,1024,48,2,64,0,1,float16,fp8,0,5.032959938049316
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,1024,48,2,64,0,1,fp8,fp8,0,5.688149134318034
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,1024,48,4,64,0,1,float16,float16,0,5.183829307556152
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,1024,48,4,64,0,1,float16,fp8,0,5.016746520996094
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,1024,48,4,64,0,1,fp8,fp8,0,5.785600026448567
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,1024,48,8,64,0,1,float16,float16,0,5.335039774576823
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,1024,48,8,64,0,1,float16,fp8,0,5.268479983011882
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,1024,48,8,64,0,1,fp8,fp8,0,6.043647766113281
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,1024,48,48,64,0,1,float16,float16,0,4.142421404520671
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,1024,48,1,64,0,1,float16,float16,0,2.3217493693033853
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,1024,48,48,64,0,1,float16,fp8,0,3.918506622314453
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,1024,48,48,64,0,1,fp8,fp8,0,4.11135991414388
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,1024,48,1,64,0,1,float16,fp8,0,2.3495680491129556
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,1024,48,1,64,0,1,fp8,fp8,0,2.635434627532959
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,1024,48,2,64,0,1,float16,float16,0,2.3814826011657715
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,1024,48,2,64,0,1,float16,fp8,0,2.3883093198140464
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,1024,48,4,64,0,1,float16,float16,0,2.4289280573527017
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,1024,48,2,64,0,1,fp8,fp8,0,2.695338567097982
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,1024,48,4,64,0,1,float16,fp8,0,2.4746665954589844
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,1024,48,4,64,0,1,fp8,fp8,0,2.7601919174194336
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,1024,48,8,64,0,1,float16,float16,0,2.589354674021403
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,1024,48,8,64,0,1,float16,fp8,0,2.528597354888916
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,1024,48,8,64,0,1,fp8,fp8,0,2.8392105102539062
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,1024,48,48,64,0,1,float16,float16,0,2.0039679209391275
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,1024,48,48,64,0,1,float16,fp8,0,1.8945706685384114
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,1024,48,48,64,0,1,fp8,fp8,0,2.0196693738301597
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,1024,48,1,64,0,1,float16,float16,0,1.1139413515726726
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,1024,48,1,64,0,1,float16,fp8,0,1.1019946734110515
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,1024,48,1,64,0,1,fp8,fp8,0,1.3026986916859944
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,1024,48,2,64,0,1,float16,float16,0,1.1064319610595703
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,1024,48,2,64,0,1,float16,fp8,0,1.0939733187357585
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,1024,48,2,64,0,1,fp8,fp8,0,1.3192533651987712
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,1024,48,4,64,0,1,float16,float16,0,1.137493371963501
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,1024,48,4,64,0,1,float16,fp8,0,1.1482453346252441
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,1024,48,8,64,0,1,float16,float16,0,1.2373332977294922
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,1024,48,4,64,0,1,fp8,fp8,0,1.3620905876159668
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,1024,48,8,64,0,1,float16,fp8,0,1.19978666305542
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,1024,48,8,64,0,1,fp8,fp8,0,1.413632074991862
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,1024,48,48,64,0,1,float16,float16,0,0.9403733412424723
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,1024,48,48,64,0,1,float16,fp8,0,0.8763733704884847
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,1024,48,48,64,0,1,fp8,fp8,0,1.0019839604695637
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,1024,48,1,64,0,1,float16,float16,0,0.475818673769633
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,1024,48,1,64,0,1,fp8,fp8,0,0.637440005938212
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,1024,48,1,64,0,1,float16,fp8,0,0.4751360019048055
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,1024,48,2,64,0,1,float16,float16,0,0.4638719956080119
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,1024,48,2,64,0,1,float16,fp8,0,0.46455466747283936
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,1024,48,2,64,0,1,fp8,fp8,0,0.6394879817962646
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,1024,48,4,64,0,1,float16,float16,0,0.47650134563446045
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,1024,48,4,64,0,1,float16,fp8,0,0.4695039987564087
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,1024,48,4,64,0,1,fp8,fp8,0,0.6597973505655924
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,1024,48,8,64,0,1,float16,float16,0,0.5193386475245158
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,1024,48,8,64,0,1,float16,fp8,0,0.506880005200704
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,1024,48,8,64,0,1,fp8,fp8,0,0.7029759883880615
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,1024,48,48,64,0,1,float16,float16,0,0.3543039957682292
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,1024,48,48,64,0,1,float16,fp8,0,0.29576534032821655
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,1024,48,48,64,0,1,fp8,fp8,0,0.5063680013020834
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,1024,48,1,64,0,1,float16,float16,0,0.23347200949986777
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,1024,48,1,64,0,1,float16,fp8,0,0.23586134115854898
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,1024,48,2,64,0,1,float16,float16,0,0.23483733336130777
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,1024,48,1,64,0,1,fp8,fp8,0,0.3203413287798564
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,1024,48,2,64,0,1,float16,fp8,0,0.23415466149648032
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,1024,48,2,64,0,1,fp8,fp8,0,0.31948800881703693
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,1024,48,8,64,0,1,float16,fp8,0,0.23859200874964395
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,1024,48,4,64,0,1,fp8,fp8,0,0.3186346689860026
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,1024,48,8,64,0,1,float16,float16,0,0.24337067206700644
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,1024,48,8,64,0,1,fp8,fp8,0,0.3280213276545207
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,1024,48,48,64,0,1,float16,float16,0,0.13499733805656433
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,1024,48,4,64,0,1,float16,fp8,0,0.23278933763504028
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,1024,48,4,64,0,1,float16,float16,0,0.2336426575978597
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,1024,48,48,64,0,1,float16,fp8,0,0.13090133666992188
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,1024,48,48,64,0,1,fp8,fp8,0,0.18090667327245077
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,1024,48,1,64,0,1,float16,float16,0,0.1288533310095469
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,1024,48,1,64,0,1,float16,fp8,0,0.13056000073750815
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,1024,48,2,64,0,1,float16,float16,0,0.12868266304334006
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,1024,48,1,64,0,1,fp8,fp8,0,0.17356799046198526
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,1024,48,2,64,0,1,float16,fp8,0,0.12834133704503378
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,1024,48,2,64,0,1,fp8,fp8,0,0.17151999473571777
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,1024,48,4,64,0,1,float16,float16,0,0.13141333063443503
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,1024,48,4,64,0,1,float16,fp8,0,0.13294933239618936
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,1024,48,8,64,0,1,float16,fp8,0,0.12868266304334006
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,1024,48,4,64,0,1,fp8,fp8,0,0.17442133029301962
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,1024,48,8,64,0,1,float16,float16,0,0.12919466694196066
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,1024,48,8,64,0,1,fp8,fp8,0,0.17425066232681274
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,1024,48,48,64,0,1,float16,float16,0,0.08072533210118611
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,1024,48,48,64,0,1,fp8,fp8,0,0.10291199882825215
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,1024,48,48,64,0,1,float16,fp8,0,0.08089600006739299
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,1024,48,1,64,0,1,float16,float16,0,0.07935999830563863
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,1024,48,1,64,0,1,float16,fp8,0,0.08004266520341237
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,1024,48,1,64,0,1,fp8,fp8,0,0.1013759970664978
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,1024,48,2,64,0,1,float16,fp8,0,0.08072533210118611
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,1024,48,2,64,0,1,fp8,fp8,0,0.1013759970664978
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,1024,48,2,64,0,1,float16,float16,0,0.07850666840871175
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,1024,48,4,64,0,1,float16,float16,0,0.07867733140786488
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,1024,48,4,64,0,1,float16,fp8,0,0.07918933530648549
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,1024,48,4,64,0,1,fp8,fp8,0,0.10274133086204529
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,1024,48,8,64,0,1,float16,float16,0,0.07816533247629802
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,1024,48,8,64,0,1,float16,fp8,0,0.077824001510938
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,1024,48,8,64,0,1,fp8,fp8,0,0.10171733299891154
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,512,48,1,64,0,1,float16,float16,0,7.579989115397136
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,512,48,1,64,0,1,float16,fp8,0,7.656447728474935
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,512,48,1,64,0,1,fp8,fp8,0,7.75714111328125
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,512,48,2,64,0,1,float16,fp8,0,8.167765299479166
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,512,48,2,64,0,1,float16,float16,0,8.134143829345703
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,512,48,2,64,0,1,fp8,fp8,0,8.247125625610352
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,512,48,4,64,0,1,float16,float16,0,8.293888092041016
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,512,48,4,64,0,1,float16,fp8,0,8.426495869954428
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,512,48,4,64,0,1,fp8,fp8,0,8.552960077921549
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,512,48,8,64,0,1,float16,float16,0,9.054719924926758
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,512,48,8,64,0,1,float16,fp8,0,8.823808034261068
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,512,48,1,64,0,1,float16,float16,0,3.5964587529500327
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,512,48,48,64,0,1,fp8,fp8,0,6.947498957316081
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,512,48,48,64,0,1,float16,fp8,0,7.324330647786458
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,512,48,8,64,0,1,fp8,fp8,0,9.314474741617838
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,512,48,48,64,0,1,float16,float16,0,7.880191802978516
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,512,48,1,64,0,1,float16,fp8,0,3.5264854431152344
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,512,48,1,64,0,1,fp8,fp8,0,3.6899840037027993
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,512,48,2,64,0,1,float16,float16,0,3.770538647969564
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,512,48,2,64,0,1,float16,fp8,0,3.7521066665649414
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,512,48,2,64,0,1,fp8,fp8,0,3.794432004292806
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,512,48,4,64,0,1,float16,float16,0,3.7794132232666016
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,512,48,4,64,0,1,float16,fp8,0,3.85587215423584
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,512,48,4,64,0,1,fp8,fp8,0,3.9837013880411782
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,512,48,8,64,0,1,float16,float16,0,4.2248531977335615
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,512,48,8,64,0,1,float16,fp8,0,4.13969071706136
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,512,48,8,64,0,1,fp8,fp8,0,4.271615982055664
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,512,48,48,64,0,1,float16,float16,0,3.741696039835612
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,512,48,48,64,0,1,float16,fp8,0,3.5445760091145835
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,512,48,48,64,0,1,fp8,fp8,0,3.2655359903971353
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,512,48,1,64,0,1,float16,float16,0,1.7150293986002605
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,512,48,1,64,0,1,float16,fp8,0,1.6938667297363281
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,512,48,1,64,0,1,fp8,fp8,0,1.8018986384073894
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,512,48,2,64,0,1,float16,float16,0,1.7749333381652832
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,512,48,2,64,0,1,float16,fp8,0,1.7408000628153484
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,512,48,2,64,0,1,fp8,fp8,0,1.8607786496480305
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,512,48,4,64,0,1,float16,float16,0,1.8609493573506672
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,512,48,4,64,0,1,float16,fp8,0,1.8445653915405273
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,512,48,4,64,0,1,fp8,fp8,0,1.9034454027811687
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,512,48,8,64,0,1,float16,float16,0,2.0072107315063477
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,512,48,8,64,0,1,fp8,fp8,0,2.0143787066141763
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,512,48,8,64,0,1,float16,fp8,0,1.9689812660217285
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,512,48,48,64,0,1,float16,float16,0,1.809066613515218
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,512,48,48,64,0,1,float16,fp8,0,1.7097387313842773
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,512,48,1,64,0,1,float16,float16,0,0.7821653683980306
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,512,48,1,64,0,1,float16,fp8,0,0.7816533247629801
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,512,48,48,64,0,1,fp8,fp8,0,1.5853226979573567
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,512,48,2,64,0,1,float16,float16,0,0.8323413530985514
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,512,48,2,64,0,1,float16,fp8,0,0.7942826747894287
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,512,48,1,64,0,1,fp8,fp8,0,0.8864426612854004
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,512,48,2,64,0,1,fp8,fp8,0,0.9303039709726969
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,512,48,4,64,0,1,float16,float16,0,0.8468480110168457
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,512,48,4,64,0,1,float16,fp8,0,0.830293337504069
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,512,48,4,64,0,1,fp8,fp8,0,0.9408853054046631
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,512,48,8,64,0,1,float16,float16,0,0.9378133614857992
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,512,48,8,64,0,1,float16,fp8,0,0.9159680207570394
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,512,48,8,64,0,1,fp8,fp8,0,0.9912319978078207
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,512,48,48,64,0,1,float16,fp8,0,0.7726079622904459
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,512,48,48,64,0,1,float16,float16,0,0.8265386422475179
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,512,48,1,64,0,1,float16,float16,0,0.3099306623140971
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,512,48,48,64,0,1,fp8,fp8,0,0.8045226732889811
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,512,48,1,64,0,1,float16,fp8,0,0.31675734122594196
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,512,48,1,64,0,1,fp8,fp8,0,0.41574398676554364
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,512,48,2,64,0,1,float16,float16,0,0.3179519971211751
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,512,48,2,64,0,1,float16,fp8,0,0.30958932638168335
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,512,48,2,64,0,1,fp8,fp8,0,0.425983985265096
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,512,48,4,64,0,1,float16,float16,0,0.3237546682357788
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,512,48,4,64,0,1,float16,fp8,0,0.320853332678477
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,512,48,4,64,0,1,fp8,fp8,0,0.4503893454869588
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,512,48,8,64,0,1,float16,float16,0,0.36130134264628094
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,512,48,8,64,0,1,float16,fp8,0,0.3479893207550049
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,512,48,8,64,0,1,fp8,fp8,0,0.4915200074513753
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,512,48,48,64,0,1,float16,float16,0,0.28245333830515545
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,512,48,48,64,0,1,float16,fp8,0,0.22357332706451416
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,512,48,1,64,0,1,float16,float16,0,0.1525759994983673
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,512,48,1,64,0,1,fp8,fp8,0,0.20462934176127115
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,512,48,1,64,0,1,float16,fp8,0,0.15428266922632852
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,512,48,48,64,0,1,fp8,fp8,0,0.3959466616312663
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,512,48,2,64,0,1,float16,float16,0,0.1525759994983673
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,512,48,2,64,0,1,float16,fp8,0,0.1551359991232554
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,512,48,2,64,0,1,fp8,fp8,0,0.20206934213638306
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,512,48,4,64,0,1,float16,float16,0,0.15411200126012167
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,512,48,4,64,0,1,float16,fp8,0,0.15377066532770792
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,512,48,4,64,0,1,fp8,fp8,0,0.20599466562271118
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,512,48,8,64,0,1,float16,float16,0,0.1565013329188029
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,512,48,8,64,0,1,fp8,fp8,0,0.20411733786265054
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,512,48,8,64,0,1,float16,fp8,0,0.1532586713631948
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,512,48,48,64,0,1,float16,float16,0,0.09352533022562663
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,512,48,48,64,0,1,float16,fp8,0,0.09079466263453166
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,512,48,48,64,0,1,fp8,fp8,0,0.1186133325099945
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,512,48,1,64,0,1,float16,float16,0,0.08772266904513042
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,512,48,1,64,0,1,float16,fp8,0,0.0890880028406779
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,512,48,1,64,0,1,fp8,fp8,0,0.11246933539708455
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,512,48,2,64,0,1,float16,float16,0,0.08772266904513042
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,512,48,2,64,0,1,float16,fp8,0,0.08840533097585042
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,512,48,2,64,0,1,fp8,fp8,0,0.11281067132949829
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,512,48,4,64,0,1,float16,float16,0,0.09011200070381165
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,512,48,4,64,0,1,float16,fp8,0,0.0890880028406779
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,512,48,4,64,0,1,fp8,fp8,0,0.11332266529401143
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,512,48,8,64,0,1,float16,float16,0,0.09062400460243225
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,512,48,8,64,0,1,float16,fp8,0,0.08994133273760478
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,512,48,8,64,0,1,fp8,fp8,0,0.1129813293615977
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,512,48,48,64,0,1,float16,float16,0,0.05341866612434387
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,512,48,48,64,0,1,float16,fp8,0,0.053077335158983864
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,512,48,1,64,0,1,float16,float16,0,0.051370665431022644
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,512,48,48,64,0,1,fp8,fp8,0,0.06963199873765309
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,512,48,1,64,0,1,float16,fp8,0,0.05085866649945577
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,512,48,1,64,0,1,fp8,fp8,0,0.06843733290831248
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,512,48,2,64,0,1,float16,float16,0,0.051029334465662636
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,512,48,2,64,0,1,float16,fp8,0,0.05017599960168203
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,512,48,4,64,0,1,float16,float16,0,0.051029334465662636
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,512,48,4,64,0,1,float16,fp8,0,0.051541333397229515
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,512,48,2,64,0,1,fp8,fp8,0,0.0682666649421056
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,512,48,4,64,0,1,fp8,fp8,0,0.06775466601053874
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,512,48,8,64,0,1,float16,float16,0,0.05017599960168203
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,512,48,8,64,0,1,float16,fp8,0,0.05239466826121012
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,512,48,8,64,0,1,fp8,fp8,0,0.06775466601053874
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,512,48,48,64,0,1,float16,float16,0,0.03242666771014532
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,512,48,48,64,0,1,float16,fp8,0,0.03242666771014532
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,512,48,48,64,0,1,fp8,fp8,0,0.040106666584809623
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,512,48,1,64,0,1,float16,fp8,0,0.032085334261258446
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,512,48,1,64,0,1,float16,float16,0,0.03276800115903219
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,512,48,1,64,0,1,fp8,fp8,0,0.039594667653242745
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,512,48,2,64,0,1,float16,float16,0,0.031914666295051575
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,512,48,2,64,0,1,float16,fp8,0,0.031914666295051575
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,512,48,2,64,0,1,fp8,fp8,0,0.040106666584809623
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,512,48,4,64,0,1,float16,float16,0,0.031914666295051575
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,512,48,4,64,0,1,float16,fp8,0,0.031914666295051575
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,512,48,4,64,0,1,fp8,fp8,0,0.03942399968703588
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,512,48,8,64,0,1,float16,fp8,0,0.032085334261258446
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,512,48,8,64,0,1,float16,float16,0,0.032085334261258446
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,512,48,8,64,0,1,fp8,fp8,0,0.03925333420435587
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,256,48,1,64,0,1,float16,float16,0,3.0651734670003257
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,256,48,1,64,0,1,float16,fp8,0,3.0504961013793945
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,256,48,1,64,0,1,fp8,fp8,0,2.757120132446289
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,256,48,2,64,0,1,float16,float16,0,3.326122601826986
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,256,48,2,64,0,1,float16,fp8,0,3.319978713989258
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,256,48,2,64,0,1,fp8,fp8,0,2.948608080546061
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,256,48,4,64,0,1,float16,fp8,0,3.4001919428507485
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,256,48,4,64,0,1,float16,float16,0,3.458218574523926
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,256,48,4,64,0,1,fp8,fp8,0,3.1653547286987305
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,256,48,8,64,0,1,float16,float16,0,3.8376105626424155
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,256,48,8,64,0,1,float16,fp8,0,3.7331625620524087
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,256,48,8,64,0,1,fp8,fp8,0,3.438762664794922
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,256,48,48,64,0,1,float16,float16,0,3.726506551106771
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,256,48,48,64,0,1,float16,fp8,0,3.510784149169922
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,256,48,48,64,0,1,fp8,fp8,0,2.9059413274129233
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,256,48,1,64,0,1,float16,float16,0,1.4595413208007812
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,256,48,1,64,0,1,float16,fp8,0,1.4484480222066243
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,256,48,1,64,0,1,fp8,fp8,0,1.3351252873738606
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,256,48,2,64,0,1,float16,float16,0,1.6167252858479817
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,256,48,2,64,0,1,float16,fp8,0,1.6027305920918782
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,256,48,2,64,0,1,fp8,fp8,0,1.410048007965088
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,256,48,4,64,0,1,float16,float16,0,1.628159999847412
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,256,48,4,64,0,1,float16,fp8,0,1.605631987253825
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,256,48,4,64,0,1,fp8,fp8,0,1.4750720659891765
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,256,48,8,64,0,1,float16,float16,0,1.818453311920166
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,256,48,8,64,0,1,float16,fp8,0,1.7655466397603352
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,256,48,8,64,0,1,fp8,fp8,0,1.593173344930013
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,256,48,48,64,0,1,float16,float16,0,1.8186240196228027
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,256,48,48,64,0,1,float16,fp8,0,1.6988159815470378
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,256,48,48,64,0,1,fp8,fp8,0,1.4167040189107258
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,256,48,1,64,0,1,float16,float16,0,0.6715733210245768
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,256,48,1,64,0,1,float16,fp8,0,0.667306661605835
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,256,48,1,64,0,1,fp8,fp8,0,0.6980266571044922
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,256,48,2,64,0,1,float16,float16,0,0.7154346307118734
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,256,48,2,64,0,1,float16,fp8,0,0.6842026710510254
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,256,48,2,64,0,1,fp8,fp8,0,0.6930773258209229
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,256,48,4,64,0,1,float16,fp8,0,0.748032013575236
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,256,48,4,64,0,1,float16,float16,0,0.7594666481018066
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,256,48,4,64,0,1,fp8,fp8,0,0.7309652964274088
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,256,48,8,64,0,1,float16,float16,0,0.831658681233724
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,256,48,8,64,0,1,float16,fp8,0,0.807253360748291
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,256,48,8,64,0,1,fp8,fp8,0,0.7842133045196533
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,256,48,48,64,0,1,float16,float16,0,0.8354132970174154
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,256,48,48,64,0,1,float16,fp8,0,0.7729492982228597
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,256,48,1,64,0,1,float16,float16,0,0.22801067431767783
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,256,48,48,64,0,1,fp8,fp8,0,0.6988800366719564
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,256,48,1,64,0,1,float16,fp8,0,0.22886399428049722
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,256,48,1,64,0,1,fp8,fp8,0,0.3232426643371582
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,256,48,2,64,0,1,float16,float16,0,0.24149332443873087
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,256,48,2,64,0,1,float16,fp8,0,0.23534933725992838
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,256,48,2,64,0,1,fp8,fp8,0,0.33553067843119305
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,256,48,4,64,0,1,float16,float16,0,0.2524159948031108
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,256,48,4,64,0,1,float16,fp8,0,0.2619733413060506
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,256,48,4,64,0,1,fp8,fp8,0,0.3426986535390218
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,256,48,8,64,0,1,float16,float16,0,0.29661866029103595
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,256,48,8,64,0,1,float16,fp8,0,0.2783573269844055
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,256,48,8,64,0,1,fp8,fp8,0,0.3853653271993001
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,256,48,48,64,0,1,float16,float16,0,0.25890133778254193
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,256,48,48,64,0,1,float16,fp8,0,0.1904639999071757
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,256,48,48,64,0,1,fp8,fp8,0,0.34167468547821045
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,256,48,1,64,0,1,float16,float16,0,0.11110400160153706
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,256,48,1,64,0,1,float16,fp8,0,0.11366400122642517
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,256,48,1,64,0,1,fp8,fp8,0,0.1431893308957418
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,256,48,2,64,0,1,float16,fp8,0,0.11195733149846394
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,256,48,2,64,0,1,float16,float16,0,0.11161599556605022
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,256,48,2,64,0,1,fp8,fp8,0,0.14250666896502176
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,256,48,4,64,0,1,float16,fp8,0,0.11127466956774394
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,256,48,4,64,0,1,float16,float16,0,0.11417599519093831
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,256,48,8,64,0,1,float16,float16,0,0.11059199770291646
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,256,48,4,64,0,1,fp8,fp8,0,0.14660267035166422
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,256,48,8,64,0,1,float16,fp8,0,0.11400533715883891
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,256,48,8,64,0,1,fp8,fp8,0,0.1462613344192505
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,256,48,48,64,0,1,float16,fp8,0,0.06741333504517873
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,256,48,48,64,0,1,fp8,fp8,0,0.08721066514650981
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,256,48,1,64,0,1,float16,float16,0,0.06468266745408376
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,256,48,48,64,0,1,float16,float16,0,0.06946133573849995
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,256,48,1,64,0,1,float16,fp8,0,0.062463998794555664
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,256,48,1,64,0,1,fp8,fp8,0,0.08055466910203297
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,256,48,2,64,0,1,float16,float16,0,0.06553600231806438
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,256,48,2,64,0,1,float16,fp8,0,0.06451199948787689
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,256,48,2,64,0,1,fp8,fp8,0,0.08106666803359985
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,256,48,4,64,0,1,float16,float16,0,0.0628053347269694
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,256,48,4,64,0,1,float16,fp8,0,0.06331733365853627
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,256,48,8,64,0,1,float16,float16,0,0.06297599772612254
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,256,48,4,64,0,1,fp8,fp8,0,0.08055466910203297
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,256,48,8,64,0,1,fp8,fp8,0,0.08089600006739299
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,256,48,8,64,0,1,float16,fp8,0,0.06434133152167003
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,256,48,48,64,0,1,float16,float16,0,0.040106666584809623
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,256,48,48,64,0,1,float16,fp8,0,0.039594667653242745
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,256,48,48,64,0,1,fp8,fp8,0,0.04949333270390829
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,256,48,1,64,0,1,float16,float16,0,0.03737599899371465
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,256,48,1,64,0,1,float16,fp8,0,0.03754666695992152
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,256,48,2,64,0,1,float16,float16,0,0.03737599899371465
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,256,48,2,64,0,1,float16,fp8,0,0.037205333511034645
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,256,48,1,64,0,1,fp8,fp8,0,0.04744533201058706
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,256,48,2,64,0,1,fp8,fp8,0,0.04710400104522705
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,256,48,4,64,0,1,float16,float16,0,0.037205333511034645
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,256,48,4,64,0,1,float16,fp8,0,0.03737599899371465
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,256,48,4,64,0,1,fp8,fp8,0,0.04693333307902018
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,256,48,8,64,0,1,float16,float16,0,0.037717332442601524
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,256,48,8,64,0,1,float16,fp8,0,0.037717332442601524
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,256,48,8,64,0,1,fp8,fp8,0,0.0481279989083608
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,256,48,48,64,0,1,float16,float16,0,0.025941332181294758
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,256,48,48,64,0,1,float16,fp8,0,0.025600001215934753
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,256,48,48,64,0,1,fp8,fp8,0,0.030720000465710957
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,256,48,1,64,0,1,float16,float16,0,0.025429333249727886
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,256,48,1,64,0,1,float16,fp8,0,0.025258667767047882
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,256,48,1,64,0,1,fp8,fp8,0,0.03054933249950409
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,256,48,2,64,0,1,float16,float16,0,0.025258667767047882
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,256,48,2,64,0,1,float16,fp8,0,0.025258667767047882
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,256,48,2,64,0,1,fp8,fp8,0,0.030207999050617218
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,256,48,4,64,0,1,float16,float16,0,0.02491733431816101
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,256,48,4,64,0,1,fp8,fp8,0,0.030037333567937214
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,256,48,4,64,0,1,float16,fp8,0,0.02457600086927414
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,256,48,8,64,0,1,float16,float16,0,0.025258667767047882
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,256,48,8,64,0,1,float16,fp8,0,0.025600001215934753
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,256,48,8,64,0,1,fp8,fp8,0,0.03054933249950409
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,256,48,48,64,0,1,float16,float16,0,0.015872000406185787
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,256,48,48,64,0,1,float16,fp8,0,0.015872000406185787
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,256,48,48,64,0,1,fp8,fp8,0,0.020138667275508244
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,256,48,1,64,0,1,float16,float16,0,0.016554666062196095
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,256,48,1,64,0,1,float16,fp8,0,0.016384000579516094
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,256,48,1,64,0,1,fp8,fp8,0,0.020309332758188248
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,256,48,2,64,0,1,float16,float16,0,0.01570133368174235
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,256,48,2,64,0,1,float16,fp8,0,0.015872000406185787
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,256,48,2,64,0,1,fp8,fp8,0,0.020138667275508244
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,256,48,4,64,0,1,float16,float16,0,0.01570133368174235
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,256,48,4,64,0,1,fp8,fp8,0,0.020138667275508244
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,256,48,4,64,0,1,float16,fp8,0,0.015530666957298914
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,256,48,8,64,0,1,float16,float16,0,0.015530666957298914
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,256,48,8,64,0,1,float16,fp8,0,0.015530666957298914
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,256,48,8,64,0,1,fp8,fp8,0,0.019968000551064808
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,128,48,1,64,0,1,float16,float16,0,1.4684160550435383
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,128,48,1,64,0,1,float16,fp8,0,1.4644907315572102
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,128,48,1,64,0,1,fp8,fp8,0,1.1736746629079182
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,128,48,2,64,0,1,float16,float16,0,1.5295146306355794
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,128,48,2,64,0,1,float16,fp8,0,1.516032059987386
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,128,48,2,64,0,1,fp8,fp8,0,1.2071253458658855
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,128,48,4,64,0,1,float16,fp8,0,1.6013654073079426
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,128,48,4,64,0,1,float16,float16,0,1.645055929819743
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,128,48,4,64,0,1,fp8,fp8,0,1.2818773587544758
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,128,48,8,64,0,1,float16,float16,0,1.8483200073242188
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,128,48,8,64,0,1,float16,fp8,0,1.7933653195699055
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,128,48,8,64,0,1,fp8,fp8,0,1.4039039611816406
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,128,48,1,64,0,1,float16,float16,0,0.6514346599578857
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,128,48,48,64,0,1,fp8,fp8,0,1.3960533142089844
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,128,48,48,64,0,1,float16,float16,0,1.8097492853800456
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,128,48,48,64,0,1,float16,fp8,0,1.7128106753031414
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,128,48,1,64,0,1,float16,fp8,0,0.673962672551473
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,128,48,1,64,0,1,fp8,fp8,0,0.6014293432235718
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,128,48,2,64,0,1,float16,float16,0,0.6905173460642496
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,128,48,2,64,0,1,fp8,fp8,0,0.5978453159332275
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,128,48,2,64,0,1,float16,fp8,0,0.6855680147806803
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,128,48,4,64,0,1,float16,float16,0,0.7338666915893555
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,128,48,4,64,0,1,float16,fp8,0,0.7229440212249756
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,128,48,4,64,0,1,fp8,fp8,0,0.6316373348236084
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,128,48,8,64,0,1,float16,float16,0,0.8432640234629313
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,128,48,8,64,0,1,float16,fp8,0,0.8272213141123453
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,128,48,48,64,0,1,float16,fp8,0,0.7797760168711344
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,128,48,48,64,0,1,fp8,fp8,0,0.683690627415975
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,128,48,48,64,0,1,float16,float16,0,0.8432640234629313
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,128,48,1,64,0,1,float16,float16,0,0.19268266359965006
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,128,48,1,64,0,1,float16,fp8,0,0.1930239995320638
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,128,48,8,64,0,1,fp8,fp8,0,0.6903466383616129
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,128,48,1,64,0,1,fp8,fp8,0,0.27084799607594806
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,128,48,2,64,0,1,float16,float16,0,0.19473065932591757
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,128,48,2,64,0,1,float16,fp8,0,0.19490132729212442
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,128,48,2,64,0,1,fp8,fp8,0,0.27135999997456867
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,128,48,4,64,0,1,float16,float16,0,0.2244266668955485
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,128,48,4,64,0,1,float16,fp8,0,0.21094399690628052
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,128,48,4,64,0,1,fp8,fp8,0,0.292522668838501
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,128,48,8,64,0,1,float16,float16,0,0.2788693308830261
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,128,48,8,64,0,1,float16,fp8,0,0.2577066620190938
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,128,48,8,64,0,1,fp8,fp8,0,0.33536001046498615
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,128,48,48,64,0,1,float16,float16,0,0.26026666164398193
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,128,48,48,64,0,1,float16,fp8,0,0.1858560045560201
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,128,48,1,64,0,1,float16,float16,0,0.08823466300964355
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,128,48,48,64,0,1,fp8,fp8,0,0.3269973397254944
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,128,48,1,64,0,1,float16,fp8,0,0.08601599931716919
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,128,48,2,64,0,1,float16,float16,0,0.08721066514650981
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,128,48,1,64,0,1,fp8,fp8,0,0.10939733187357585
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,128,48,2,64,0,1,float16,fp8,0,0.0865280032157898
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,128,48,2,64,0,1,fp8,fp8,0,0.1109333336353302
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,128,48,4,64,0,1,float16,float16,0,0.08703999718030293
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,128,48,4,64,0,1,float16,fp8,0,0.08686932921409607
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,128,48,4,64,0,1,fp8,fp8,0,0.13226667046546936
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,128,48,8,64,0,1,float16,float16,0,0.0865280032157898
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,128,48,8,64,0,1,float16,fp8,0,0.08840533097585042
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,128,48,8,64,0,1,fp8,fp8,0,0.1129813293615977
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,128,48,48,64,0,1,float16,float16,0,0.0554666668176651
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,128,48,48,64,0,1,float16,fp8,0,0.05205333232879639
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,128,48,48,64,0,1,fp8,fp8,0,0.07321600119272868
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,128,48,1,64,0,1,float16,fp8,0,0.04761599997679392
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,128,48,1,64,0,1,float16,float16,0,0.048810665806134544
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,128,48,1,64,0,1,fp8,fp8,0,0.06297599772612254
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,128,48,2,64,0,1,float16,float16,0,0.04983466863632202
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,128,48,2,64,0,1,float16,fp8,0,0.04915200173854828
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,128,48,2,64,0,1,fp8,fp8,0,0.06263466676076253
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,128,48,4,64,0,1,float16,float16,0,0.048810665806134544
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,128,48,4,64,0,1,float16,fp8,0,0.04898133377234141
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,128,48,4,64,0,1,fp8,fp8,0,0.06331733365853627
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,128,48,8,64,0,1,float16,float16,0,0.04863999783992767
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,128,48,8,64,0,1,float16,fp8,0,0.048469334840774536
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,128,48,8,64,0,1,fp8,fp8,0,0.0628053347269694
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,128,48,48,64,0,1,float16,fp8,0,0.03310933212439219
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,128,48,48,64,0,1,float16,float16,0,0.03310933212439219
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,128,48,48,64,0,1,fp8,fp8,0,0.04181333382924398
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,128,48,1,64,0,1,float16,float16,0,0.029866665601730347
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,128,48,1,64,0,1,float16,fp8,0,0.03054933249950409
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,128,48,2,64,0,1,float16,float16,0,0.030207999050617218
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,128,48,1,64,0,1,fp8,fp8,0,0.039936001102129616
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,128,48,2,64,0,1,float16,fp8,0,0.030378667016824085
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,128,48,2,64,0,1,fp8,fp8,0,0.040106666584809623
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,128,48,4,64,0,1,float16,float16,0,0.03054933249950409
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,128,48,4,64,0,1,float16,fp8,0,0.030037333567937214
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,128,48,4,64,0,1,fp8,fp8,0,0.040448000033696495
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,128,48,8,64,0,1,float16,float16,0,0.030378667016824085
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,128,48,8,64,0,1,float16,fp8,0,0.030378667016824085
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,128,48,8,64,0,1,fp8,fp8,0,0.040618665516376495
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,128,48,48,64,0,1,float16,float16,0,0.02184533327817917
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,128,48,48,64,0,1,float16,fp8,0,0.02184533327817917
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,128,48,48,64,0,1,fp8,fp8,0,0.025087999800841015
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,128,48,1,64,0,1,float16,float16,0,0.019968000551064808
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,128,48,1,64,0,1,float16,fp8,0,0.020479999482631683
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,128,48,1,64,0,1,fp8,fp8,0,0.023552000522613525
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,128,48,2,64,0,1,float16,float16,0,0.020479999482631683
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,128,48,2,64,0,1,float16,fp8,0,0.020479999482631683
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,128,48,2,64,0,1,fp8,fp8,0,0.023552000522613525
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,128,48,4,64,0,1,float16,float16,0,0.020992000897725422
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,128,48,4,64,0,1,float16,fp8,0,0.020821332931518555
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,128,48,4,64,0,1,fp8,fp8,0,0.02372266600529353
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,128,48,8,64,0,1,float16,float16,0,0.020821332931518555
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,128,48,8,64,0,1,float16,fp8,0,0.020992000897725422
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,128,48,8,64,0,1,fp8,fp8,0,0.023893333971500397
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,128,48,48,64,0,1,float16,float16,0,0.013823999712864557
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,128,48,48,64,0,1,float16,fp8,0,0.013653332988421122
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,128,48,48,64,0,1,fp8,fp8,0,0.015872000406185787
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,128,48,1,64,0,1,float16,float16,0,0.013823999712864557
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,128,48,1,64,0,1,float16,fp8,0,0.014165333161751429
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,128,48,1,64,0,1,fp8,fp8,0,0.015872000406185787
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,128,48,2,64,0,1,float16,float16,0,0.01331199953953425
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,128,48,2,64,0,1,float16,fp8,0,0.01331199953953425
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,128,48,2,64,0,1,fp8,fp8,0,0.016042667130629223
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,128,48,4,64,0,1,float16,float16,0,0.013141332815090815
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,128,48,4,64,0,1,float16,fp8,0,0.01331199953953425
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,128,48,4,64,0,1,fp8,fp8,0,0.01621333385507266
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,128,48,8,64,0,1,float16,float16,0,0.01331199953953425
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,128,48,8,64,0,1,float16,fp8,0,0.013141332815090815
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,128,48,8,64,0,1,fp8,fp8,0,0.01570133368174235
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,128,48,48,64,0,1,float16,float16,0,0.010922666639089584
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,128,48,48,64,0,1,float16,fp8,0,0.010922666639089584
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,128,48,48,64,0,1,fp8,fp8,0,0.013482666263977686
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,128,48,1,64,0,1,float16,float16,0,0.010922666639089584
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,128,48,1,64,0,1,fp8,fp8,0,0.013823999712864557
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,128,48,1,64,0,1,float16,fp8,0,0.011264000087976456
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,128,48,2,64,0,1,float16,float16,0,0.010751999914646149
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,128,48,2,64,0,1,float16,fp8,0,0.010922666639089584
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,128,48,2,64,0,1,fp8,fp8,0,0.013653332988421122
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,128,48,4,64,0,1,float16,float16,0,0.010922666639089584
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,128,48,4,64,0,1,float16,fp8,0,0.010922666639089584
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,128,48,4,64,0,1,fp8,fp8,0,0.013482666263977686
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,128,48,8,64,0,1,float16,float16,0,0.010751999914646149
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,128,48,8,64,0,1,fp8,fp8,0,0.013653332988421122
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,128,48,8,64,0,1,float16,fp8,0,0.010922666639089584
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,64,48,1,64,0,1,float16,float16,0,0.6611626545588175
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,64,48,1,64,0,1,float16,fp8,0,0.6570666631062826
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,64,48,2,64,0,1,float16,float16,0,0.6935893694559733
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,64,48,1,64,0,1,fp8,fp8,0,0.7618559996287028
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,64,48,2,64,0,1,float16,fp8,0,0.6893226305643717
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,64,48,2,64,0,1,fp8,fp8,0,0.779263973236084
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,64,48,4,64,0,1,float16,float16,0,0.7424000104268392
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,64,48,4,64,0,1,float16,fp8,0,0.733184019724528
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,64,48,4,64,0,1,fp8,fp8,0,0.8224426905314127
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,64,48,8,64,0,1,float16,float16,0,0.8557226657867432
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,64,48,8,64,0,1,float16,fp8,0,0.830293337504069
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,64,48,8,64,0,1,fp8,fp8,0,0.8867839972178141
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,64,48,48,64,0,1,float16,float16,0,0.8564053376515707
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,64,48,48,64,0,1,float16,fp8,0,0.7918933232625326
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,64,48,48,64,0,1,fp8,fp8,0,0.730282704035441
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,64,48,1,64,0,1,float16,float16,0,0.17561600605646768
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,64,48,1,64,0,1,float16,fp8,0,0.1718613306681315
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,64,48,2,64,0,1,float16,float16,0,0.19285333156585693
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,64,48,1,64,0,1,fp8,fp8,0,0.35089067618052167
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,64,48,2,64,0,1,float16,fp8,0,0.19063466787338257
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,64,48,2,64,0,1,fp8,fp8,0,0.3609600067138672
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,64,48,4,64,0,1,float16,float16,0,0.2373973329861959
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,64,48,4,64,0,1,float16,fp8,0,0.2126506765683492
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,64,48,4,64,0,1,fp8,fp8,0,0.38417065143585205
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,64,48,8,64,0,1,float16,float16,0,0.2834773262341817
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,64,48,8,64,0,1,float16,fp8,0,0.2648746569951375
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,64,48,8,64,0,1,fp8,fp8,0,0.4288853406906128
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,64,48,48,64,0,1,float16,float16,0,0.2573653260866801
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,64,48,48,64,0,1,float16,fp8,0,0.18397865692774454
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,64,48,48,64,0,1,fp8,fp8,0,0.3653973340988159
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,64,48,1,64,0,1,float16,float16,0,0.06877866884072621
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,64,48,1,64,0,1,float16,fp8,0,0.06911999980608623
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,64,48,1,64,0,1,fp8,fp8,0,0.15291733543078104
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,64,48,2,64,0,1,float16,float16,0,0.07133866846561432
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,64,48,2,64,0,1,float16,fp8,0,0.07014399766921997
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,64,48,2,64,0,1,fp8,fp8,0,0.15411200126012167
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,64,48,4,64,0,1,float16,float16,0,0.07116800049940745
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,64,48,4,64,0,1,float16,fp8,0,0.07167999943097432
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,64,48,4,64,0,1,fp8,fp8,0,0.15377066532770792
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,64,48,8,64,0,1,float16,fp8,0,0.07202133536338806
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,64,48,8,64,0,1,float16,float16,0,0.07202133536338806
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,64,48,8,64,0,1,fp8,fp8,0,0.1546239952246348
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,64,48,48,64,0,1,float16,float16,0,0.04625066618124644
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,64,48,48,64,0,1,float16,fp8,0,0.04454400142033895
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,64,48,48,64,0,1,fp8,fp8,0,0.09779199957847595
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,64,48,1,64,0,1,float16,float16,0,0.04095999896526337
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,64,48,1,64,0,1,float16,fp8,0,0.03942399968703588
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,64,48,1,64,0,1,fp8,fp8,0,0.08482133348782857
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,64,48,2,64,0,1,float16,float16,0,0.04095999896526337
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,64,48,2,64,0,1,float16,fp8,0,0.04130133241415024
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,64,48,2,64,0,1,fp8,fp8,0,0.08482133348782857
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,64,48,4,64,0,1,float16,float16,0,0.04147200038035711
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,64,48,4,64,0,1,float16,fp8,0,0.04130133241415024
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,64,48,4,64,0,1,fp8,fp8,0,0.08567466338475545
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,64,48,8,64,0,1,float16,float16,0,0.04027733455101649
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,64,48,8,64,0,1,float16,fp8,0,0.040618665516376495
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,64,48,8,64,0,1,fp8,fp8,0,0.08550399541854858
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,64,48,48,64,0,1,float16,float16,0,0.0290133332212766
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,64,48,48,64,0,1,float16,fp8,0,0.028160000840822857
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,64,48,48,64,0,1,fp8,fp8,0,0.05171200136343638
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,64,48,1,64,0,1,float16,float16,0,0.025941332181294758
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,64,48,1,64,0,1,float16,fp8,0,0.02611200014750163
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,64,48,1,64,0,1,fp8,fp8,0,0.049322664737701416
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,64,48,2,64,0,1,float16,float16,0,0.02628266563018163
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,64,48,2,64,0,1,fp8,fp8,0,0.04966400067011515
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,64,48,2,64,0,1,float16,fp8,0,0.02611200014750163
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,64,48,4,64,0,1,float16,float16,0,0.025941332181294758
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,64,48,4,64,0,1,float16,fp8,0,0.02611200014750163
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,64,48,4,64,0,1,fp8,fp8,0,0.05017599960168203
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,64,48,8,64,0,1,float16,float16,0,0.02611200014750163
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,64,48,8,64,0,1,float16,fp8,0,0.025941332181294758
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,64,48,8,64,0,1,fp8,fp8,0,0.05034666756788889
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,64,48,48,64,0,1,float16,float16,0,0.019285333653291065
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,64,48,48,64,0,1,float16,fp8,0,0.018944000204404194
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,64,48,1,64,0,1,float16,float16,0,0.01826133330663045
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,64,48,48,64,0,1,fp8,fp8,0,0.03345066557327906
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,64,48,1,64,0,1,fp8,fp8,0,0.03276800115903219
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,64,48,1,64,0,1,float16,fp8,0,0.018090666582187016
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,64,48,2,64,0,1,float16,float16,0,0.018432000031073887
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,64,48,2,64,0,1,float16,fp8,0,0.018432000031073887
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,64,48,2,64,0,1,fp8,fp8,0,0.032085334261258446
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,64,48,4,64,0,1,float16,float16,0,0.01826133330663045
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,64,48,4,64,0,1,float16,fp8,0,0.018432000031073887
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,64,48,4,64,0,1,fp8,fp8,0,0.03276800115903219
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,64,48,8,64,0,1,float16,float16,0,0.018602666755517323
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,64,48,8,64,0,1,fp8,fp8,0,0.03345066557327906
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,64,48,8,64,0,1,float16,fp8,0,0.018602666755517323
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,64,48,48,64,0,1,float16,float16,0,0.012117333710193634
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,64,48,48,64,0,1,float16,fp8,0,0.012117333710193634
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,64,48,1,64,0,1,float16,float16,0,0.011776000261306763
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,64,48,48,64,0,1,fp8,fp8,0,0.01911466692884763
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,64,48,1,64,0,1,float16,fp8,0,0.011776000261306763
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,64,48,1,64,0,1,fp8,fp8,0,0.019285333653291065
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,64,48,2,64,0,1,float16,float16,0,0.011605333536863327
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,64,48,2,64,0,1,float16,fp8,0,0.011776000261306763
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,64,48,2,64,0,1,fp8,fp8,0,0.01911466692884763
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,64,48,4,64,0,1,float16,float16,0,0.011605333536863327
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,64,48,4,64,0,1,float16,fp8,0,0.011776000261306763
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,64,48,8,64,0,1,float16,float16,0,0.011605333536863327
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,64,48,8,64,0,1,float16,fp8,0,0.011946666985750198
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,64,48,4,64,0,1,fp8,fp8,0,0.018944000204404194
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,64,48,8,64,0,1,fp8,fp8,0,0.01877333347996076
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,64,48,48,64,0,1,float16,float16,0,0.009557333464423815
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,64,48,48,64,0,1,float16,fp8,0,0.009898666913310686
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,64,48,48,64,0,1,fp8,fp8,0,0.014335999886194864
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,64,48,1,64,0,1,float16,float16,0,0.00972800018886725
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,64,48,1,64,0,1,float16,fp8,0,0.010069333637754122
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,64,48,2,64,0,1,float16,fp8,0,0.00972800018886725
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,64,48,1,64,0,1,fp8,fp8,0,0.014677333335081736
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,64,48,2,64,0,1,float16,float16,0,0.009557333464423815
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,64,48,2,64,0,1,fp8,fp8,0,0.013994666437307993
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,64,48,4,64,0,1,float16,float16,0,0.009557333464423815
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,64,48,4,64,0,1,float16,fp8,0,0.00972800018886725
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,64,48,4,64,0,1,fp8,fp8,0,0.014165333161751429
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,64,48,8,64,0,1,float16,float16,0,0.009557333464423815
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,64,48,8,64,0,1,float16,fp8,0,0.00972800018886725
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,64,48,8,64,0,1,fp8,fp8,0,0.0145066666106383
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,64,48,48,64,0,1,float16,fp8,0,0.00938666673998038
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,64,48,48,64,0,1,float16,float16,0,0.009045333291093508
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,64,48,1,64,0,1,float16,float16,0,0.008874666566650072
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,64,48,48,64,0,1,fp8,fp8,0,0.01228800043463707
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,64,48,1,64,0,1,float16,fp8,0,0.009045333291093508
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,64,48,1,64,0,1,fp8,fp8,0,0.012629333883523941
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,64,48,2,64,0,1,float16,float16,0,0.008874666566650072
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,64,48,2,64,0,1,float16,fp8,0,0.00938666673998038
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,64,48,2,64,0,1,fp8,fp8,0,0.012458667159080505
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,64,48,4,64,0,1,float16,float16,0,0.008874666566650072
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,64,48,4,64,0,1,float16,fp8,0,0.00938666673998038
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,64,48,4,64,0,1,fp8,fp8,0,0.012458667159080505
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,64,48,8,64,0,1,float16,float16,0,0.008874666566650072
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,64,48,8,64,0,1,fp8,fp8,0,0.012629333883523941
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,64,48,8,64,0,1,float16,fp8,0,0.00938666673998038
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,32,48,1,64,0,1,float16,float16,0,0.17783466974894205
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,32,48,1,64,0,1,float16,fp8,0,0.17783466974894205
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,32,48,1,64,0,1,fp8,fp8,0,0.5478399991989136
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,32,48,2,64,0,1,float16,float16,0,0.1914880077044169
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,32,48,2,64,0,1,float16,fp8,0,0.19319466749827066
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,32,48,4,64,0,1,float16,float16,0,0.2126506765683492
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,32,48,4,64,0,1,float16,fp8,0,0.2039466698964437
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,32,48,2,64,0,1,fp8,fp8,0,0.5584213336308798
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,32,48,4,64,0,1,fp8,fp8,0,0.5824853181838989
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,32,48,8,64,0,1,float16,float16,0,0.27374933163324994
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,32,48,8,64,0,1,float16,fp8,0,0.2524159948031108
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,32,48,48,64,0,1,float16,float16,0,0.2624853253364563
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,32,48,8,64,0,1,fp8,fp8,0,0.6237866481145223
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,32,48,48,64,0,1,float16,fp8,0,0.18619734048843384
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,32,48,48,64,0,1,fp8,fp8,0,0.45653335253397626
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,32,48,1,64,0,1,float16,float16,0,0.07901866734027863
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,32,48,1,64,0,1,float16,fp8,0,0.07816533247629802
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,32,48,1,64,0,1,fp8,fp8,0,0.2505386670430501
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,32,48,2,64,0,1,float16,float16,0,0.07918933530648549
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,32,48,2,64,0,1,float16,fp8,0,0.08004266520341237
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,32,48,2,64,0,1,fp8,fp8,0,0.252074658870697
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,32,48,4,64,0,1,float16,float16,0,0.08021333316961925
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,32,48,4,64,0,1,float16,fp8,0,0.07987200220425923
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,32,48,4,64,0,1,fp8,fp8,0,0.2525866627693176
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,32,48,8,64,0,1,float16,float16,0,0.08140799899895985
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,32,48,8,64,0,1,float16,fp8,0,0.08038400113582611
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,32,48,8,64,0,1,fp8,fp8,0,0.2529279987017314
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,32,48,48,64,0,1,float16,float16,0,0.04795733094215393
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,32,48,48,64,0,1,float16,fp8,0,0.045909335215886436
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,32,48,48,64,0,1,fp8,fp8,0,0.14728533228238425
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,32,48,1,64,0,1,float16,float16,0,0.04386133452256521
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,32,48,1,64,0,1,float16,fp8,0,0.04351999859015147
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,32,48,1,64,0,1,fp8,fp8,0,0.13448533415794373
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,32,48,2,64,0,1,float16,float16,0,0.044031997521718345
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,32,48,2,64,0,1,float16,fp8,0,0.04369066655635834
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,32,48,2,64,0,1,fp8,fp8,0,0.13448533415794373
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,32,48,4,64,0,1,float16,float16,0,0.04386133452256521
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,32,48,4,64,0,1,float16,fp8,0,0.04369066655635834
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,32,48,4,64,0,1,fp8,fp8,0,0.13448533415794373
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,32,48,8,64,0,1,float16,fp8,0,0.044031997521718345
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,32,48,8,64,0,1,float16,float16,0,0.04351999859015147
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,32,48,8,64,0,1,fp8,fp8,0,0.13431466619173685
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,32,48,48,64,0,1,float16,float16,0,0.02918400118748347
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,32,48,48,64,0,1,float16,fp8,0,0.0288426677385966
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,32,48,48,64,0,1,fp8,fp8,0,0.07816533247629802
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,32,48,1,64,0,1,float16,float16,0,0.027647999425729115
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,32,48,1,64,0,1,float16,fp8,0,0.027477333943049114
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,32,48,1,64,0,1,fp8,fp8,0,0.07577600081761678
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,32,48,2,64,0,1,float16,float16,0,0.027477333943049114
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,32,48,2,64,0,1,float16,fp8,0,0.027477333943049114
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,32,48,2,64,0,1,fp8,fp8,0,0.07543466488520305
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,32,48,4,64,0,1,float16,float16,0,0.027989332874615986
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,32,48,4,64,0,1,float16,fp8,0,0.027818667391935985
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,32,48,4,64,0,1,fp8,fp8,0,0.07611733178297679
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,32,48,8,64,0,1,float16,float16,0,0.027818667391935985
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,32,48,8,64,0,1,float16,fp8,0,0.028160000840822857
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,32,48,8,64,0,1,fp8,fp8,0,0.07611733178297679
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,32,48,48,64,0,1,float16,float16,0,0.01911466692884763
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,32,48,48,64,0,1,fp8,fp8,0,0.045738667249679565
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,32,48,48,64,0,1,float16,fp8,0,0.01911466692884763
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,32,48,1,64,0,1,float16,float16,0,0.018432000031073887
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,32,48,1,64,0,1,float16,fp8,0,0.018432000031073887
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,32,48,1,64,0,1,fp8,fp8,0,0.04454400142033895
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,32,48,2,64,0,1,float16,float16,0,0.018602666755517323
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,32,48,2,64,0,1,float16,fp8,0,0.018090666582187016
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,32,48,2,64,0,1,fp8,fp8,0,0.04505600035190582
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,32,48,4,64,0,1,float16,float16,0,0.01877333347996076
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,32,48,4,64,0,1,fp8,fp8,0,0.04505600035190582
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,32,48,4,64,0,1,float16,fp8,0,0.01826133330663045
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,32,48,8,64,0,1,float16,float16,0,0.018090666582187016
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,32,48,8,64,0,1,fp8,fp8,0,0.045567999283472695
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,32,48,8,64,0,1,float16,fp8,0,0.01826133330663045
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,32,48,48,64,0,1,float16,float16,0,0.012970666090647379
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,32,48,48,64,0,1,float16,fp8,0,0.013141332815090815
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,32,48,48,64,0,1,fp8,fp8,0,0.02935466667016347
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,32,48,1,64,0,1,float16,float16,0,0.012629333883523941
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,32,48,1,64,0,1,float16,fp8,0,0.012800000607967377
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,32,48,1,64,0,1,fp8,fp8,0,0.0290133332212766
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,32,48,2,64,0,1,float16,float16,0,0.012629333883523941
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,32,48,2,64,0,1,float16,fp8,0,0.012629333883523941
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,32,48,2,64,0,1,fp8,fp8,0,0.0290133332212766
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,32,48,4,64,0,1,float16,float16,0,0.012458667159080505
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,32,48,4,64,0,1,float16,fp8,0,0.012800000607967377
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,32,48,4,64,0,1,fp8,fp8,0,0.0290133332212766
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,32,48,8,64,0,1,float16,float16,0,0.012629333883523941
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,32,48,8,64,0,1,float16,fp8,0,0.012629333883523941
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,32,48,8,64,0,1,fp8,fp8,0,0.02918400118748347
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,32,48,48,64,0,1,float16,float16,0,0.00972800018886725
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,32,48,48,64,0,1,float16,fp8,0,0.010069333637754122
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,32,48,48,64,0,1,fp8,fp8,0,0.017749333133300144
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,32,48,1,64,0,1,float16,fp8,0,0.010069333637754122
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,32,48,1,64,0,1,float16,float16,0,0.00972800018886725
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,32,48,2,64,0,1,float16,float16,0,0.00972800018886725
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,32,48,1,64,0,1,fp8,fp8,0,0.017749333133300144
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,32,48,2,64,0,1,float16,fp8,0,0.010239999741315842
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,32,48,2,64,0,1,fp8,fp8,0,0.01791999985774358
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,32,48,4,64,0,1,float16,float16,0,0.010069333637754122
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,32,48,4,64,0,1,float16,fp8,0,0.010410666465759277
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,32,48,4,64,0,1,fp8,fp8,0,0.018090666582187016
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,32,48,8,64,0,1,float16,float16,0,0.00972800018886725
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,32,48,8,64,0,1,float16,fp8,0,0.009898666913310686
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,32,48,8,64,0,1,fp8,fp8,0,0.017749333133300144
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,32,48,48,64,0,1,float16,float16,0,0.008703999842206636
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,32,48,48,64,0,1,float16,fp8,0,0.0085333331177632
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,32,48,48,64,0,1,fp8,fp8,0,0.01331199953953425
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,32,48,1,64,0,1,float16,float16,0,0.008874666566650072
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,32,48,1,64,0,1,float16,fp8,0,0.009045333291093508
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,32,48,1,64,0,1,fp8,fp8,0,0.013653332988421122
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,32,48,2,64,0,1,float16,float16,0,0.0085333331177632
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,32,48,2,64,0,1,fp8,fp8,0,0.013823999712864557
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,32,48,2,64,0,1,float16,fp8,0,0.008874666566650072
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,32,48,4,64,0,1,float16,float16,0,0.009216000015536943
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,32,48,4,64,0,1,float16,fp8,0,0.008874666566650072
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,32,48,4,64,0,1,fp8,fp8,0,0.013482666263977686
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,32,48,8,64,0,1,float16,float16,0,0.008703999842206636
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,32,48,8,64,0,1,fp8,fp8,0,0.013482666263977686
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,32,48,8,64,0,1,float16,fp8,0,0.008874666566650072
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,32,48,48,64,0,1,float16,float16,0,0.008703999842206636
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,32,48,48,64,0,1,float16,fp8,0,0.008362666393319765
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,32,48,48,64,0,1,fp8,fp8,0,0.011946666985750198
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,32,48,1,64,0,1,float16,float16,0,0.008192000289758047
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,32,48,1,64,0,1,float16,fp8,0,0.0085333331177632
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,32,48,1,64,0,1,fp8,fp8,0,0.01228800043463707
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,32,48,2,64,0,1,float16,float16,0,0.008021333565314611
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,32,48,2,64,0,1,float16,fp8,0,0.008703999842206636
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,32,48,2,64,0,1,fp8,fp8,0,0.01228800043463707
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,32,48,4,64,0,1,float16,float16,0,0.008192000289758047
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,32,48,4,64,0,1,float16,fp8,0,0.009194666519761086
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,32,48,4,64,0,1,fp8,fp8,0,0.01228800043463707
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,32,48,8,64,0,1,float16,float16,0,0.008362666393319765
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,32,48,8,64,0,1,float16,fp8,0,0.008874666566650072
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,32,48,8,64,0,1,fp8,fp8,0,0.012117333710193634
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,16,48,1,64,0,1,float16,float16,0,0.11315199732780457
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,16,48,1,64,0,1,float16,fp8,0,0.11332266529401143
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,16,48,2,64,0,1,float16,float16,0,0.11417599519093831
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,16,48,1,64,0,1,fp8,fp8,0,0.4522666533788045
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,16,48,2,64,0,1,float16,fp8,0,0.11520000298817952
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,16,48,2,64,0,1,fp8,fp8,0,0.45209598541259766
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,16,48,4,64,0,1,float16,float16,0,0.1153706709543864
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,16,48,4,64,0,1,float16,fp8,0,0.11502933502197266
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,16,48,4,64,0,1,fp8,fp8,0,0.46199464797973633
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,16,48,8,64,0,1,float16,float16,0,0.1160533328851064
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,16,48,8,64,0,1,float16,fp8,0,0.1153706709543864
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,16,48,8,64,0,1,fp8,fp8,0,0.45482667287190753
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,16,48,48,64,0,1,float16,float16,0,0.06297599772612254
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,16,48,48,64,0,1,float16,fp8,0,0.06075733403364817
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,16,48,48,64,0,1,fp8,fp8,0,0.24627200762430826
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,16,48,1,64,0,1,float16,float16,0,0.06144000093142191
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,16,48,1,64,0,1,float16,fp8,0,0.06109866499900818
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,16,48,1,64,0,1,fp8,fp8,0,0.2336426575978597
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,16,48,2,64,0,1,float16,float16,0,0.06126933296521505
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,16,48,2,64,0,1,float16,fp8,0,0.06126933296521505
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,16,48,2,64,0,1,fp8,fp8,0,0.23398399353027344
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,16,48,4,64,0,1,float16,float16,0,0.06144000093142191
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,16,48,4,64,0,1,float16,fp8,0,0.061610668897628784
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,16,48,4,64,0,1,fp8,fp8,0,0.23398399353027344
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,16,48,8,64,0,1,float16,float16,0,0.061610668897628784
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,16,48,8,64,0,1,float16,fp8,0,0.06144000093142191
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,16,48,8,64,0,1,fp8,fp8,0,0.23432532946268717
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,16,48,48,64,0,1,float16,float16,0,0.03549866626660029
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,16,48,48,64,0,1,float16,fp8,0,0.03515733281771342
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,16,48,48,64,0,1,fp8,fp8,0,0.12595199545224509
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,16,48,1,64,0,1,float16,float16,0,0.03601066768169403
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,16,48,1,64,0,1,float16,fp8,0,0.060415998101234436
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,16,48,2,64,0,1,float16,float16,0,0.03583999971548716
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,16,48,1,64,0,1,fp8,fp8,0,0.12526933352152506
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,16,48,2,64,0,1,float16,fp8,0,0.03583999971548716
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,16,48,2,64,0,1,fp8,fp8,0,0.12475732962290446
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,16,48,4,64,0,1,float16,float16,0,0.03566933423280716
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,16,48,4,64,0,1,float16,fp8,0,0.03566933423280716
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,16,48,4,64,0,1,fp8,fp8,0,0.12595199545224509
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,16,48,8,64,0,1,float16,float16,0,0.03549866626660029
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,16,48,8,64,0,1,float16,fp8,0,0.03549866626660029
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,16,48,8,64,0,1,fp8,fp8,0,0.12612266341845194
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,16,48,48,64,0,1,float16,float16,0,0.02252800017595291
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,16,48,48,64,0,1,float16,fp8,0,0.022357332209746044
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,16,48,48,64,0,1,fp8,fp8,0,0.07133866846561432
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,16,48,1,64,0,1,float16,float16,0,0.022357332209746044
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,16,48,1,64,0,1,float16,fp8,0,0.022698665658632915
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,16,48,1,64,0,1,fp8,fp8,0,0.07031466563542683
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,16,48,2,64,0,1,float16,float16,0,0.022698665658632915
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,16,48,2,64,0,1,float16,fp8,0,0.022869333624839783
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,16,48,2,64,0,1,fp8,fp8,0,0.07048533360163371
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,16,48,4,64,0,1,float16,fp8,0,0.022698665658632915
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,16,48,4,64,0,1,float16,float16,0,0.022698665658632915
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,16,48,4,64,0,1,fp8,fp8,0,0.07150933146476746
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,16,48,8,64,0,1,float16,float16,0,0.022869333624839783
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,16,48,8,64,0,1,float16,fp8,0,0.022698665658632915
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,16,48,8,64,0,1,fp8,fp8,0,0.07150933146476746
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,16,48,48,64,0,1,float16,float16,0,0.014165333161751429
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,16,48,48,64,0,1,float16,fp8,0,0.013994666437307993
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,16,48,48,64,0,1,fp8,fp8,0,0.040789333482583366
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,16,48,1,64,0,1,float16,fp8,0,0.014165333161751429
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,16,48,1,64,0,1,float16,float16,0,0.013994666437307993
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,16,48,1,64,0,1,fp8,fp8,0,0.04095999896526337
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,16,48,2,64,0,1,float16,float16,0,0.013823999712864557
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,16,48,2,64,0,1,float16,fp8,0,0.014335999886194864
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,16,48,2,64,0,1,fp8,fp8,0,0.04095999896526337
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,16,48,4,64,0,1,float16,float16,0,0.013823999712864557
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,16,48,4,64,0,1,float16,fp8,0,0.013994666437307993
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,16,48,4,64,0,1,fp8,fp8,0,0.04113066693147024
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,16,48,8,64,0,1,float16,float16,0,0.013994666437307993
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,16,48,8,64,0,1,float16,fp8,0,0.014165333161751429
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,16,48,8,64,0,1,fp8,fp8,0,0.04130133241415024
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,16,48,48,64,0,1,float16,float16,0,0.010922666639089584
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,16,48,48,64,0,1,float16,fp8,0,0.01109333336353302
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,16,48,48,64,0,1,fp8,fp8,0,0.027647999425729115
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,16,48,1,64,0,1,float16,float16,0,0.01109333336353302
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,16,48,1,64,0,1,float16,fp8,0,0.011264000087976456
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,16,48,1,64,0,1,fp8,fp8,0,0.027989332874615986
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,16,48,2,64,0,1,float16,fp8,0,0.011264000087976456
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,16,48,2,64,0,1,float16,float16,0,0.010922666639089584
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,16,48,2,64,0,1,fp8,fp8,0,0.027818667391935985
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,16,48,4,64,0,1,float16,float16,0,0.010922666639089584
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,16,48,4,64,0,1,float16,fp8,0,0.011264000087976456
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,16,48,4,64,0,1,fp8,fp8,0,0.027647999425729115
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,16,48,8,64,0,1,float16,float16,0,0.010751999914646149
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,16,48,8,64,0,1,float16,fp8,0,0.011264000087976456
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,16,48,8,64,0,1,fp8,fp8,0,0.027989332874615986
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,16,48,48,64,0,1,float16,float16,0,0.0085333331177632
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,16,48,48,64,0,1,float16,fp8,0,0.008703999842206636
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,16,48,48,64,0,1,fp8,fp8,0,0.016384000579516094
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,16,48,1,64,0,1,float16,float16,0,0.008874666566650072
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,16,48,1,64,0,1,float16,fp8,0,0.008874666566650072
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,16,48,1,64,0,1,fp8,fp8,0,0.01672533278663953
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,16,48,2,64,0,1,float16,float16,0,0.008703999842206636
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,16,48,2,64,0,1,float16,fp8,0,0.008703999842206636
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,16,48,2,64,0,1,fp8,fp8,0,0.016895999511082966
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,16,48,4,64,0,1,float16,float16,0,0.008703999842206636
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,16,48,4,64,0,1,float16,fp8,0,0.008874666566650072
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,16,48,4,64,0,1,fp8,fp8,0,0.016895999511082966
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,16,48,8,64,0,1,float16,float16,0,0.008362666393319765
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,16,48,8,64,0,1,float16,fp8,0,0.008874666566650072
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,16,48,8,64,0,1,fp8,fp8,0,0.016895999511082966
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,16,48,48,64,0,1,float16,float16,0,0.008192000289758047
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,16,48,48,64,0,1,float16,fp8,0,0.009045333291093508
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,16,48,48,64,0,1,fp8,fp8,0,0.012970666090647379
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,16,48,1,64,0,1,float16,float16,0,0.009045333291093508
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,16,48,1,64,0,1,float16,fp8,0,0.008192000289758047
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,16,48,1,64,0,1,fp8,fp8,0,0.013141332815090815
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,16,48,2,64,0,1,float16,float16,0,0.008703999842206636
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,16,48,2,64,0,1,fp8,fp8,0,0.012970666090647379
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,16,48,2,64,0,1,float16,fp8,0,0.008192000289758047
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,16,48,4,64,0,1,float16,float16,0,0.009045333291093508
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,16,48,4,64,0,1,float16,fp8,0,0.008874666566650072
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,16,48,4,64,0,1,fp8,fp8,0,0.013141332815090815
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,16,48,8,64,0,1,float16,float16,0,0.008362666393319765
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,16,48,8,64,0,1,float16,fp8,0,0.008362666393319765
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,16,48,8,64,0,1,fp8,fp8,0,0.013141332815090815
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,16,48,48,64,0,1,float16,float16,0,0.008362666393319765
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,16,48,48,64,0,1,float16,fp8,0,0.007850666840871176
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,16,48,48,64,0,1,fp8,fp8,0,0.011776000261306763
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,16,48,1,64,0,1,float16,float16,0,0.008703999842206636
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,16,48,1,64,0,1,float16,fp8,0,0.008703999842206636
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,16,48,1,64,0,1,fp8,fp8,0,0.011946666985750198
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,16,48,2,64,0,1,float16,float16,0,0.0085333331177632
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,16,48,2,64,0,1,float16,fp8,0,0.008703999842206636
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,16,48,2,64,0,1,fp8,fp8,0,0.012117333710193634
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,16,48,4,64,0,1,float16,fp8,0,0.009216000015536943
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,16,48,4,64,0,1,float16,float16,0,0.0085333331177632
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,16,48,4,64,0,1,fp8,fp8,0,0.012117333710193634
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,16,48,8,64,0,1,float16,float16,0,0.0086666668454806
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,16,48,8,64,0,1,float16,fp8,0,0.008192000289758047
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,16,48,8,64,0,1,fp8,fp8,0,0.011946666985750198
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,16384,40,1,64,0,1,float16,float16,0,78.76437377929688
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,16384,40,1,64,0,1,float16,fp8,0,79.07618204752605
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,16384,40,2,64,0,1,float16,float16,0,77.0546366373698
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,16384,40,2,64,0,1,float16,fp8,0,79.02720133463542
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,16384,40,4,64,0,1,float16,float16,0,77.45826212565105
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,16384,40,4,64,0,1,float16,fp8,0,79.11646016438802
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,16384,40,1,64,0,1,fp8,fp8,0,99.2206522623698
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,16384,40,2,64,0,1,fp8,fp8,0,99.69783528645833
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,16384,40,40,64,0,1,float16,float16,0,38.68006388346354
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,16384,40,40,64,0,1,float16,fp8,0,38.58568572998047
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,16384,40,1,64,0,1,float16,float16,0,37.72740173339844
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,16384,40,40,64,0,1,fp8,fp8,0,51.68213399251302
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,16384,40,8,64,0,1,float16,float16,0,79.46240234375
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,16384,40,8,64,0,1,float16,fp8,0,78.2035624186198
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,16384,40,4,64,0,1,fp8,fp8,0,99.5054931640625
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,16384,40,1,64,0,1,float16,fp8,0,37.69514719645182
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,16384,40,8,64,0,1,fp8,fp8,0,100.62472534179688
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,16384,40,1,64,0,1,fp8,fp8,0,48.68590799967448
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,16384,40,2,64,0,1,float16,float16,0,37.471232096354164
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,16384,40,2,64,0,1,float16,fp8,0,38.22967529296875
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,16384,40,4,64,0,1,float16,float16,0,37.18348693847656
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,16384,40,2,64,0,1,fp8,fp8,0,48.31146748860677
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,16384,40,4,64,0,1,float16,fp8,0,36.292948404947914
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,16384,40,8,64,0,1,float16,float16,0,37.50673166910807
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,16384,40,40,64,0,1,float16,float16,0,19.31383514404297
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,16384,40,4,64,0,1,fp8,fp8,0,48.406697591145836
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,16384,40,40,64,0,1,float16,fp8,0,18.917887369791668
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,16384,40,8,64,0,1,float16,fp8,0,37.520896911621094
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,16384,40,40,64,0,1,fp8,fp8,0,25.32659149169922
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,16384,40,8,64,0,1,fp8,fp8,0,49.183746337890625
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,16384,40,1,64,0,1,float16,float16,0,18.895360310872395
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,16384,40,1,64,0,1,float16,fp8,0,19.346261342366535
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,16384,40,2,64,0,1,float16,float16,0,18.953044891357422
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,16384,40,1,64,0,1,fp8,fp8,0,24.54340362548828
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,16384,40,2,64,0,1,float16,fp8,0,18.929322560628254
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,16384,40,2,64,0,1,fp8,fp8,0,24.549034118652344
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,16384,40,4,64,0,1,float16,fp8,0,18.63475163777669
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,16384,40,4,64,0,1,float16,float16,0,19.04315694173177
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,16384,40,4,64,0,1,fp8,fp8,0,24.264020284016926
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,16384,40,8,64,0,1,float16,float16,0,18.94314702351888
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,16384,40,40,64,0,1,float16,float16,0,9.949354807535807
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,16384,40,8,64,0,1,float16,fp8,0,18.570069630940754
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,16384,40,40,64,0,1,float16,fp8,0,9.997141520182291
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,16384,40,1,64,0,1,float16,float16,0,10.105855941772461
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,16384,40,8,64,0,1,fp8,fp8,0,24.801109313964844
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,16384,40,40,64,0,1,fp8,fp8,0,12.895061492919922
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,16384,40,1,64,0,1,float16,fp8,0,9.644543965657553
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,16384,40,2,64,0,1,float16,float16,0,9.763498942057291
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,16384,40,1,64,0,1,fp8,fp8,0,12.306432088216146
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,16384,40,2,64,0,1,float16,fp8,0,9.718954722086588
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,16384,40,2,64,0,1,fp8,fp8,0,12.432725270589193
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,16384,40,4,64,0,1,float16,float16,0,9.527125040690104
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,16384,40,4,64,0,1,float16,fp8,0,9.936896006266275
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,16384,40,8,64,0,1,float16,float16,0,9.628672281901041
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,16384,40,4,64,0,1,fp8,fp8,0,12.332202911376953
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,16384,40,8,64,0,1,float16,fp8,0,9.833642959594727
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,16384,40,8,64,0,1,fp8,fp8,0,12.367530822753906
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,12288,40,1,64,0,1,float16,float16,0,43.28089396158854
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,12288,40,1,64,0,1,float16,fp8,0,43.934549967447914
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,12288,40,2,64,0,1,float16,float16,0,44.08968607584635
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,12288,40,2,64,0,1,float16,fp8,0,43.010050455729164
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,12288,40,1,64,0,1,fp8,fp8,0,54.99699401855469
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,12288,40,4,64,0,1,float16,float16,0,43.089579264322914
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,12288,40,4,64,0,1,float16,fp8,0,43.4882558186849
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,12288,40,2,64,0,1,fp8,fp8,0,55.86329650878906
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,12288,40,40,64,0,1,float16,float16,0,22.429524739583332
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,12288,40,40,64,0,1,float16,fp8,0,22.270294189453125
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,12288,40,1,64,0,1,float16,float16,0,22.347946166992188
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,12288,40,8,64,0,1,float16,float16,0,43.52392578125
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,12288,40,40,64,0,1,fp8,fp8,0,29.351595560709637
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,12288,40,4,64,0,1,fp8,fp8,0,55.80339050292969
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,12288,40,8,64,0,1,float16,fp8,0,43.942057291666664
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,12288,40,8,64,0,1,fp8,fp8,0,56.738301595052086
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,12288,40,1,64,0,1,float16,fp8,0,22.09058125813802
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,12288,40,2,64,0,1,float16,float16,0,22.050987243652344
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,12288,40,1,64,0,1,fp8,fp8,0,27.534164428710938
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,12288,40,2,64,0,1,float16,fp8,0,22.13819630940755
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,12288,40,4,64,0,1,float16,float16,0,21.3032964070638
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,12288,40,4,64,0,1,float16,fp8,0,22.06122589111328
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,12288,40,2,64,0,1,fp8,fp8,0,27.6125005086263
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,12288,40,40,64,0,1,float16,float16,0,11.713706970214844
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,12288,40,4,64,0,1,fp8,fp8,0,27.623423258463543
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,12288,40,8,64,0,1,float16,float16,0,21.53386688232422
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,12288,40,40,64,0,1,float16,fp8,0,11.73964818318685
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,12288,40,1,64,0,1,float16,float16,0,11.433301289876303
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,12288,40,8,64,0,1,float16,fp8,0,21.794474283854168
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,12288,40,40,64,0,1,fp8,fp8,0,14.954666137695312
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,12288,40,8,64,0,1,fp8,fp8,0,27.92857615152995
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,12288,40,1,64,0,1,float16,fp8,0,11.461461385091146
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,12288,40,2,64,0,1,float16,float16,0,11.304276784261068
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,12288,40,2,64,0,1,float16,fp8,0,11.354452768961588
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,12288,40,1,64,0,1,fp8,fp8,0,13.817855834960938
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,12288,40,2,64,0,1,fp8,fp8,0,13.902506510416666
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,12288,40,4,64,0,1,float16,float16,0,11.318954467773438
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,12288,40,4,64,0,1,float16,fp8,0,11.495765686035156
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,12288,40,4,64,0,1,fp8,fp8,0,13.997397104899088
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,12288,40,40,64,0,1,float16,float16,0,5.750272115071614
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,12288,40,8,64,0,1,float16,float16,0,11.205632527669271
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,12288,40,8,64,0,1,float16,fp8,0,11.286698659261068
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,12288,40,40,64,0,1,float16,fp8,0,5.70248540242513
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,12288,40,1,64,0,1,float16,float16,0,5.256021181742351
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,12288,40,8,64,0,1,fp8,fp8,0,13.915306091308594
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,12288,40,40,64,0,1,fp8,fp8,0,7.436117172241211
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,12288,40,1,64,0,1,float16,fp8,0,5.405525207519531
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,12288,40,1,64,0,1,fp8,fp8,0,7.083861033121745
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,12288,40,2,64,0,1,float16,fp8,0,5.553834915161133
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,12288,40,2,64,0,1,float16,float16,0,5.658453623453776
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,12288,40,4,64,0,1,float16,float16,0,5.698218663533528
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,12288,40,2,64,0,1,fp8,fp8,0,7.076864242553711
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,12288,40,4,64,0,1,float16,fp8,0,5.133994738260905
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,12288,40,4,64,0,1,fp8,fp8,0,7.143765131632487
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,12288,40,8,64,0,1,float16,float16,0,5.433343887329102
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,12288,40,8,64,0,1,float16,fp8,0,4.89301331837972
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,12288,40,8,64,0,1,fp8,fp8,0,7.053824106852214
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,10240,40,1,64,0,1,float16,float16,0,31.12482198079427
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,10240,40,1,64,0,1,float16,fp8,0,31.44567362467448
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,10240,40,2,64,0,1,float16,float16,0,30.595242818196613
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,10240,40,2,64,0,1,float16,fp8,0,30.59899648030599
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,10240,40,1,64,0,1,fp8,fp8,0,38.59387715657552
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,10240,40,4,64,0,1,float16,float16,0,30.063446044921875
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,10240,40,2,64,0,1,fp8,fp8,0,39.10502370198568
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,10240,40,4,64,0,1,float16,fp8,0,30.514005025227863
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,10240,40,40,64,0,1,float16,float16,0,16.310101826985676
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,10240,40,40,64,0,1,float16,fp8,0,15.94606908162435
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,10240,40,8,64,0,1,float16,fp8,0,30.35869852701823
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,10240,40,8,64,0,1,float16,float16,0,31.263743082682293
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,10240,40,1,64,0,1,float16,float16,0,15.694506327311197
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,10240,40,4,64,0,1,fp8,fp8,0,39.63665008544922
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,10240,40,40,64,0,1,fp8,fp8,0,20.817237854003906
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,10240,40,8,64,0,1,fp8,fp8,0,39.766868591308594
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,10240,40,1,64,0,1,float16,fp8,0,15.867562611897787
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,10240,40,2,64,0,1,float16,float16,0,15.62282689412435
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,10240,40,2,64,0,1,float16,fp8,0,15.910228729248047
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,10240,40,1,64,0,1,fp8,fp8,0,19.257002512613933
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,10240,40,2,64,0,1,fp8,fp8,0,19.277652740478516
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,10240,40,4,64,0,1,float16,float16,0,15.705599466959635
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,10240,40,4,64,0,1,float16,fp8,0,15.296512603759766
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,10240,40,4,64,0,1,fp8,fp8,0,19.53416570027669
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,10240,40,8,64,0,1,float16,float16,0,15.386112213134766
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,10240,40,40,64,0,1,float16,float16,0,8.184661229451498
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,10240,40,40,64,0,1,float16,fp8,0,7.966037114461263
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,10240,40,1,64,0,1,float16,float16,0,7.517525355021159
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,10240,40,40,64,0,1,fp8,fp8,0,10.585258483886719
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,10240,40,8,64,0,1,float16,fp8,0,15.491925557454428
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,10240,40,8,64,0,1,fp8,fp8,0,19.97994613647461
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,10240,40,1,64,0,1,float16,fp8,0,8.277503967285156
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,10240,40,1,64,0,1,fp8,fp8,0,9.79046376546224
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,10240,40,2,64,0,1,float16,float16,0,7.774208068847656
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,10240,40,2,64,0,1,float16,fp8,0,7.881557464599609
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,10240,40,2,64,0,1,fp8,fp8,0,9.831765492757162
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,10240,40,4,64,0,1,float16,float16,0,7.971328099568685
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,10240,40,4,64,0,1,float16,fp8,0,7.73410161336263
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,10240,40,8,64,0,1,float16,float16,0,8.15172259012858
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,10240,40,4,64,0,1,fp8,fp8,0,9.961813608805338
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,10240,40,8,64,0,1,float16,fp8,0,7.69706662495931
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,10240,40,40,64,0,1,float16,float16,0,3.758421262105306
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,10240,40,40,64,0,1,float16,fp8,0,3.8388051986694336
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,10240,40,1,64,0,1,float16,float16,0,3.3972905476888022
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,10240,40,8,64,0,1,fp8,fp8,0,9.88040542602539
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,10240,40,40,64,0,1,fp8,fp8,0,5.259946823120117
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,10240,40,1,64,0,1,float16,fp8,0,3.6857172648111978
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,10240,40,1,64,0,1,fp8,fp8,0,4.995754559834798
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,10240,40,2,64,0,1,float16,fp8,0,3.3309014638264975
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,10240,40,2,64,0,1,float16,float16,0,3.7080745697021484
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,10240,40,4,64,0,1,float16,float16,0,3.603626569112142
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,10240,40,4,64,0,1,float16,fp8,0,3.5333118438720703
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,10240,40,2,64,0,1,fp8,fp8,0,4.939093271891276
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,10240,40,4,64,0,1,fp8,fp8,0,5.013845443725586
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,10240,40,8,64,0,1,float16,float16,0,3.646122614542643
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,10240,40,8,64,0,1,float16,fp8,0,3.371861457824707
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,10240,40,8,64,0,1,fp8,fp8,0,4.988074620564778
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,8192,40,1,64,0,1,float16,float16,0,41.26532236735026
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,8192,40,1,64,0,1,float16,fp8,0,40.84292348225912
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,8192,40,2,64,0,1,float16,float16,0,41.087318420410156
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,8192,40,2,64,0,1,float16,fp8,0,41.74950408935547
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,8192,40,1,64,0,1,fp8,fp8,0,50.65096537272135
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,8192,40,4,64,0,1,float16,float16,0,41.00386047363281
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,8192,40,4,64,0,1,float16,fp8,0,40.55091094970703
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,8192,40,2,64,0,1,fp8,fp8,0,52.594685872395836
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,8192,40,40,64,0,1,float16,float16,0,21.23878351847331
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,8192,40,40,64,0,1,float16,fp8,0,21.45587158203125
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,8192,40,1,64,0,1,float16,float16,0,20.674388885498047
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,8192,40,40,64,0,1,fp8,fp8,0,27.58604685465495
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,8192,40,8,64,0,1,float16,float16,0,40.30293273925781
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,8192,40,8,64,0,1,float16,fp8,0,39.53954060872396
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,8192,40,4,64,0,1,fp8,fp8,0,52.892669677734375
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,8192,40,8,64,0,1,fp8,fp8,0,54.267903645833336
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,8192,40,1,64,0,1,float16,fp8,0,20.094292958577473
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,8192,40,2,64,0,1,float16,float16,0,20.50542958577474
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,8192,40,1,64,0,1,fp8,fp8,0,25.320960998535156
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,8192,40,2,64,0,1,float16,fp8,0,20.054356892903645
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,8192,40,4,64,0,1,float16,float16,0,19.324586232503254
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,8192,40,2,64,0,1,fp8,fp8,0,25.172139485677082
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,8192,40,4,64,0,1,float16,fp8,0,20.3328857421875
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,8192,40,4,64,0,1,fp8,fp8,0,25.63805898030599
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,8192,40,8,64,0,1,float16,float16,0,20.00145085652669
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,8192,40,40,64,0,1,float16,float16,0,11.372032165527344
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,8192,40,40,64,0,1,float16,fp8,0,10.935296376546225
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,8192,40,1,64,0,1,float16,float16,0,10.361173629760742
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,8192,40,8,64,0,1,float16,fp8,0,20.415828704833984
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,8192,40,40,64,0,1,fp8,fp8,0,13.729450225830078
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,8192,40,8,64,0,1,fp8,fp8,0,26.089813232421875
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,8192,40,1,64,0,1,float16,fp8,0,10.646698633829752
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,8192,40,2,64,0,1,float16,float16,0,10.306218465169271
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,8192,40,1,64,0,1,fp8,fp8,0,12.615679423014322
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,8192,40,2,64,0,1,float16,fp8,0,10.32089614868164
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,8192,40,4,64,0,1,float16,float16,0,10.904576619466146
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,8192,40,2,64,0,1,fp8,fp8,0,12.601685841878256
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,8192,40,4,64,0,1,float16,fp8,0,10.473471959431967
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,8192,40,4,64,0,1,fp8,fp8,0,12.78890609741211
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,8192,40,40,64,0,1,float16,float16,0,5.406378428141276
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,8192,40,8,64,0,1,float16,float16,0,10.551637649536133
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,8192,40,8,64,0,1,float16,fp8,0,10.67093276977539
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,8192,40,40,64,0,1,float16,fp8,0,5.26148255666097
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,8192,40,1,64,0,1,float16,float16,0,4.990463892618815
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,8192,40,40,64,0,1,fp8,fp8,0,6.858922958374023
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,8192,40,8,64,0,1,fp8,fp8,0,12.762794494628906
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,8192,40,1,64,0,1,float16,fp8,0,5.304490725199382
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,8192,40,2,64,0,1,float16,float16,0,5.164202690124512
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,8192,40,1,64,0,1,fp8,fp8,0,6.331392288208008
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,8192,40,2,64,0,1,float16,fp8,0,4.318378766377767
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,8192,40,2,64,0,1,fp8,fp8,0,6.345898946126302
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,8192,40,4,64,0,1,float16,float16,0,5.0215253829956055
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,8192,40,4,64,0,1,float16,fp8,0,5.172565460205078
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,8192,40,4,64,0,1,fp8,fp8,0,6.342826843261719
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,8192,40,8,64,0,1,float16,float16,0,4.769962628682454
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,8192,40,40,64,0,1,float16,float16,0,2.507434686024984
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,8192,40,8,64,0,1,float16,fp8,0,4.592981338500977
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,8192,40,40,64,0,1,float16,fp8,0,2.5212586720784507
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,8192,40,1,64,0,1,float16,float16,0,2.219007968902588
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,8192,40,8,64,0,1,fp8,fp8,0,6.443861643473308
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,8192,40,40,64,0,1,fp8,fp8,0,3.4022401173909507
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,8192,40,1,64,0,1,float16,fp8,0,2.215424060821533
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,8192,40,1,64,0,1,fp8,fp8,0,3.1486291885375977
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,8192,40,2,64,0,1,float16,float16,0,2.2311253547668457
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,8192,40,2,64,0,1,float16,fp8,0,2.2415359814961753
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,8192,40,4,64,0,1,float16,float16,0,2.1628586451212564
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,8192,40,2,64,0,1,fp8,fp8,0,3.129002571105957
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,8192,40,4,64,0,1,float16,fp8,0,2.3232852617899575
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,8192,40,4,64,0,1,fp8,fp8,0,3.173717180887858
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,8192,40,8,64,0,1,float16,float16,0,2.182997385660807
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,8192,40,8,64,0,1,float16,fp8,0,2.2183252970377603
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,8192,40,8,64,0,1,fp8,fp8,0,3.1353174845377603
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,6144,40,1,64,0,1,float16,float16,0,24.095914204915363
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,6144,40,1,64,0,1,float16,fp8,0,23.891626993815105
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,6144,40,2,64,0,1,float16,float16,0,23.461034138997395
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,6144,40,2,64,0,1,float16,fp8,0,24.02850087483724
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,6144,40,1,64,0,1,fp8,fp8,0,29.117268880208332
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,6144,40,4,64,0,1,float16,float16,0,24.74939727783203
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,6144,40,2,64,0,1,fp8,fp8,0,29.413546244303387
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,6144,40,4,64,0,1,float16,fp8,0,23.685630798339844
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,6144,40,40,64,0,1,float16,float16,0,13.250731150309244
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,6144,40,40,64,0,1,float16,fp8,0,13.134165445963541
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,6144,40,1,64,0,1,float16,float16,0,12.119552612304688
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,6144,40,40,64,0,1,fp8,fp8,0,16.16588846842448
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,6144,40,8,64,0,1,float16,float16,0,23.86414845784505
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,6144,40,8,64,0,1,float16,fp8,0,23.429290771484375
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,6144,40,4,64,0,1,fp8,fp8,0,30.200660705566406
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,6144,40,8,64,0,1,fp8,fp8,0,30.437204996744793
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,6144,40,1,64,0,1,float16,fp8,0,12.367188771565756
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,6144,40,2,64,0,1,float16,float16,0,12.253184000651041
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,6144,40,1,64,0,1,fp8,fp8,0,14.62783940633138
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,6144,40,2,64,0,1,float16,fp8,0,11.94222895304362
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,6144,40,4,64,0,1,float16,float16,0,11.954004923502604
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,6144,40,2,64,0,1,fp8,fp8,0,14.467754364013672
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,6144,40,4,64,0,1,float16,fp8,0,12.242603302001953
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,6144,40,4,64,0,1,fp8,fp8,0,14.551722208658854
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,6144,40,40,64,0,1,float16,float16,0,6.613845189412435
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,6144,40,40,64,0,1,float16,fp8,0,6.566399892171224
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,6144,40,8,64,0,1,float16,float16,0,12.142250061035156
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,6144,40,40,64,0,1,fp8,fp8,0,8.126976013183594
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,6144,40,8,64,0,1,float16,fp8,0,12.193108876546225
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,6144,40,1,64,0,1,float16,float16,0,6.102357228597005
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,6144,40,8,64,0,1,fp8,fp8,0,15.055872599283854
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,6144,40,1,64,0,1,float16,fp8,0,5.9912535349528
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,6144,40,2,64,0,1,float16,float16,0,5.947733561197917
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,6144,40,1,64,0,1,fp8,fp8,0,7.2159576416015625
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,6144,40,4,64,0,1,float16,float16,0,5.539157231648763
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,6144,40,4,64,0,1,float16,fp8,0,6.058154424031575
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,6144,40,2,64,0,1,fp8,fp8,0,7.3224531809488935
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,6144,40,2,64,0,1,float16,fp8,0,6.07914670308431
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,6144,40,4,64,0,1,fp8,fp8,0,7.391232172648112
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,6144,40,8,64,0,1,float16,float16,0,5.692586898803711
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,6144,40,40,64,0,1,float16,float16,0,3.1185919443766275
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,6144,40,8,64,0,1,float16,fp8,0,6.006613413492839
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,6144,40,40,64,0,1,float16,fp8,0,3.060394605000814
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,6144,40,40,64,0,1,fp8,fp8,0,3.9920641581217446
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,6144,40,8,64,0,1,fp8,fp8,0,7.35539182027181
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,6144,40,1,64,0,1,float16,float16,0,2.5501012802124023
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,6144,40,1,64,0,1,float16,fp8,0,2.7089920043945312
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,6144,40,1,64,0,1,fp8,fp8,0,3.533482551574707
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,6144,40,2,64,0,1,float16,float16,0,2.4722773234049478
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,6144,40,2,64,0,1,float16,fp8,0,2.5328639348347983
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,6144,40,4,64,0,1,float16,float16,0,2.4458239873250327
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,6144,40,2,64,0,1,fp8,fp8,0,3.594751993815104
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,6144,40,4,64,0,1,float16,fp8,0,2.4267093340555825
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,6144,40,8,64,0,1,float16,float16,0,2.6195626258850098
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,6144,40,4,64,0,1,fp8,fp8,0,3.584170659383138
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,6144,40,8,64,0,1,float16,fp8,0,2.58679469426473
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,6144,40,40,64,0,1,float16,float16,0,1.4917972882588704
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,6144,40,40,64,0,1,float16,fp8,0,1.45305601755778
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,6144,40,8,64,0,1,fp8,fp8,0,3.6223999659220376
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,6144,40,40,64,0,1,fp8,fp8,0,2.0130133628845215
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,6144,40,1,64,0,1,float16,float16,0,1.356287956237793
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,6144,40,1,64,0,1,float16,fp8,0,1.3323946793874104
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,6144,40,2,64,0,1,float16,float16,0,1.3004799683888753
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,6144,40,1,64,0,1,fp8,fp8,0,1.8017279307047527
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,6144,40,2,64,0,1,float16,fp8,0,1.3487787246704102
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,6144,40,2,64,0,1,fp8,fp8,0,1.8029227256774902
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,6144,40,4,64,0,1,float16,float16,0,1.3370025952657063
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,6144,40,4,64,0,1,float16,fp8,0,1.3153279622395833
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,6144,40,4,64,0,1,fp8,fp8,0,1.8030932744344075
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,6144,40,8,64,0,1,float16,float16,0,1.2963840166727703
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,6144,40,8,64,0,1,float16,fp8,0,1.2984320322672527
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,6144,40,8,64,0,1,fp8,fp8,0,1.8131626447041829
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,4096,40,1,64,0,1,float16,float16,0,22.937942504882812
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,4096,40,1,64,0,1,float16,fp8,0,22.78997294108073
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,4096,40,2,64,0,1,float16,fp8,0,22.696619669596355
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,4096,40,2,64,0,1,float16,float16,0,23.98583475748698
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,4096,40,1,64,0,1,fp8,fp8,0,27.483306884765625
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,4096,40,4,64,0,1,float16,float16,0,23.117482503255207
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,4096,40,2,64,0,1,fp8,fp8,0,29.3932367960612
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,4096,40,4,64,0,1,float16,fp8,0,22.884521484375
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,4096,40,40,64,0,1,float16,float16,0,13.135701497395834
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,4096,40,40,64,0,1,float16,fp8,0,12.816725413004557
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,4096,40,8,64,0,1,float16,float16,0,23.315114339192707
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,4096,40,1,64,0,1,float16,float16,0,11.63485844930013
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,4096,40,40,64,0,1,fp8,fp8,0,16.190804799397785
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,4096,40,8,64,0,1,float16,fp8,0,23.060991923014324
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,4096,40,4,64,0,1,fp8,fp8,0,29.42156728108724
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,4096,40,8,64,0,1,fp8,fp8,0,30.627840677897137
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,4096,40,1,64,0,1,float16,fp8,0,11.696980794270834
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,4096,40,1,64,0,1,fp8,fp8,0,13.331626892089844
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,4096,40,2,64,0,1,float16,float16,0,11.642538706461588
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,4096,40,2,64,0,1,float16,fp8,0,11.528874715169271
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,4096,40,4,64,0,1,float16,float16,0,11.574101765950521
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,4096,40,2,64,0,1,fp8,fp8,0,13.696341196695963
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,4096,40,4,64,0,1,float16,fp8,0,11.31332270304362
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,4096,40,4,64,0,1,fp8,fp8,0,13.83560562133789
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,4096,40,8,64,0,1,float16,float16,0,11.605845133463541
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,4096,40,40,64,0,1,float16,fp8,0,6.143658955891927
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,4096,40,40,64,0,1,float16,float16,0,6.506837209065755
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,4096,40,8,64,0,1,float16,fp8,0,11.552085876464844
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,4096,40,1,64,0,1,float16,float16,0,5.549226760864258
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,4096,40,40,64,0,1,fp8,fp8,0,7.945557276407878
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,4096,40,8,64,0,1,fp8,fp8,0,14.09655507405599
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,4096,40,1,64,0,1,float16,fp8,0,5.681151707967122
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,4096,40,1,64,0,1,fp8,fp8,0,6.672384262084961
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,4096,40,2,64,0,1,float16,float16,0,5.50929069519043
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,4096,40,2,64,0,1,float16,fp8,0,5.235541343688965
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,4096,40,4,64,0,1,float16,float16,0,5.360640207926433
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,4096,40,4,64,0,1,float16,fp8,0,5.358762741088867
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,4096,40,2,64,0,1,fp8,fp8,0,6.698325475056966
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,4096,40,4,64,0,1,fp8,fp8,0,6.8887894948323565
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,4096,40,8,64,0,1,float16,float16,0,5.4493865966796875
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,4096,40,8,64,0,1,float16,fp8,0,5.708117167154948
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,4096,40,40,64,0,1,float16,fp8,0,3.0405972798665366
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,4096,40,40,64,0,1,float16,float16,0,3.2394240697224936
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,4096,40,1,64,0,1,float16,float16,0,2.3391572634379068
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,4096,40,40,64,0,1,fp8,fp8,0,3.893418629964193
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,4096,40,8,64,0,1,fp8,fp8,0,6.893226623535156
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,4096,40,1,64,0,1,float16,fp8,0,2.3278932571411133
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,4096,40,1,64,0,1,fp8,fp8,0,3.2928425470987954
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,4096,40,2,64,0,1,float16,float16,0,2.364586671193441
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,4096,40,2,64,0,1,float16,fp8,0,2.360661347707113
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,4096,40,4,64,0,1,float16,float16,0,2.410154660542806
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,4096,40,4,64,0,1,float16,fp8,0,2.4924160639444985
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,4096,40,4,64,0,1,fp8,fp8,0,3.2686080932617188
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,4096,40,2,64,0,1,fp8,fp8,0,3.267242749532064
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,4096,40,8,64,0,1,float16,float16,0,2.5084586143493652
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,4096,40,8,64,0,1,float16,fp8,0,2.538837273915609
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,4096,40,8,64,0,1,fp8,fp8,0,3.3394346237182617
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,4096,40,40,64,0,1,float16,float16,0,1.508010705312093
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,4096,40,40,64,0,1,float16,fp8,0,1.4545920689900715
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,4096,40,40,64,0,1,fp8,fp8,0,1.943552017211914
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,4096,40,1,64,0,1,float16,float16,0,1.1499520142873128
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,4096,40,1,64,0,1,fp8,fp8,0,1.5993173917134602
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,4096,40,1,64,0,1,float16,fp8,0,1.1733333269755046
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,4096,40,2,64,0,1,float16,float16,0,1.184597333272298
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,4096,40,2,64,0,1,float16,fp8,0,1.144320011138916
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,4096,40,4,64,0,1,float16,fp8,0,1.1793066660563152
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,4096,40,2,64,0,1,fp8,fp8,0,1.6046080589294434
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,4096,40,4,64,0,1,float16,float16,0,1.128106673558553
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,4096,40,8,64,0,1,float16,float16,0,1.1357866923014324
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,4096,40,4,64,0,1,fp8,fp8,0,1.6134826342264812
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,4096,40,8,64,0,1,float16,fp8,0,1.1583147048950195
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,4096,40,8,64,0,1,fp8,fp8,0,1.666218598683675
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,4096,40,40,64,0,1,float16,float16,0,0.640341321627299
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,4096,40,40,64,0,1,float16,fp8,0,0.6388053496678671
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,4096,40,1,64,0,1,float16,fp8,0,0.6596266825993856
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,4096,40,40,64,0,1,fp8,fp8,0,0.9729706446329752
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,4096,40,1,64,0,1,float16,float16,0,0.6393173138300577
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,4096,40,1,64,0,1,fp8,fp8,0,0.8733013470967611
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,4096,40,2,64,0,1,float16,float16,0,0.632149338722229
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,4096,40,2,64,0,1,float16,fp8,0,0.6539946794509888
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,4096,40,2,64,0,1,fp8,fp8,0,0.8625493049621582
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,4096,40,4,64,0,1,float16,float16,0,0.6372693379720052
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,4096,40,4,64,0,1,float16,fp8,0,0.6719146569569906
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,4096,40,8,64,0,1,float16,float16,0,0.6333440144856771
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,4096,40,4,64,0,1,fp8,fp8,0,0.86954665184021
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,4096,40,8,64,0,1,float16,fp8,0,0.6413653294245402
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,4096,40,8,64,0,1,fp8,fp8,0,0.8721066315968832
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,3072,40,1,64,0,1,float16,float16,0,13.801984151204428
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,3072,40,1,64,0,1,float16,fp8,0,13.73678970336914
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,3072,40,2,64,0,1,float16,float16,0,13.97589365641276
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,3072,40,1,64,0,1,fp8,fp8,0,16.05068842569987
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,3072,40,2,64,0,1,float16,fp8,0,13.84447987874349
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,3072,40,4,64,0,1,float16,float16,0,13.774847666422525
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,3072,40,2,64,0,1,fp8,fp8,0,16.40550359090169
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,3072,40,4,64,0,1,float16,fp8,0,13.81717300415039
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,3072,40,4,64,0,1,fp8,fp8,0,16.841898600260418
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,3072,40,40,64,0,1,float16,fp8,0,7.975765228271484
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,3072,40,40,64,0,1,float16,float16,0,8.265386581420898
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,3072,40,8,64,0,1,float16,float16,0,13.88595199584961
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,3072,40,1,64,0,1,float16,float16,0,6.232917149861653
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,3072,40,40,64,0,1,fp8,fp8,0,9.560405095418295
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,3072,40,8,64,0,1,float16,fp8,0,14.273024241129557
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,3072,40,8,64,0,1,fp8,fp8,0,17.510400136311848
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,3072,40,1,64,0,1,float16,fp8,0,6.533632278442383
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,3072,40,2,64,0,1,float16,float16,0,6.653952280680339
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,3072,40,1,64,0,1,fp8,fp8,0,7.784618377685547
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,3072,40,2,64,0,1,float16,fp8,0,6.225749333699544
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,3072,40,4,64,0,1,float16,float16,0,6.62391471862793
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,3072,40,4,64,0,1,float16,fp8,0,6.549845377604167
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,3072,40,2,64,0,1,fp8,fp8,0,7.978325525919597
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,3072,40,4,64,0,1,fp8,fp8,0,8.173738479614258
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,3072,40,40,64,0,1,float16,float16,0,4.061183929443359
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,3072,40,8,64,0,1,float16,float16,0,6.572032292683919
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,3072,40,8,64,0,1,float16,fp8,0,6.659413019816081
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,3072,40,40,64,0,1,float16,fp8,0,3.88590939839681
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,3072,40,1,64,0,1,float16,float16,0,3.0044161478678384
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,3072,40,40,64,0,1,fp8,fp8,0,4.70033073425293
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,3072,40,8,64,0,1,fp8,fp8,0,8.196095784505209
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,3072,40,1,64,0,1,float16,fp8,0,2.9499734242757163
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,3072,40,1,64,0,1,fp8,fp8,0,3.791701316833496
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,3072,40,2,64,0,1,float16,float16,0,3.0532267888387046
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,3072,40,2,64,0,1,float16,fp8,0,2.920618693033854
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,3072,40,2,64,0,1,fp8,fp8,0,3.8737920125325522
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,3072,40,4,64,0,1,float16,fp8,0,2.99946657816569
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,3072,40,4,64,0,1,float16,float16,0,3.064490636189779
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,3072,40,4,64,0,1,fp8,fp8,0,3.8775466283162436
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,3072,40,8,64,0,1,float16,float16,0,3.1795199712117515
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,3072,40,40,64,0,1,float16,float16,0,1.9401386578877766
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,3072,40,8,64,0,1,float16,fp8,0,3.0685866673787436
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,3072,40,40,64,0,1,float16,fp8,0,1.8553172747294109
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,3072,40,8,64,0,1,fp8,fp8,0,4.015445391337077
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,3072,40,40,64,0,1,fp8,fp8,0,2.3231147130330405
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,3072,40,1,64,0,1,float16,float16,0,1.3771093686421711
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,3072,40,1,64,0,1,float16,fp8,0,1.3429759343465169
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,3072,40,1,64,0,1,fp8,fp8,0,1.877504030863444
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,3072,40,2,64,0,1,float16,fp8,0,1.3421227137247722
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,3072,40,2,64,0,1,float16,float16,0,1.337685267130534
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,3072,40,4,64,0,1,float16,fp8,0,1.3697706858317058
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,3072,40,4,64,0,1,float16,float16,0,1.4149972597757976
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,3072,40,2,64,0,1,fp8,fp8,0,1.872213363647461
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,3072,40,8,64,0,1,float16,float16,0,1.4354772567749023
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,3072,40,4,64,0,1,fp8,fp8,0,1.896959940592448
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,3072,40,8,64,0,1,float16,fp8,0,1.446741263071696
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,3072,40,40,64,0,1,float16,float16,0,0.9076053301493326
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,3072,40,8,64,0,1,fp8,fp8,0,1.9473066329956055
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,3072,40,40,64,0,1,float16,fp8,0,0.8591360251108805
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,3072,40,1,64,0,1,float16,float16,0,0.6853973070780436
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,3072,40,1,64,0,1,float16,fp8,0,0.6881279945373535
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,3072,40,40,64,0,1,fp8,fp8,0,1.1953492959340413
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,3072,40,1,64,0,1,fp8,fp8,0,0.9497600396474203
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,3072,40,2,64,0,1,float16,float16,0,0.7070720195770264
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,3072,40,4,64,0,1,float16,float16,0,0.6816426912943522
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,3072,40,2,64,0,1,fp8,fp8,0,0.9485653241475424
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,3072,40,2,64,0,1,float16,fp8,0,0.6789120038350424
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,3072,40,4,64,0,1,float16,fp8,0,0.673962672551473
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,3072,40,4,64,0,1,fp8,fp8,0,0.951807975769043
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,3072,40,8,64,0,1,float16,float16,0,0.6971733570098877
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,3072,40,8,64,0,1,float16,fp8,0,0.6929066975911459
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,3072,40,8,64,0,1,fp8,fp8,0,0.971776008605957
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,3072,40,40,64,0,1,float16,float16,0,0.38741334279378253
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,3072,40,40,64,0,1,float16,fp8,0,0.38929065068562824
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,3072,40,40,64,0,1,fp8,fp8,0,0.5504000186920166
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,3072,40,1,64,0,1,float16,float16,0,0.40533332029978436
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,3072,40,1,64,0,1,float16,fp8,0,0.39765334129333496
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,3072,40,1,64,0,1,fp8,fp8,0,0.5372586647669474
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,3072,40,2,64,0,1,float16,fp8,0,0.39321601390838623
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,3072,40,2,64,0,1,float16,float16,0,0.39816534519195557
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,3072,40,4,64,0,1,float16,float16,0,0.40994131565093994
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,3072,40,2,64,0,1,fp8,fp8,0,0.5307733217875162
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,3072,40,4,64,0,1,float16,fp8,0,0.40004265308380127
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,3072,40,4,64,0,1,fp8,fp8,0,0.5278720060984293
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,3072,40,8,64,0,1,float16,float16,0,0.40635732809702557
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,3072,40,8,64,0,1,float16,fp8,0,0.39150933424631756
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,3072,40,8,64,0,1,fp8,fp8,0,0.5360639890034994
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,2048,40,1,64,0,1,float16,float16,0,13.682688395182291
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,2048,40,1,64,0,1,float16,fp8,0,13.628074645996094
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,2048,40,1,64,0,1,fp8,fp8,0,15.727274576822916
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,2048,40,2,64,0,1,float16,float16,0,14.089728037516275
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,2048,40,2,64,0,1,float16,fp8,0,13.978453318277994
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,2048,40,4,64,0,1,float16,float16,0,14.33395258585612
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,2048,40,2,64,0,1,fp8,fp8,0,16.736426035563152
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,2048,40,4,64,0,1,float16,fp8,0,14.945621490478516
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,2048,40,4,64,0,1,fp8,fp8,0,17.484288533528645
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,2048,40,40,64,0,1,float16,float16,0,9.213098526000977
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,2048,40,1,64,0,1,float16,float16,0,6.67033576965332
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,2048,40,40,64,0,1,float16,fp8,0,8.885930379231771
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,2048,40,8,64,0,1,float16,float16,0,14.425941467285156
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,2048,40,40,64,0,1,fp8,fp8,0,10.106709162394205
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,2048,40,8,64,0,1,float16,fp8,0,14.29196802775065
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,2048,40,8,64,0,1,fp8,fp8,0,17.868799845377605
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,2048,40,1,64,0,1,float16,fp8,0,6.2675628662109375
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,2048,40,2,64,0,1,float16,float16,0,6.605482737223308
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,2048,40,2,64,0,1,float16,fp8,0,6.6575361887613935
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,2048,40,1,64,0,1,fp8,fp8,0,7.476224263509114
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,2048,40,4,64,0,1,float16,float16,0,6.387200037638347
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,2048,40,2,64,0,1,fp8,fp8,0,7.680853525797526
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,2048,40,4,64,0,1,float16,fp8,0,6.791680018107097
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,2048,40,4,64,0,1,fp8,fp8,0,7.866026560465495
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,2048,40,40,64,0,1,float16,float16,0,4.428458531697591
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,2048,40,8,64,0,1,float16,float16,0,6.752426783243815
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,2048,40,40,64,0,1,float16,fp8,0,4.240384101867676
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,2048,40,8,64,0,1,float16,fp8,0,6.9705384572347
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,2048,40,40,64,0,1,fp8,fp8,0,4.808362642923991
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,2048,40,8,64,0,1,fp8,fp8,0,8.285354614257812
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,2048,40,1,64,0,1,float16,float16,0,2.960554758707682
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,2048,40,1,64,0,1,float16,fp8,0,2.972842534383138
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,2048,40,2,64,0,1,float16,float16,0,3.0429865519205728
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,2048,40,1,64,0,1,fp8,fp8,0,3.6314452489217124
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,2048,40,2,64,0,1,float16,fp8,0,2.9853013356526694
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,2048,40,4,64,0,1,float16,float16,0,3.0993067423502603
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,2048,40,2,64,0,1,fp8,fp8,0,3.7447681427001953
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,2048,40,4,64,0,1,float16,fp8,0,3.121493339538574
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,2048,40,4,64,0,1,fp8,fp8,0,3.7500588099161782
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,2048,40,8,64,0,1,float16,float16,0,3.2220160166422525
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,2048,40,40,64,0,1,float16,float16,0,2.145962715148926
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,2048,40,40,64,0,1,float16,fp8,0,2.0396374066670737
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,2048,40,8,64,0,1,float16,fp8,0,3.2448854446411133
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,2048,40,8,64,0,1,fp8,fp8,0,3.8766934076944985
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,2048,40,1,64,0,1,float16,float16,0,1.4052693049112956
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,2048,40,40,64,0,1,fp8,fp8,0,2.4111785888671875
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,2048,40,1,64,0,1,float16,fp8,0,1.363968054453532
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,2048,40,1,64,0,1,fp8,fp8,0,1.7703253428141277
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,2048,40,2,64,0,1,float16,float16,0,1.3839359283447266
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,2048,40,2,64,0,1,float16,fp8,0,1.3796693483988445
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,2048,40,4,64,0,1,float16,float16,0,1.4144852956136067
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,2048,40,4,64,0,1,float16,fp8,0,1.442815939585368
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,2048,40,8,64,0,1,float16,float16,0,1.5223466555277507
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,2048,40,2,64,0,1,fp8,fp8,0,1.7978026072184246
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,2048,40,4,64,0,1,fp8,fp8,0,1.850538730621338
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,2048,40,8,64,0,1,float16,fp8,0,1.480533281962077
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,2048,40,40,64,0,1,float16,float16,0,0.9941333134969076
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,2048,40,8,64,0,1,fp8,fp8,0,1.898837407430013
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,2048,40,1,64,0,1,float16,float16,0,0.641706665356954
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,2048,40,40,64,0,1,float16,fp8,0,0.9260373115539551
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,2048,40,40,64,0,1,fp8,fp8,0,1.218559980392456
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,2048,40,1,64,0,1,fp8,fp8,0,0.8929279645284017
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,2048,40,1,64,0,1,float16,fp8,0,0.6568959951400757
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,2048,40,2,64,0,1,float16,float16,0,0.6365866661071777
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,2048,40,2,64,0,1,float16,fp8,0,0.6415359973907471
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,2048,40,4,64,0,1,float16,float16,0,0.6652586857477824
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,2048,40,2,64,0,1,fp8,fp8,0,0.8905386924743652
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,2048,40,4,64,0,1,float16,fp8,0,0.6265173355738322
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,2048,40,8,64,0,1,float16,float16,0,0.6705493132273356
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,2048,40,4,64,0,1,fp8,fp8,0,0.9069226582845052
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,2048,40,8,64,0,1,float16,fp8,0,0.6548480192820231
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,2048,40,8,64,0,1,fp8,fp8,0,0.9376426537831625
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,2048,40,40,64,0,1,float16,float16,0,0.3659093379974365
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,2048,40,40,64,0,1,float16,fp8,0,0.3561813433965047
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,2048,40,40,64,0,1,fp8,fp8,0,0.5973333517710367
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,2048,40,1,64,0,1,float16,float16,0,0.3324586749076843
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,2048,40,1,64,0,1,float16,fp8,0,0.33450667063395184
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,2048,40,1,64,0,1,fp8,fp8,0,0.4773546854654948
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,2048,40,2,64,0,1,float16,float16,0,0.33672531445821124
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,2048,40,2,64,0,1,float16,fp8,0,0.32972800731658936
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,2048,40,2,64,0,1,fp8,fp8,0,0.47411199410756427
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,2048,40,4,64,0,1,float16,float16,0,0.33587201436360675
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,2048,40,4,64,0,1,float16,fp8,0,0.33655468622843426
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,2048,40,8,64,0,1,float16,float16,0,0.3423573176066081
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,2048,40,4,64,0,1,fp8,fp8,0,0.4766720136006673
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,2048,40,8,64,0,1,float16,fp8,0,0.34303998947143555
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,2048,40,8,64,0,1,fp8,fp8,0,0.474453330039978
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,2048,40,40,64,0,1,float16,float16,0,0.19729065895080566
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,2048,40,40,64,0,1,float16,fp8,0,0.1976319948832194
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,2048,40,40,64,0,1,fp8,fp8,0,0.28125866254170734
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,2048,40,1,64,0,1,float16,float16,0,0.2001919945081075
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,2048,40,1,64,0,1,float16,fp8,0,0.20377600193023682
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,2048,40,1,64,0,1,fp8,fp8,0,0.27613866329193115
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,2048,40,2,64,0,1,float16,float16,0,0.20121600230534872
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,2048,40,2,64,0,1,float16,fp8,0,0.19848533471425375
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,2048,40,2,64,0,1,fp8,fp8,0,0.27426133553187054
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,2048,40,4,64,0,1,float16,float16,0,0.20002132654190063
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,2048,40,4,64,0,1,float16,fp8,0,0.19882667064666748
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,2048,40,4,64,0,1,fp8,fp8,0,0.2754559914271037
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,2048,40,8,64,0,1,float16,float16,0,0.20121600230534872
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,2048,40,8,64,0,1,float16,fp8,0,0.20360533396402994
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,2048,40,8,64,0,1,fp8,fp8,0,0.2775040070215861
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,1536,40,1,64,0,1,float16,float16,0,8.215893427530924
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,1536,40,1,64,0,1,float16,fp8,0,8.010410944620768
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,1536,40,1,64,0,1,fp8,fp8,0,9.262421290079752
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,1536,40,2,64,0,1,float16,float16,0,8.434517542521158
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,1536,40,2,64,0,1,float16,fp8,0,8.172885258992514
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,1536,40,4,64,0,1,float16,float16,0,8.406016031901041
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,1536,40,2,64,0,1,fp8,fp8,0,9.88927968343099
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,1536,40,4,64,0,1,float16,fp8,0,8.441855748494467
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,1536,40,8,64,0,1,float16,float16,0,8.878762563069662
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,1536,40,4,64,0,1,fp8,fp8,0,10.03878402709961
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,1536,40,1,64,0,1,float16,float16,0,3.8495572408040366
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,1536,40,40,64,0,1,float16,float16,0,5.995690663655599
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,1536,40,40,64,0,1,float16,fp8,0,5.660671869913737
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,1536,40,8,64,0,1,float16,fp8,0,8.751957575480143
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,1536,40,40,64,0,1,fp8,fp8,0,6.431402842203776
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,1536,40,8,64,0,1,fp8,fp8,0,10.726229349772135
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,1536,40,1,64,0,1,float16,fp8,0,3.670698801676432
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,1536,40,1,64,0,1,fp8,fp8,0,4.442282676696777
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,1536,40,2,64,0,1,float16,float16,0,3.9017814000447593
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,1536,40,2,64,0,1,float16,fp8,0,3.7971626917521157
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,1536,40,2,64,0,1,fp8,fp8,0,4.532565434773763
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,1536,40,4,64,0,1,float16,float16,0,3.952639897664388
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,1536,40,4,64,0,1,float16,fp8,0,3.9536641438802085
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,1536,40,4,64,0,1,fp8,fp8,0,4.66107718149821
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,1536,40,8,64,0,1,float16,float16,0,4.159658749898274
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,1536,40,8,64,0,1,float16,fp8,0,4.118698755900065
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,1536,40,40,64,0,1,float16,float16,0,2.904575983683268
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,1536,40,1,64,0,1,float16,float16,0,1.8083839416503906
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,1536,40,8,64,0,1,fp8,fp8,0,4.895402590433757
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,1536,40,40,64,0,1,float16,fp8,0,2.74124813079834
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,1536,40,40,64,0,1,fp8,fp8,0,3.0704641342163086
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,1536,40,1,64,0,1,float16,fp8,0,1.7703253428141277
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,1536,40,1,64,0,1,fp8,fp8,0,2.129749298095703
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,1536,40,2,64,0,1,float16,float16,0,1.8379093805948894
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,1536,40,2,64,0,1,float16,fp8,0,1.7938772837320964
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,1536,40,2,64,0,1,fp8,fp8,0,2.2045013109842935
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,1536,40,4,64,0,1,float16,fp8,0,1.8447359402974446
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,1536,40,4,64,0,1,float16,float16,0,1.9200000762939453
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,1536,40,4,64,0,1,fp8,fp8,0,2.217301368713379
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,1536,40,8,64,0,1,float16,float16,0,1.9997013409932454
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,1536,40,8,64,0,1,float16,fp8,0,1.9585706392923992
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,1536,40,8,64,0,1,fp8,fp8,0,2.3301119804382324
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,1536,40,40,64,0,1,float16,float16,0,1.3742079734802246
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,1536,40,40,64,0,1,float16,fp8,0,1.3032106558481853
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,1536,40,1,64,0,1,float16,float16,0,0.8180053234100342
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,1536,40,1,64,0,1,float16,fp8,0,0.8024746576944987
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,1536,40,1,64,0,1,fp8,fp8,0,1.0738346576690674
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,1536,40,40,64,0,1,fp8,fp8,0,1.5307092666625977
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,1536,40,2,64,0,1,float16,float16,0,0.802133321762085
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,1536,40,2,64,0,1,float16,fp8,0,0.8162986437479655
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,1536,40,2,64,0,1,fp8,fp8,0,1.0821973482767742
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,1536,40,4,64,0,1,float16,float16,0,0.8490666548411051
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,1536,40,4,64,0,1,fp8,fp8,0,1.1112106641133626
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,1536,40,4,64,0,1,float16,fp8,0,0.8383146921793619
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,1536,40,8,64,0,1,float16,float16,0,0.8987306753794352
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,1536,40,8,64,0,1,float16,fp8,0,0.8644266923268636
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,1536,40,8,64,0,1,fp8,fp8,0,1.1680426597595215
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,1536,40,40,64,0,1,float16,fp8,0,0.5577386617660522
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,1536,40,40,64,0,1,float16,float16,0,0.6227626800537109
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,1536,40,1,64,0,1,float16,float16,0,0.40192000071207684
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,1536,40,40,64,0,1,fp8,fp8,0,0.7818240324656168
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,1536,40,1,64,0,1,float16,fp8,0,0.4065279960632324
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,1536,40,1,64,0,1,fp8,fp8,0,0.5329920053482056
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,1536,40,2,64,0,1,float16,float16,0,0.38758401075998944
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,1536,40,2,64,0,1,float16,fp8,0,0.3911679983139038
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,1536,40,2,64,0,1,fp8,fp8,0,0.532480001449585
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,1536,40,4,64,0,1,float16,fp8,0,0.4145493507385254
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,1536,40,4,64,0,1,float16,float16,0,0.3991893529891968
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,1536,40,4,64,0,1,fp8,fp8,0,0.5357226530710856
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,1536,40,8,64,0,1,float16,float16,0,0.400383989016215
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,1536,40,8,64,0,1,float16,fp8,0,0.389631986618042
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,1536,40,8,64,0,1,fp8,fp8,0,0.5459626515706381
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,1536,40,40,64,0,1,float16,float16,0,0.2218666672706604
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,1536,40,40,64,0,1,fp8,fp8,0,0.33655468622843426
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,1536,40,40,64,0,1,float16,fp8,0,0.2244266668955485
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,1536,40,1,64,0,1,float16,float16,0,0.20258132616678873
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,1536,40,1,64,0,1,float16,fp8,0,0.202239990234375
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,1536,40,1,64,0,1,fp8,fp8,0,0.28808534145355225
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,1536,40,2,64,0,1,float16,float16,0,0.20070399840672812
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,1536,40,2,64,0,1,float16,fp8,0,0.20155733823776245
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,1536,40,2,64,0,1,fp8,fp8,0,0.2903040051460266
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,1536,40,4,64,0,1,float16,float16,0,0.20070399840672812
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,1536,40,8,64,0,1,float16,float16,0,0.2053119937578837
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,1536,40,4,64,0,1,float16,fp8,0,0.2039466698964437
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,1536,40,4,64,0,1,fp8,fp8,0,0.28996266921361286
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,1536,40,8,64,0,1,fp8,fp8,0,0.2882560094197591
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,1536,40,8,64,0,1,float16,fp8,0,0.20616533358891806
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,1536,40,40,64,0,1,float16,float16,0,0.13090133666992188
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,1536,40,40,64,0,1,float16,fp8,0,0.13107200463612875
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,1536,40,40,64,0,1,fp8,fp8,0,0.1795413295427958
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,1536,40,1,64,0,1,float16,float16,0,0.1346560021241506
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,1536,40,1,64,0,1,float16,fp8,0,0.13329066832860312
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,1536,40,2,64,0,1,float16,float16,0,0.13397333025932312
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,1536,40,1,64,0,1,fp8,fp8,0,0.17919999361038208
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,1536,40,2,64,0,1,fp8,fp8,0,0.17732266585032144
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,1536,40,2,64,0,1,float16,fp8,0,0.13294933239618936
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,1536,40,4,64,0,1,float16,float16,0,0.1327786644299825
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,1536,40,4,64,0,1,float16,fp8,0,0.13346133629480997
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,1536,40,4,64,0,1,fp8,fp8,0,0.1764693260192871
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,1536,40,8,64,0,1,float16,float16,0,0.13346133629480997
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,1536,40,8,64,0,1,float16,fp8,0,0.13329066832860312
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,1536,40,8,64,0,1,fp8,fp8,0,0.1781760056813558
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,1024,40,1,64,0,1,float16,float16,0,8.276992162068685
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,1024,40,1,64,0,1,float16,fp8,0,8.390485127766928
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,1024,40,1,64,0,1,fp8,fp8,0,9.270442962646484
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,1024,40,2,64,0,1,float16,float16,0,8.774314880371094
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,1024,40,2,64,0,1,float16,fp8,0,8.90880012512207
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,1024,40,2,64,0,1,fp8,fp8,0,9.757183710734049
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,1024,40,4,64,0,1,float16,float16,0,9.136639912923178
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,1024,40,4,64,0,1,float16,fp8,0,9.111210505167643
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,1024,40,4,64,0,1,fp8,fp8,0,10.106538772583008
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,1024,40,8,64,0,1,float16,float16,0,9.924437204996744
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,1024,40,1,64,0,1,float16,float16,0,4.034730593363444
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,1024,40,40,64,0,1,float16,float16,0,7.068159739176433
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,1024,40,40,64,0,1,float16,fp8,0,6.718122482299805
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,1024,40,40,64,0,1,fp8,fp8,0,6.9684906005859375
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,1024,40,8,64,0,1,float16,fp8,0,9.511082967122396
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,1024,40,8,64,0,1,fp8,fp8,0,10.789717356363932
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,1024,40,1,64,0,1,float16,fp8,0,3.9616851806640625
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,1024,40,1,64,0,1,fp8,fp8,0,4.536320050557454
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,1024,40,2,64,0,1,float16,float16,0,4.21614933013916
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,1024,40,2,64,0,1,float16,fp8,0,4.181503931681315
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,1024,40,2,64,0,1,fp8,fp8,0,4.662442525227864
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,1024,40,4,64,0,1,float16,float16,0,4.277248064676921
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,1024,40,4,64,0,1,float16,fp8,0,4.222634633382161
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,1024,40,4,64,0,1,fp8,fp8,0,4.871850649515788
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,1024,40,8,64,0,1,float16,float16,0,4.6028798421223955
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,1024,40,8,64,0,1,float16,fp8,0,4.585983912150065
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,1024,40,1,64,0,1,float16,float16,0,1.9268266359965007
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,1024,40,40,64,0,1,float16,fp8,0,3.2687788009643555
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,1024,40,40,64,0,1,float16,float16,0,3.459925333658854
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,1024,40,8,64,0,1,fp8,fp8,0,5.1184641520182295
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,1024,40,1,64,0,1,float16,fp8,0,1.9087360699971516
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,1024,40,40,64,0,1,fp8,fp8,0,3.3931945164998374
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,1024,40,1,64,0,1,fp8,fp8,0,2.16866127649943
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,1024,40,2,64,0,1,float16,float16,0,1.995946725209554
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,1024,40,2,64,0,1,float16,fp8,0,1.9478185971577961
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,1024,40,2,64,0,1,fp8,fp8,0,2.2106453577677407
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,1024,40,4,64,0,1,float16,float16,0,2.059434731801351
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,1024,40,4,64,0,1,float16,fp8,0,2.074282646179199
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,1024,40,4,64,0,1,fp8,fp8,0,2.2797652880350747
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,1024,40,8,64,0,1,float16,float16,0,2.186581293741862
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,1024,40,8,64,0,1,float16,fp8,0,2.1222400665283203
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,1024,40,8,64,0,1,fp8,fp8,0,2.421759923299154
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,1024,40,40,64,0,1,float16,float16,0,1.6677546501159668
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,1024,40,40,64,0,1,float16,fp8,0,1.5732053120930989
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,1024,40,1,64,0,1,float16,float16,0,0.8823466300964355
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,1024,40,1,64,0,1,float16,fp8,0,0.8763733704884847
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,1024,40,40,64,0,1,fp8,fp8,0,1.6914772987365723
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,1024,40,1,64,0,1,fp8,fp8,0,1.0915839672088623
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,1024,40,2,64,0,1,float16,float16,0,0.8901973565419515
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,1024,40,2,64,0,1,float16,fp8,0,0.905898650487264
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,1024,40,2,64,0,1,fp8,fp8,0,1.0965332984924316
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,1024,40,4,64,0,1,float16,float16,0,0.9390079975128174
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,1024,40,4,64,0,1,float16,fp8,0,0.92740265528361
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,1024,40,4,64,0,1,fp8,fp8,0,1.1344213485717773
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,1024,40,8,64,0,1,float16,fp8,0,1.0004479885101318
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,1024,40,8,64,0,1,float16,float16,0,1.032362699508667
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,1024,40,8,64,0,1,fp8,fp8,0,1.2052480379740398
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,1024,40,40,64,0,1,float16,float16,0,0.7649280230204264
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,1024,40,40,64,0,1,fp8,fp8,0,0.8454826672871908
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,1024,40,40,64,0,1,float16,fp8,0,0.7062186400095621
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,1024,40,1,64,0,1,fp8,fp8,0,0.514901320139567
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,1024,40,1,64,0,1,float16,fp8,0,0.39048532644907635
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,1024,40,1,64,0,1,float16,float16,0,0.3949226538340251
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,1024,40,2,64,0,1,float16,float16,0,0.3925333420435588
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,1024,40,2,64,0,1,float16,fp8,0,0.3824640115102132
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,1024,40,2,64,0,1,fp8,fp8,0,0.5237760146458944
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,1024,40,4,64,0,1,float16,float16,0,0.3824640115102132
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,1024,40,4,64,0,1,fp8,fp8,0,0.5346986850102743
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,1024,40,8,64,0,1,float16,float16,0,0.40192000071207684
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,1024,40,4,64,0,1,float16,fp8,0,0.3850239912668864
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,1024,40,8,64,0,1,float16,fp8,0,0.40396801630655926
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,1024,40,8,64,0,1,fp8,fp8,0,0.5934079885482788
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,1024,40,40,64,0,1,float16,float16,0,0.2461013396581014
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,1024,40,40,64,0,1,float16,fp8,0,0.2213546633720398
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,1024,40,1,64,0,1,float16,float16,0,0.19217065970102945
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,1024,40,40,64,0,1,fp8,fp8,0,0.4116479953130086
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,1024,40,1,64,0,1,float16,fp8,0,0.19182932376861572
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,1024,40,2,64,0,1,float16,fp8,0,0.20770132541656494
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,1024,40,2,64,0,1,float16,float16,0,0.19473065932591757
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,1024,40,1,64,0,1,fp8,fp8,0,0.26897066831588745
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,1024,40,4,64,0,1,float16,float16,0,0.19746132691701254
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,1024,40,2,64,0,1,fp8,fp8,0,0.26828799645106
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,1024,40,4,64,0,1,float16,fp8,0,0.19131733973821005
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,1024,40,8,64,0,1,float16,float16,0,0.19882667064666748
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,1024,40,4,64,0,1,fp8,fp8,0,0.2686293323834737
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,1024,40,8,64,0,1,float16,fp8,0,0.19746132691701254
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,1024,40,8,64,0,1,fp8,fp8,0,0.2720426718393962
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,1024,40,40,64,0,1,float16,float16,0,0.11349333326021831
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,1024,40,40,64,0,1,float16,fp8,0,0.11281067132949829
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,1024,40,40,64,0,1,fp8,fp8,0,0.15103999773661295
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,1024,40,1,64,0,1,float16,float16,0,0.1114453375339508
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,1024,40,1,64,0,1,float16,fp8,0,0.11264000336329143
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,1024,40,1,64,0,1,fp8,fp8,0,0.14711466431617737
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,1024,40,2,64,0,1,float16,float16,0,0.11332266529401143
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,1024,40,2,64,0,1,float16,fp8,0,0.11195733149846394
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,1024,40,2,64,0,1,fp8,fp8,0,0.14677332838376364
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,1024,40,4,64,0,1,float16,float16,0,0.11161599556605022
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,1024,40,4,64,0,1,float16,fp8,0,0.11366400122642517
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,1024,40,4,64,0,1,fp8,fp8,0,0.14813866217931113
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,1024,40,8,64,0,1,float16,float16,0,0.11195733149846394
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,1024,40,8,64,0,1,float16,fp8,0,0.11281067132949829
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,1024,40,8,64,0,1,fp8,fp8,0,0.14967466394106546
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,1024,40,40,64,0,1,float16,float16,0,0.06929066777229309
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,1024,40,40,64,0,1,float16,fp8,0,0.06809600194295247
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,1024,40,1,64,0,1,float16,float16,0,0.06775466601053874
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,1024,40,40,64,0,1,fp8,fp8,0,0.08550399541854858
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,1024,40,1,64,0,1,float16,fp8,0,0.067071999112765
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,1024,40,1,64,0,1,fp8,fp8,0,0.08379733562469482
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,1024,40,2,64,0,1,float16,float16,0,0.06843733290831248
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,1024,40,2,64,0,1,float16,fp8,0,0.06860800087451935
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,1024,40,2,64,0,1,fp8,fp8,0,0.08533333738644917
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,1024,40,4,64,0,1,float16,float16,0,0.06690133114655812
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,1024,40,4,64,0,1,float16,fp8,0,0.06843733290831248
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,1024,40,4,64,0,1,fp8,fp8,0,0.08413867155710857
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,1024,40,8,64,0,1,float16,float16,0,0.06656000018119812
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,1024,40,8,64,0,1,float16,fp8,0,0.0679253339767456
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,1024,40,8,64,0,1,fp8,fp8,0,0.08430932958920796
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,512,40,1,64,0,1,float16,float16,0,6.278314590454102
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,512,40,1,64,0,1,fp8,fp8,0,6.327978769938151
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,512,40,1,64,0,1,float16,fp8,0,6.225749333699544
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,512,40,2,64,0,1,float16,float16,0,6.7601064046223955
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,512,40,2,64,0,1,float16,fp8,0,6.771541595458984
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,512,40,2,64,0,1,fp8,fp8,0,6.8283735911051435
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,512,40,4,64,0,1,float16,float16,0,6.999551773071289
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,512,40,4,64,0,1,float16,fp8,0,6.859434763590495
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,512,40,4,64,0,1,fp8,fp8,0,7.263914744059245
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,512,40,8,64,0,1,float16,fp8,0,7.69160525004069
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,512,40,8,64,0,1,float16,float16,0,7.859200159708659
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,512,40,1,64,0,1,float16,float16,0,2.8938239415486655
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,512,40,40,64,0,1,float16,float16,0,6.404266357421875
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,512,40,8,64,0,1,fp8,fp8,0,7.869781494140625
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,512,40,40,64,0,1,float16,fp8,0,6.079999923706055
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,512,40,40,64,0,1,fp8,fp8,0,5.66545041402181
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,512,40,1,64,0,1,float16,fp8,0,2.9404160181681314
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,512,40,1,64,0,1,fp8,fp8,0,3.057663917541504
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,512,40,2,64,0,1,float16,float16,0,3.118250528971354
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,512,40,2,64,0,1,float16,fp8,0,3.1030613581339517
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,512,40,2,64,0,1,fp8,fp8,0,3.1979519526163735
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,512,40,4,64,0,1,float16,fp8,0,3.2121171951293945
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,512,40,4,64,0,1,float16,float16,0,3.3334614435831704
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,512,40,4,64,0,1,fp8,fp8,0,3.386538823445638
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,512,40,8,64,0,1,float16,float16,0,3.6464640299479165
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,512,40,8,64,0,1,float16,fp8,0,3.5638612111409507
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,512,40,8,64,0,1,fp8,fp8,0,3.6638720830281577
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,512,40,40,64,0,1,float16,float16,0,3.156991958618164
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,512,40,1,64,0,1,float16,float16,0,1.3701119422912598
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,512,40,1,64,0,1,float16,fp8,0,1.4015146891276042
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,512,40,40,64,0,1,float16,fp8,0,2.9690879185994468
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,512,40,40,64,0,1,fp8,fp8,0,2.7221333185831704
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,512,40,1,64,0,1,fp8,fp8,0,1.5175679524739583
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,512,40,2,64,0,1,float16,float16,0,1.4687573115030925
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,512,40,2,64,0,1,float16,fp8,0,1.492479960123698
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,512,40,2,64,0,1,fp8,fp8,0,1.5563093821207683
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,512,40,4,64,0,1,float16,float16,0,1.5252480506896973
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,512,40,4,64,0,1,float16,fp8,0,1.5167147318522136
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,512,40,4,64,0,1,fp8,fp8,0,1.6266239484151204
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,512,40,8,64,0,1,float16,float16,0,1.7774933179219563
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,512,40,8,64,0,1,float16,fp8,0,1.6824320157368977
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,512,40,8,64,0,1,fp8,fp8,0,1.7271466255187988
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,512,40,40,64,0,1,float16,float16,0,1.506816069285075
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,512,40,40,64,0,1,float16,fp8,0,1.4103892644246419
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,512,40,40,64,0,1,fp8,fp8,0,1.3281280199686687
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,512,40,1,64,0,1,float16,float16,0,0.614741325378418
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,512,40,1,64,0,1,float16,fp8,0,0.6217386722564697
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,512,40,1,64,0,1,fp8,fp8,0,0.7389866511027018
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,512,40,2,64,0,1,float16,float16,0,0.6500693162282308
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,512,40,2,64,0,1,float16,fp8,0,0.6662826538085938
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,512,40,2,64,0,1,fp8,fp8,0,0.7488853136698405
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,512,40,4,64,0,1,float16,float16,0,0.6859093507130941
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,512,40,4,64,0,1,fp8,fp8,0,0.7895039717356364
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,512,40,4,64,0,1,float16,fp8,0,0.6698666413625082
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,512,40,8,64,0,1,float16,fp8,0,0.7688533465067545
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,512,40,8,64,0,1,float16,float16,0,0.7936000029246012
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,512,40,8,64,0,1,fp8,fp8,0,0.8560640017191569
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,512,40,40,64,0,1,float16,float16,0,0.6594560146331787
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,512,40,40,64,0,1,float16,fp8,0,0.6046719948450724
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,512,40,1,64,0,1,float16,float16,0,0.2691413362820943
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,512,40,1,64,0,1,float16,fp8,0,0.25548799832661945
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,512,40,40,64,0,1,fp8,fp8,0,0.6683306694030762
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,512,40,1,64,0,1,fp8,fp8,0,0.33075199524561566
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,512,40,2,64,0,1,float16,float16,0,0.2686293323834737
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,512,40,2,64,0,1,float16,fp8,0,0.2681173284848531
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,512,40,2,64,0,1,fp8,fp8,0,0.33655468622843426
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,512,40,4,64,0,1,float16,float16,0,0.25924267371495563
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,512,40,4,64,0,1,float16,fp8,0,0.26146133740743
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,512,40,4,64,0,1,fp8,fp8,0,0.359935998916626
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,512,40,8,64,0,1,float16,fp8,0,0.26794666051864624
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,512,40,8,64,0,1,float16,float16,0,0.28040534257888794
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,512,40,8,64,0,1,fp8,fp8,0,0.4121599992116292
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,512,40,40,64,0,1,float16,float16,0,0.18193066120147705
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,512,40,40,64,0,1,float16,fp8,0,0.15001599987347922
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,512,40,40,64,0,1,fp8,fp8,0,0.31948800881703693
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,512,40,1,64,0,1,float16,float16,0,0.13038933277130127
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,512,40,1,64,0,1,fp8,fp8,0,0.17203199863433838
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,512,40,1,64,0,1,float16,fp8,0,0.13038933277130127
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,512,40,2,64,0,1,float16,float16,0,0.12919466694196066
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,512,40,2,64,0,1,float16,fp8,0,0.1295360028743744
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,512,40,2,64,0,1,fp8,fp8,0,0.1730560064315796
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,512,40,4,64,0,1,float16,float16,0,0.1293653349081675
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,512,40,4,64,0,1,float16,fp8,0,0.12800000111262003
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,512,40,4,64,0,1,fp8,fp8,0,0.1713493267695109
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,512,40,8,64,0,1,float16,float16,0,0.13107200463612875
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,512,40,8,64,0,1,float16,fp8,0,0.1288533310095469
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,512,40,8,64,0,1,fp8,fp8,0,0.17407999436060587
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,512,40,40,64,0,1,float16,float16,0,0.07935999830563863
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,512,40,40,64,0,1,float16,fp8,0,0.07833600044250488
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,512,40,40,64,0,1,fp8,fp8,0,0.09830400347709656
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,512,40,1,64,0,1,float16,float16,0,0.07543466488520305
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,512,40,1,64,0,1,float16,fp8,0,0.07679999868075053
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,512,40,1,64,0,1,fp8,fp8,0,0.09608532985051473
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,512,40,2,64,0,1,float16,float16,0,0.07611733178297679
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,512,40,2,64,0,1,float16,fp8,0,0.07714133461316426
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,512,40,2,64,0,1,fp8,fp8,0,0.09779199957847595
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,512,40,4,64,0,1,float16,float16,0,0.077824001510938
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,512,40,4,64,0,1,float16,fp8,0,0.0773119976123174
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,512,40,8,64,0,1,float16,float16,0,0.0766293356815974
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,512,40,4,64,0,1,fp8,fp8,0,0.09796266754468282
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,512,40,8,64,0,1,float16,fp8,0,0.07594666878382365
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,512,40,40,64,0,1,float16,float16,0,0.045567999283472695
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,512,40,8,64,0,1,fp8,fp8,0,0.09710933764775594
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,512,40,40,64,0,1,float16,fp8,0,0.045738667249679565
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,512,40,40,64,0,1,fp8,fp8,0,0.05495466788609823
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,512,40,1,64,0,1,float16,fp8,0,0.04488533238569895
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,512,40,1,64,0,1,float16,float16,0,0.04505600035190582
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,512,40,1,64,0,1,fp8,fp8,0,0.054272000988324486
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,512,40,2,64,0,1,float16,float16,0,0.04420266548792521
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,512,40,2,64,0,1,float16,fp8,0,0.04488533238569895
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,512,40,2,64,0,1,fp8,fp8,0,0.05376000205675761
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,512,40,4,64,0,1,float16,float16,0,0.045226668318112694
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,512,40,4,64,0,1,float16,fp8,0,0.04625066618124644
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,512,40,4,64,0,1,fp8,fp8,0,0.05563733478387197
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,512,40,8,64,0,1,float16,float16,0,0.04488533238569895
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,512,40,8,64,0,1,fp8,fp8,0,0.05478399991989136
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,512,40,8,64,0,1,float16,fp8,0,0.04488533238569895
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,512,40,40,64,0,1,float16,float16,0,0.02918400118748347
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,512,40,40,64,0,1,float16,fp8,0,0.02935466667016347
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,512,40,40,64,0,1,fp8,fp8,0,0.03822933385769526
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,512,40,1,64,0,1,float16,float16,0,0.027818667391935985
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,512,40,1,64,0,1,float16,fp8,0,0.028160000840822857
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,512,40,1,64,0,1,fp8,fp8,0,0.03737599899371465
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,512,40,2,64,0,1,float16,float16,0,0.027989332874615986
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,512,40,2,64,0,1,float16,fp8,0,0.028501334289709728
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,512,40,2,64,0,1,fp8,fp8,0,0.03754666695992152
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,512,40,4,64,0,1,float16,float16,0,0.028330666323502857
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,512,40,4,64,0,1,float16,fp8,0,0.028160000840822857
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,512,40,4,64,0,1,fp8,fp8,0,0.03703466554482778
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,512,40,8,64,0,1,float16,float16,0,0.028501334289709728
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,512,40,8,64,0,1,fp8,fp8,0,0.03737599899371465
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,512,40,8,64,0,1,float16,fp8,0,0.02867199977238973
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,256,40,1,64,0,1,float16,float16,0,2.564095973968506
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,256,40,1,64,0,1,fp8,fp8,0,2.297856012980143
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,256,40,1,64,0,1,float16,fp8,0,2.5518080393473306
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,256,40,2,64,0,1,float16,float16,0,2.792448043823242
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,256,40,2,64,0,1,float16,fp8,0,2.7682132720947266
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,256,40,2,64,0,1,fp8,fp8,0,2.4890027046203613
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,256,40,4,64,0,1,float16,float16,0,2.9441706339518228
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,256,40,4,64,0,1,float16,fp8,0,2.8928000132242837
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,256,40,4,64,0,1,fp8,fp8,0,2.6967039108276367
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,256,40,8,64,0,1,float16,float16,0,3.3373867670694985
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,256,40,8,64,0,1,float16,fp8,0,3.24232546488444
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,256,40,8,64,0,1,fp8,fp8,0,3.0037333170572915
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,256,40,40,64,0,1,float16,float16,0,3.1098880767822266
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,256,40,40,64,0,1,float16,fp8,0,2.9218133290608725
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,256,40,1,64,0,1,float16,float16,0,1.2059306303660076
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,256,40,40,64,0,1,fp8,fp8,0,2.4209067026774087
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,256,40,1,64,0,1,float16,fp8,0,1.1953492959340413
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,256,40,1,64,0,1,fp8,fp8,0,1.1209386984507244
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,256,40,2,64,0,1,float16,float16,0,1.3387093544006348
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,256,40,2,64,0,1,float16,fp8,0,1.3323946793874104
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,256,40,2,64,0,1,fp8,fp8,0,1.1893760363260906
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,256,40,4,64,0,1,float16,float16,0,1.3742079734802246
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,256,40,4,64,0,1,float16,fp8,0,1.3484373092651367
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,256,40,4,64,0,1,fp8,fp8,0,1.253717343012492
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,256,40,8,64,0,1,float16,float16,0,1.5622827212015789
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,256,40,8,64,0,1,float16,fp8,0,1.5240532557169597
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,256,40,8,64,0,1,fp8,fp8,0,1.3791573842366536
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,256,40,40,64,0,1,float16,float16,0,1.4946986834208171
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,256,40,40,64,0,1,float16,fp8,0,1.4085119565327961
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,256,40,1,64,0,1,float16,float16,0,0.5314559936523438
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,256,40,40,64,0,1,fp8,fp8,0,1.1900586287180583
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,256,40,1,64,0,1,float16,fp8,0,0.5261653264363607
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,256,40,1,64,0,1,fp8,fp8,0,0.5802666743596395
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,256,40,2,64,0,1,float16,float16,0,0.5678079922993978
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,256,40,2,64,0,1,float16,fp8,0,0.5394773483276367
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,256,40,2,64,0,1,fp8,fp8,0,0.5853866736094157
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,256,40,4,64,0,1,float16,float16,0,0.6026240189870199
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,256,40,4,64,0,1,float16,fp8,0,0.5789013306299845
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,256,40,4,64,0,1,fp8,fp8,0,0.628053347269694
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,256,40,8,64,0,1,float16,float16,0,0.7043413321177164
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,256,40,8,64,0,1,float16,fp8,0,0.677717367808024
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,256,40,8,64,0,1,fp8,fp8,0,0.6818133195241293
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,256,40,40,64,0,1,float16,float16,0,0.6720853646596273
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,256,40,40,64,0,1,float16,fp8,0,0.6191786527633667
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,256,40,40,64,0,1,fp8,fp8,0,0.5865813493728638
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,256,40,1,64,0,1,float16,fp8,0,0.20411733786265054
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,256,40,1,64,0,1,fp8,fp8,0,0.23381332556406656
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,256,40,1,64,0,1,float16,float16,0,0.19660800695419312
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,256,40,2,64,0,1,float16,float16,0,0.19114667177200317
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,256,40,2,64,0,1,float16,fp8,0,0.1867093245188395
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,256,40,2,64,0,1,fp8,fp8,0,0.24985599517822266
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,256,40,4,64,0,1,float16,float16,0,0.20480000972747803
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,256,40,4,64,0,1,float16,fp8,0,0.20036266247431436
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,256,40,8,64,0,1,float16,float16,0,0.22016000747680664
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,256,40,4,64,0,1,fp8,fp8,0,0.2672640085220337
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,256,40,8,64,0,1,float16,fp8,0,0.211626668771108
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,256,40,8,64,0,1,fp8,fp8,0,0.31778132915496826
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,256,40,40,64,0,1,float16,float16,0,0.1532586713631948
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,256,40,40,64,0,1,float16,fp8,0,0.11912533640861511
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,256,40,40,64,0,1,fp8,fp8,0,0.27613866329193115
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,256,40,1,64,0,1,float16,float16,0,0.09471999605496724
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,256,40,1,64,0,1,float16,fp8,0,0.09762133161226909
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,256,40,1,64,0,1,fp8,fp8,0,0.12083199620246887
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,256,40,2,64,0,1,float16,float16,0,0.09745066364606221
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,256,40,2,64,0,1,float16,fp8,0,0.09437867005666097
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,256,40,4,64,0,1,float16,float16,0,0.09523199995358785
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,256,40,2,64,0,1,fp8,fp8,0,0.12236799796422322
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,256,40,4,64,0,1,float16,fp8,0,0.09454933802286784
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,256,40,4,64,0,1,fp8,fp8,0,0.12339199582735698
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,256,40,8,64,0,1,float16,float16,0,0.09676800171534221
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,256,40,8,64,0,1,float16,fp8,0,0.09591466188430786
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,256,40,8,64,0,1,fp8,fp8,0,0.12526933352152506
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,256,40,40,64,0,1,float16,float16,0,0.05836800237496694
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,256,40,40,64,0,1,float16,fp8,0,0.0580266664425532
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,256,40,40,64,0,1,fp8,fp8,0,0.07014399766921997
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,256,40,1,64,0,1,float16,float16,0,0.054101333022117615
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,256,40,1,64,0,1,float16,fp8,0,0.05563733478387197
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,256,40,1,64,0,1,fp8,fp8,0,0.06929066777229309
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,256,40,2,64,0,1,float16,float16,0,0.05461333195368449
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,256,40,2,64,0,1,float16,fp8,0,0.05563733478387197
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,256,40,2,64,0,1,fp8,fp8,0,0.06963199873765309
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,256,40,4,64,0,1,float16,fp8,0,0.05614933371543884
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,256,40,4,64,0,1,float16,float16,0,0.056320001681645714
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,256,40,4,64,0,1,fp8,fp8,0,0.06894933183987935
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,256,40,8,64,0,1,float16,float16,0,0.05649066468079885
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,256,40,8,64,0,1,float16,fp8,0,0.0554666668176651
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,256,40,8,64,0,1,fp8,fp8,0,0.06980266670385997
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,256,40,40,64,0,1,float16,float16,0,0.034815999368826546
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,256,40,40,64,0,1,float16,fp8,0,0.0341333324710528
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,256,40,40,64,0,1,fp8,fp8,0,0.04334933559099833
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,256,40,1,64,0,1,float16,float16,0,0.03310933212439219
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,256,40,1,64,0,1,float16,fp8,0,0.03293866664171219
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,256,40,1,64,0,1,fp8,fp8,0,0.04232533276081085
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,256,40,2,64,0,1,float16,float16,0,0.03276800115903219
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,256,40,2,64,0,1,float16,fp8,0,0.03276800115903219
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,256,40,2,64,0,1,fp8,fp8,0,0.043007999658584595
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,256,40,4,64,0,1,float16,float16,0,0.03259733319282532
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,256,40,4,64,0,1,float16,fp8,0,0.03293866664171219
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,256,40,4,64,0,1,fp8,fp8,0,0.042837331692377724
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,256,40,8,64,0,1,float16,float16,0,0.03293866664171219
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,256,40,8,64,0,1,float16,fp8,0,0.03293866664171219
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,256,40,8,64,0,1,fp8,fp8,0,0.04266666869322459
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,256,40,40,64,0,1,float16,float16,0,0.02372266600529353
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,256,40,40,64,0,1,float16,fp8,0,0.023552000522613525
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,256,40,40,64,0,1,fp8,fp8,0,0.0288426677385966
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,256,40,1,64,0,1,float16,float16,0,0.022357332209746044
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,256,40,1,64,0,1,float16,fp8,0,0.022357332209746044
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,256,40,2,64,0,1,float16,float16,0,0.022698665658632915
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,256,40,1,64,0,1,fp8,fp8,0,0.027818667391935985
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,256,40,2,64,0,1,float16,fp8,0,0.022869333624839783
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,256,40,2,64,0,1,fp8,fp8,0,0.028330666323502857
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,256,40,4,64,0,1,float16,float16,0,0.022869333624839783
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,256,40,4,64,0,1,float16,fp8,0,0.022698665658632915
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,256,40,4,64,0,1,fp8,fp8,0,0.027989332874615986
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,256,40,8,64,0,1,float16,float16,0,0.022869333624839783
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,256,40,8,64,0,1,float16,fp8,0,0.022869333624839783
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,256,40,40,64,0,1,float16,float16,0,0.015530666957298914
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,256,40,8,64,0,1,fp8,fp8,0,0.028160000840822857
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,256,40,40,64,0,1,float16,fp8,0,0.015189333508412043
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,256,40,40,64,0,1,fp8,fp8,0,0.019626667102177937
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,256,40,1,64,0,1,float16,float16,0,0.015530666957298914
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,256,40,1,64,0,1,float16,fp8,0,0.01570133368174235
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,256,40,1,64,0,1,fp8,fp8,0,0.019797333826621372
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,256,40,2,64,0,1,float16,fp8,0,0.015189333508412043
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,256,40,2,64,0,1,float16,float16,0,0.015018666783968607
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,256,40,2,64,0,1,fp8,fp8,0,0.019626667102177937
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,256,40,4,64,0,1,float16,float16,0,0.015189333508412043
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,256,40,4,64,0,1,float16,fp8,0,0.015360000232855478
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,256,40,4,64,0,1,fp8,fp8,0,0.019626667102177937
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,256,40,8,64,0,1,float16,float16,0,0.015189333508412043
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,256,40,8,64,0,1,fp8,fp8,0,0.019968000551064808
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,256,40,8,64,0,1,float16,fp8,0,0.015360000232855478
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,128,40,1,64,0,1,float16,float16,0,1.203541358311971
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,128,40,1,64,0,1,float16,fp8,0,1.2001279989878337
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,128,40,1,64,0,1,fp8,fp8,0,0.9833813508351644
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,128,40,2,64,0,1,float16,float16,0,1.2704426447550456
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,128,40,2,64,0,1,fp8,fp8,0,1.0238293011983235
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,128,40,2,64,0,1,float16,fp8,0,1.25764266649882
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,128,40,4,64,0,1,float16,float16,0,1.36789337793986
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,128,40,4,64,0,1,float16,fp8,0,1.3632853825887044
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,128,40,4,64,0,1,fp8,fp8,0,1.102677345275879
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,128,40,8,64,0,1,float16,float16,0,1.5930026372273762
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,128,40,8,64,0,1,float16,fp8,0,1.5563093821207683
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,128,40,8,64,0,1,fp8,fp8,0,1.2306773662567139
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,128,40,1,64,0,1,float16,float16,0,0.49851731459299725
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,128,40,40,64,0,1,fp8,fp8,0,1.1675306955973308
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,128,40,40,64,0,1,float16,float16,0,1.498794714609782
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,128,40,40,64,0,1,float16,fp8,0,1.3996373812357585
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,128,40,1,64,0,1,float16,fp8,0,0.4939093192418416
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,128,40,2,64,0,1,float16,float16,0,0.5497173468271891
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,128,40,1,64,0,1,fp8,fp8,0,0.4940799872080485
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,128,40,2,64,0,1,float16,fp8,0,0.5430613358815511
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,128,40,4,64,0,1,float16,float16,0,0.6050133307774862
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,128,40,2,64,0,1,fp8,fp8,0,0.5017600059509277
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,128,40,4,64,0,1,float16,fp8,0,0.5899946689605713
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,128,40,4,64,0,1,fp8,fp8,0,0.5382826725641886
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,128,40,8,64,0,1,float16,float16,0,0.719701369603475
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,128,40,8,64,0,1,float16,fp8,0,0.6930773258209229
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,128,40,8,64,0,1,fp8,fp8,0,0.6084266503651937
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,128,40,40,64,0,1,float16,float16,0,0.6818133195241293
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,128,40,40,64,0,1,float16,fp8,0,0.628053347269694
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,128,40,40,64,0,1,fp8,fp8,0,0.5760000149408976
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,128,40,1,64,0,1,float16,float16,0,0.14591999848683676
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,128,40,1,64,0,1,float16,fp8,0,0.14865066607793173
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,128,40,1,64,0,1,fp8,fp8,0,0.1867093245188395
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,128,40,2,64,0,1,float16,float16,0,0.1513813336690267
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,128,40,2,64,0,1,float16,fp8,0,0.1513813336690267
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,128,40,2,64,0,1,fp8,fp8,0,0.1976319948832194
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,128,40,4,64,0,1,float16,float16,0,0.159061332543691
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,128,40,4,64,0,1,float16,fp8,0,0.1604266663392385
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,128,40,4,64,0,1,fp8,fp8,0,0.22630399465560913
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,128,40,8,64,0,1,float16,float16,0,0.19353600343068442
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,128,40,8,64,0,1,float16,fp8,0,0.17595734198888144
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,128,40,8,64,0,1,fp8,fp8,0,0.2788693308830261
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,128,40,40,64,0,1,float16,float16,0,0.15223466356595358
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,128,40,40,64,0,1,float16,fp8,0,0.10700800021489461
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,128,40,40,64,0,1,fp8,fp8,0,0.2583893338839213
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,128,40,1,64,0,1,float16,float16,0,0.07355733215808868
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,128,40,1,64,0,1,float16,fp8,0,0.07236266632874806
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,128,40,1,64,0,1,fp8,fp8,0,0.09540266791979472
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,128,40,2,64,0,1,float16,float16,0,0.07423999905586243
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,128,40,2,64,0,1,float16,fp8,0,0.07338666419188182
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,128,40,2,64,0,1,fp8,fp8,0,0.09471999605496724
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,128,40,4,64,0,1,float16,float16,0,0.07509333391984303
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,128,40,4,64,0,1,float16,fp8,0,0.07321600119272868
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,128,40,4,64,0,1,fp8,fp8,0,0.09608532985051473
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,128,40,8,64,0,1,float16,float16,0,0.07458133498827617
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,128,40,8,64,0,1,fp8,fp8,0,0.09727999567985535
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,128,40,8,64,0,1,float16,fp8,0,0.07406933108965556
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,128,40,40,64,0,1,float16,float16,0,0.04625066618124644
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,128,40,40,64,0,1,float16,fp8,0,0.045909335215886436
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,128,40,40,64,0,1,fp8,fp8,0,0.05836800237496694
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,128,40,1,64,0,1,float16,float16,0,0.042837331692377724
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,128,40,1,64,0,1,float16,fp8,0,0.042837331692377724
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,128,40,1,64,0,1,fp8,fp8,0,0.0576853354771932
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,128,40,2,64,0,1,float16,float16,0,0.043178667624791466
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,128,40,2,64,0,1,float16,fp8,0,0.04334933559099833
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,128,40,2,64,0,1,fp8,fp8,0,0.0576853354771932
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,128,40,4,64,0,1,float16,float16,0,0.04266666869322459
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,128,40,4,64,0,1,float16,fp8,0,0.04369066655635834
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,128,40,4,64,0,1,fp8,fp8,0,0.05819733440876007
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,128,40,8,64,0,1,float16,float16,0,0.04420266548792521
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,128,40,8,64,0,1,fp8,fp8,0,0.05836800237496694
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,128,40,8,64,0,1,float16,fp8,0,0.04454400142033895
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,128,40,40,64,0,1,float16,fp8,0,0.02867199977238973
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,128,40,40,64,0,1,float16,float16,0,0.0290133332212766
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,128,40,40,64,0,1,fp8,fp8,0,0.03822933385769526
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,128,40,1,64,0,1,float16,float16,0,0.0264533335963885
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,128,40,1,64,0,1,float16,fp8,0,0.02679466704527537
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,128,40,1,64,0,1,fp8,fp8,0,0.03703466554482778
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,128,40,2,64,0,1,float16,float16,0,0.02679466704527537
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,128,40,2,64,0,1,float16,fp8,0,0.027477333943049114
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,128,40,2,64,0,1,fp8,fp8,0,0.03788800040880839
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,128,40,4,64,0,1,float16,float16,0,0.026965332527955372
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,128,40,4,64,0,1,float16,fp8,0,0.027306665976842243
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,128,40,4,64,0,1,fp8,fp8,0,0.03839999934037527
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,128,40,8,64,0,1,float16,float16,0,0.027477333943049114
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,128,40,8,64,0,1,float16,fp8,0,0.027647999425729115
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,128,40,8,64,0,1,fp8,fp8,0,0.03737599899371465
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,128,40,40,64,0,1,float16,fp8,0,0.020309332758188248
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,128,40,40,64,0,1,float16,float16,0,0.020479999482631683
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,128,40,40,64,0,1,fp8,fp8,0,0.022869333624839783
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,128,40,1,64,0,1,float16,float16,0,0.018944000204404194
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,128,40,1,64,0,1,float16,fp8,0,0.01877333347996076
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,128,40,1,64,0,1,fp8,fp8,0,0.02184533327817917
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,128,40,2,64,0,1,float16,fp8,0,0.019285333653291065
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,128,40,2,64,0,1,fp8,fp8,0,0.022015998760859173
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,128,40,2,64,0,1,float16,float16,0,0.019626667102177937
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,128,40,4,64,0,1,float16,float16,0,0.0194560003777345
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,128,40,4,64,0,1,float16,fp8,0,0.019626667102177937
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,128,40,4,64,0,1,fp8,fp8,0,0.02184533327817917
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,128,40,8,64,0,1,float16,float16,0,0.019797333826621372
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,128,40,8,64,0,1,float16,fp8,0,0.019626667102177937
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,128,40,8,64,0,1,fp8,fp8,0,0.02218666672706604
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,128,40,40,64,0,1,float16,float16,0,0.012970666090647379
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,128,40,40,64,0,1,fp8,fp8,0,0.01570133368174235
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,128,40,40,64,0,1,float16,fp8,0,0.012970666090647379
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,128,40,1,64,0,1,float16,float16,0,0.012970666090647379
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,128,40,1,64,0,1,float16,fp8,0,0.012970666090647379
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,128,40,1,64,0,1,fp8,fp8,0,0.015360000232855478
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,128,40,2,64,0,1,float16,float16,0,0.012800000607967377
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,128,40,2,64,0,1,float16,fp8,0,0.012970666090647379
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,128,40,2,64,0,1,fp8,fp8,0,0.015360000232855478
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,128,40,4,64,0,1,float16,float16,0,0.012800000607967377
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,128,40,4,64,0,1,float16,fp8,0,0.012970666090647379
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,128,40,4,64,0,1,fp8,fp8,0,0.015360000232855478
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,128,40,8,64,0,1,float16,float16,0,0.012629333883523941
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,128,40,8,64,0,1,float16,fp8,0,0.012970666090647379
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,128,40,8,64,0,1,fp8,fp8,0,0.015872000406185787
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,128,40,40,64,0,1,float16,fp8,0,0.010751999914646149
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,128,40,40,64,0,1,float16,float16,0,0.010581333190202713
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,128,40,40,64,0,1,fp8,fp8,0,0.01331199953953425
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,128,40,1,64,0,1,float16,float16,0,0.010922666639089584
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,128,40,1,64,0,1,float16,fp8,0,0.010922666639089584
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,128,40,1,64,0,1,fp8,fp8,0,0.01331199953953425
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,128,40,2,64,0,1,float16,float16,0,0.010751999914646149
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,128,40,2,64,0,1,float16,fp8,0,0.010922666639089584
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,128,40,4,64,0,1,float16,float16,0,0.010581333190202713
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,128,40,4,64,0,1,float16,fp8,0,0.010922666639089584
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,128,40,2,64,0,1,fp8,fp8,0,0.013141332815090815
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,128,40,4,64,0,1,fp8,fp8,0,0.01331199953953425
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,128,40,8,64,0,1,float16,float16,0,0.010581333190202713
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,128,40,8,64,0,1,float16,fp8,0,0.010751999914646149
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,128,40,8,64,0,1,fp8,fp8,0,0.01331199953953425
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,64,40,1,64,0,1,float16,float16,0,0.5099519888559977
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,64,40,1,64,0,1,float16,fp8,0,0.5125120083491007
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,64,40,1,64,0,1,fp8,fp8,0,0.636245330174764
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,64,40,2,64,0,1,float16,float16,0,0.5492053429285685
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,64,40,2,64,0,1,float16,fp8,0,0.5396480162938436
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,64,40,2,64,0,1,fp8,fp8,0,0.6567253271738688
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,64,40,4,64,0,1,float16,float16,0,0.6068906784057617
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,64,40,4,64,0,1,float16,fp8,0,0.6038186550140381
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,64,40,4,64,0,1,fp8,fp8,0,0.7075839837392172
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,64,40,8,64,0,1,float16,float16,0,0.7355733712514242
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,64,40,8,64,0,1,float16,fp8,0,0.7077546914418539
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,64,40,40,64,0,1,float16,float16,0,0.6843732992808024
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,64,40,8,64,0,1,fp8,fp8,0,0.764245351155599
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,64,40,40,64,0,1,float16,fp8,0,0.6316373348236084
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,64,40,40,64,0,1,fp8,fp8,0,0.6251519918441772
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,64,40,1,64,0,1,float16,float16,0,0.12288000186284383
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,64,40,1,64,0,1,float16,fp8,0,0.1204906702041626
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,64,40,1,64,0,1,fp8,fp8,0,0.26265599330266315
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,64,40,2,64,0,1,float16,fp8,0,0.13243732849756876
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,64,40,2,64,0,1,float16,float16,0,0.13755733768145242
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,64,40,2,64,0,1,fp8,fp8,0,0.274944007396698
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,64,40,4,64,0,1,float16,float16,0,0.15035733580589294
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,64,40,4,64,0,1,float16,fp8,0,0.14472533265749613
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,64,40,4,64,0,1,fp8,fp8,0,0.3015679915746053
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,64,40,8,64,0,1,float16,float16,0,0.19404800732930502
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,64,40,8,64,0,1,float16,fp8,0,0.17339734236399332
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,64,40,40,64,0,1,float16,fp8,0,0.10154666503270467
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,64,40,40,64,0,1,float16,float16,0,0.1430186629295349
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,64,40,8,64,0,1,fp8,fp8,0,0.3575466473897298
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,64,40,40,64,0,1,fp8,fp8,0,0.2930346727371216
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,64,40,1,64,0,1,float16,float16,0,0.060415998101234436
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,64,40,1,64,0,1,float16,fp8,0,0.059903999169667564
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,64,40,1,64,0,1,fp8,fp8,0,0.12970667084058127
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,64,40,2,64,0,1,float16,float16,0,0.060415998101234436
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,64,40,2,64,0,1,fp8,fp8,0,0.13038933277130127
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,64,40,4,64,0,1,float16,float16,0,0.06126933296521505
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,64,40,4,64,0,1,float16,fp8,0,0.060415998101234436
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,64,40,4,64,0,1,fp8,fp8,0,0.13090133666992188
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,64,40,8,64,0,1,float16,float16,0,0.062122667829195656
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,64,40,2,64,0,1,float16,fp8,0,0.059903999169667564
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,64,40,8,64,0,1,float16,fp8,0,0.06178133189678192
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,64,40,8,64,0,1,fp8,fp8,0,0.13209600249926248
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,64,40,40,64,0,1,float16,fp8,0,0.039936001102129616
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,64,40,40,64,0,1,float16,float16,0,0.040618665516376495
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,64,40,40,64,0,1,fp8,fp8,0,0.07679999868075053
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,64,40,1,64,0,1,float16,float16,0,0.03669333209594091
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,64,40,1,64,0,1,float16,fp8,0,0.03669333209594091
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,64,40,1,64,0,1,fp8,fp8,0,0.07372800012429555
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,64,40,2,64,0,1,float16,float16,0,0.03737599899371465
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,64,40,2,64,0,1,float16,fp8,0,0.03788800040880839
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,64,40,2,64,0,1,fp8,fp8,0,0.07526400188604991
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,64,40,4,64,0,1,float16,float16,0,0.03754666695992152
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,64,40,4,64,0,1,float16,fp8,0,0.03822933385769526
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,64,40,4,64,0,1,fp8,fp8,0,0.07526400188604991
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,64,40,8,64,0,1,float16,float16,0,0.038058665891488395
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,64,40,8,64,0,1,float16,fp8,0,0.03788800040880839
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,64,40,8,64,0,1,fp8,fp8,0,0.07628799974918365
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,64,40,40,64,0,1,float16,float16,0,0.02611200014750163
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,64,40,40,64,0,1,float16,fp8,0,0.02611200014750163
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,64,40,40,64,0,1,fp8,fp8,0,0.04693333307902018
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,64,40,1,64,0,1,float16,float16,0,0.023381332556406658
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,64,40,1,64,0,1,float16,fp8,0,0.0240639994541804
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,64,40,1,64,0,1,fp8,fp8,0,0.045567999283472695
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,64,40,2,64,0,1,float16,fp8,0,0.0240639994541804
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,64,40,2,64,0,1,float16,float16,0,0.02457600086927414
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,64,40,2,64,0,1,fp8,fp8,0,0.04727466901143392
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,64,40,4,64,0,1,float16,float16,0,0.024746666351954143
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,64,40,4,64,0,1,float16,fp8,0,0.02457600086927414
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,64,40,4,64,0,1,fp8,fp8,0,0.04710400104522705
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,64,40,8,64,0,1,float16,fp8,0,0.02491733431816101
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,64,40,8,64,0,1,float16,float16,0,0.024746666351954143
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,64,40,8,64,0,1,fp8,fp8,0,0.04778666794300079
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,64,40,40,64,0,1,float16,float16,0,0.018432000031073887
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,64,40,40,64,0,1,float16,fp8,0,0.01826133330663045
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,64,40,40,64,0,1,fp8,fp8,0,0.032255999743938446
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,64,40,1,64,0,1,float16,float16,0,0.016895999511082966
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,64,40,1,64,0,1,float16,fp8,0,0.016554666062196095
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,64,40,1,64,0,1,fp8,fp8,0,0.0314026673634847
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,64,40,2,64,0,1,float16,float16,0,0.017407999684413273
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,64,40,2,64,0,1,float16,fp8,0,0.01757866640885671
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,64,40,2,64,0,1,fp8,fp8,0,0.0315733328461647
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,64,40,4,64,0,1,float16,float16,0,0.017407999684413273
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,64,40,4,64,0,1,float16,fp8,0,0.01757866640885671
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,64,40,4,64,0,1,fp8,fp8,0,0.0314026673634847
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,64,40,8,64,0,1,float16,float16,0,0.017407999684413273
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,64,40,8,64,0,1,float16,fp8,0,0.017749333133300144
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,64,40,8,64,0,1,fp8,fp8,0,0.0315733328461647
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,64,40,40,64,0,1,float16,float16,0,0.011434666812419891
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,64,40,40,64,0,1,float16,fp8,0,0.011434666812419891
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,64,40,40,64,0,1,fp8,fp8,0,0.01791999985774358
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,64,40,1,64,0,1,float16,float16,0,0.011264000087976456
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,64,40,1,64,0,1,float16,fp8,0,0.011605333536863327
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,64,40,1,64,0,1,fp8,fp8,0,0.01791999985774358
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,64,40,2,64,0,1,float16,float16,0,0.011264000087976456
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,64,40,2,64,0,1,float16,fp8,0,0.011264000087976456
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,64,40,2,64,0,1,fp8,fp8,0,0.018090666582187016
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,64,40,4,64,0,1,float16,fp8,0,0.011264000087976456
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,64,40,4,64,0,1,float16,float16,0,0.011264000087976456
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,64,40,4,64,0,1,fp8,fp8,0,0.017749333133300144
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,64,40,8,64,0,1,float16,float16,0,0.011264000087976456
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,64,40,8,64,0,1,float16,fp8,0,0.011264000087976456
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,64,40,8,64,0,1,fp8,fp8,0,0.01791999985774358
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,64,40,40,64,0,1,float16,float16,0,0.009216000015536943
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,64,40,40,64,0,1,float16,fp8,0,0.00972800018886725
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,64,40,40,64,0,1,fp8,fp8,0,0.013994666437307993
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,64,40,1,64,0,1,float16,float16,0,0.00938666673998038
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,64,40,1,64,0,1,float16,fp8,0,0.010069333637754122
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,64,40,1,64,0,1,fp8,fp8,0,0.013823999712864557
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,64,40,2,64,0,1,float16,fp8,0,0.00972800018886725
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,64,40,2,64,0,1,float16,float16,0,0.00938666673998038
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,64,40,2,64,0,1,fp8,fp8,0,0.013994666437307993
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,64,40,4,64,0,1,float16,float16,0,0.00938666673998038
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,64,40,4,64,0,1,float16,fp8,0,0.009898666913310686
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,64,40,4,64,0,1,fp8,fp8,0,0.013823999712864557
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,64,40,8,64,0,1,float16,float16,0,0.009557333464423815
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,64,40,8,64,0,1,float16,fp8,0,0.009898666913310686
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,64,40,8,64,0,1,fp8,fp8,0,0.013994666437307993
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,64,40,40,64,0,1,float16,float16,0,0.009216000015536943
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,64,40,40,64,0,1,float16,fp8,0,0.008874666566650072
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,64,40,40,64,0,1,fp8,fp8,0,0.012117333710193634
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,64,40,1,64,0,1,float16,float16,0,0.008874666566650072
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,64,40,1,64,0,1,fp8,fp8,0,0.012458667159080505
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,64,40,1,64,0,1,float16,fp8,0,0.008874666566650072
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,64,40,2,64,0,1,float16,float16,0,0.008874666566650072
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,64,40,2,64,0,1,float16,fp8,0,0.009557333464423815
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,64,40,2,64,0,1,fp8,fp8,0,0.01228800043463707
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,64,40,4,64,0,1,float16,fp8,0,0.009045333291093508
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,64,40,4,64,0,1,float16,float16,0,0.008703999842206636
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,64,40,4,64,0,1,fp8,fp8,0,0.01228800043463707
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,64,40,8,64,0,1,float16,float16,0,0.008874666566650072
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,64,40,8,64,0,1,fp8,fp8,0,0.012458667159080505
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,64,40,8,64,0,1,float16,fp8,0,0.009045333291093508
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,32,40,1,64,0,1,float16,float16,0,0.1346560021241506
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,32,40,1,64,0,1,float16,fp8,0,0.13192533453305563
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,32,40,1,64,0,1,fp8,fp8,0,0.42581331729888916
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,32,40,2,64,0,1,float16,float16,0,0.14199466506640115
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,32,40,2,64,0,1,float16,fp8,0,0.14028799533843994
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,32,40,2,64,0,1,fp8,fp8,0,0.4411733150482178
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,32,40,4,64,0,1,float16,float16,0,0.15479466319084167
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,32,40,4,64,0,1,float16,fp8,0,0.144896000623703
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,32,40,4,64,0,1,fp8,fp8,0,0.4659200112024943
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,32,40,8,64,0,1,float16,float16,0,0.18858667214711508
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,32,40,8,64,0,1,float16,fp8,0,0.17373865842819214
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,32,40,8,64,0,1,fp8,fp8,0,0.5207039912541708
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,32,40,40,64,0,1,float16,float16,0,0.1518933375676473
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,32,40,40,64,0,1,float16,fp8,0,0.10734933614730835
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,32,40,40,64,0,1,fp8,fp8,0,0.3729066848754883
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,32,40,1,64,0,1,float16,fp8,0,0.06604800124963124
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,32,40,1,64,0,1,float16,float16,0,0.06587733328342438
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,32,40,2,64,0,1,float16,float16,0,0.06656000018119812
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,32,40,1,64,0,1,fp8,fp8,0,0.21230934063593546
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,32,40,2,64,0,1,float16,fp8,0,0.06690133114655812
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,32,40,2,64,0,1,fp8,fp8,0,0.21282132466634116
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,32,40,4,64,0,1,float16,float16,0,0.067071999112765
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,32,40,4,64,0,1,float16,fp8,0,0.06673066814740498
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,32,40,4,64,0,1,fp8,fp8,0,0.21282132466634116
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,32,40,8,64,0,1,float16,fp8,0,0.0682666649421056
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,32,40,8,64,0,1,fp8,fp8,0,0.21384533246358237
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,32,40,8,64,0,1,float16,float16,0,0.06809600194295247
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,32,40,40,64,0,1,float16,float16,0,0.04164266586303711
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,32,40,40,64,0,1,float16,fp8,0,0.04027733455101649
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,32,40,40,64,0,1,fp8,fp8,0,0.1181013286113739
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,32,40,1,64,0,1,float16,float16,0,0.03908266623814901
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,32,40,1,64,0,1,float16,fp8,0,0.039594667653242745
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,32,40,1,64,0,1,fp8,fp8,0,0.11707733074824016
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,32,40,2,64,0,1,float16,float16,0,0.03942399968703588
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,32,40,2,64,0,1,float16,fp8,0,0.03925333420435587
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,32,40,2,64,0,1,fp8,fp8,0,0.1160533328851064
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,32,40,4,64,0,1,float16,float16,0,0.03976533313592275
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,32,40,4,64,0,1,float16,fp8,0,0.039936001102129616
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,32,40,4,64,0,1,fp8,fp8,0,0.116565336783727
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,32,40,8,64,0,1,float16,float16,0,0.040448000033696495
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,32,40,8,64,0,1,float16,fp8,0,0.040789333482583366
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,32,40,8,64,0,1,fp8,fp8,0,0.11673600474993388
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,32,40,40,64,0,1,float16,float16,0,0.02628266563018163
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,32,40,40,64,0,1,float16,fp8,0,0.02611200014750163
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,32,40,40,64,0,1,fp8,fp8,0,0.0679253339767456
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,32,40,1,64,0,1,float16,float16,0,0.02491733431816101
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,32,40,1,64,0,1,float16,fp8,0,0.02491733431816101
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,32,40,2,64,0,1,float16,float16,0,0.025429333249727886
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,32,40,1,64,0,1,fp8,fp8,0,0.067071999112765
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,32,40,2,64,0,1,float16,fp8,0,0.025087999800841015
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,32,40,2,64,0,1,fp8,fp8,0,0.06775466601053874
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,32,40,4,64,0,1,float16,float16,0,0.025600001215934753
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,32,40,4,64,0,1,float16,fp8,0,0.025258667767047882
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,32,40,4,64,0,1,fp8,fp8,0,0.06724266707897186
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,32,40,8,64,0,1,float16,fp8,0,0.025600001215934753
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,32,40,8,64,0,1,float16,float16,0,0.025258667767047882
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,32,40,8,64,0,1,fp8,fp8,0,0.0679253339767456
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,32,40,40,64,0,1,float16,float16,0,0.01791999985774358
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,32,40,40,64,0,1,fp8,fp8,0,0.04215466479460398
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,32,40,40,64,0,1,float16,fp8,0,0.017749333133300144
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,32,40,1,64,0,1,float16,float16,0,0.016895999511082966
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,32,40,1,64,0,1,float16,fp8,0,0.016895999511082966
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,32,40,1,64,0,1,fp8,fp8,0,0.041984001795450844
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,32,40,2,64,0,1,float16,float16,0,0.0170666662355264
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,32,40,2,64,0,1,float16,fp8,0,0.017407999684413273
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,32,40,2,64,0,1,fp8,fp8,0,0.041984001795450844
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,32,40,4,64,0,1,float16,float16,0,0.017237332959969837
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,32,40,4,64,0,1,float16,fp8,0,0.017237332959969837
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,32,40,4,64,0,1,fp8,fp8,0,0.04232533276081085
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,32,40,8,64,0,1,float16,fp8,0,0.017749333133300144
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,32,40,8,64,0,1,float16,float16,0,0.017407999684413273
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,32,40,40,64,0,1,float16,float16,0,0.01228800043463707
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,32,40,8,64,0,1,fp8,fp8,0,0.04215466479460398
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,32,40,40,64,0,1,float16,fp8,0,0.012117333710193634
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,32,40,40,64,0,1,fp8,fp8,0,0.027818667391935985
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,32,40,1,64,0,1,float16,fp8,0,0.01228800043463707
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,32,40,1,64,0,1,float16,float16,0,0.012117333710193634
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,32,40,1,64,0,1,fp8,fp8,0,0.027989332874615986
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,32,40,2,64,0,1,float16,float16,0,0.012117333710193634
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,32,40,2,64,0,1,float16,fp8,0,0.012117333710193634
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,32,40,2,64,0,1,fp8,fp8,0,0.027818667391935985
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,32,40,4,64,0,1,float16,float16,0,0.011776000261306763
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,32,40,4,64,0,1,float16,fp8,0,0.011946666985750198
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,32,40,4,64,0,1,fp8,fp8,0,0.027647999425729115
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,32,40,8,64,0,1,float16,float16,0,0.011776000261306763
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,32,40,8,64,0,1,float16,fp8,0,0.01228800043463707
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,32,40,8,64,0,1,fp8,fp8,0,0.028160000840822857
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,32,40,40,64,0,1,float16,float16,0,0.009557333464423815
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,32,40,40,64,0,1,float16,fp8,0,0.00972800018886725
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,32,40,40,64,0,1,fp8,fp8,0,0.01672533278663953
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,32,40,1,64,0,1,float16,float16,0,0.009898666913310686
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,32,40,1,64,0,1,float16,fp8,0,0.009898666913310686
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,32,40,1,64,0,1,fp8,fp8,0,0.016895999511082966
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,32,40,2,64,0,1,float16,float16,0,0.009557333464423815
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,32,40,2,64,0,1,float16,fp8,0,0.009898666913310686
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,32,40,2,64,0,1,fp8,fp8,0,0.017237332959969837
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,32,40,4,64,0,1,float16,float16,0,0.00938666673998038
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,32,40,4,64,0,1,float16,fp8,0,0.00972800018886725
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,32,40,4,64,0,1,fp8,fp8,0,0.016895999511082966
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,32,40,8,64,0,1,float16,float16,0,0.009557333464423815
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,32,40,8,64,0,1,float16,fp8,0,0.00972800018886725
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,32,40,8,64,0,1,fp8,fp8,0,0.017237332959969837
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,32,40,40,64,0,1,float16,float16,0,0.0085333331177632
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,32,40,40,64,0,1,float16,fp8,0,0.008362666393319765
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,32,40,40,64,0,1,fp8,fp8,0,0.013141332815090815
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,32,40,1,64,0,1,float16,float16,0,0.009216000015536943
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,32,40,1,64,0,1,float16,fp8,0,0.0085333331177632
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,32,40,1,64,0,1,fp8,fp8,0,0.013653332988421122
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,32,40,2,64,0,1,float16,float16,0,0.008362666393319765
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,32,40,2,64,0,1,float16,fp8,0,0.0085333331177632
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,32,40,2,64,0,1,fp8,fp8,0,0.01331199953953425
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,32,40,4,64,0,1,float16,float16,0,0.008192000289758047
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,32,40,4,64,0,1,float16,fp8,0,0.0085333331177632
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,32,40,4,64,0,1,fp8,fp8,0,0.01331199953953425
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,32,40,8,64,0,1,float16,float16,0,0.008192000289758047
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,32,40,8,64,0,1,float16,fp8,0,0.008703999842206636
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,32,40,8,64,0,1,fp8,fp8,0,0.013141332815090815
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,32,40,40,64,0,1,float16,float16,0,0.008874666566650072
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,32,40,40,64,0,1,float16,fp8,0,0.0085333331177632
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,32,40,40,64,0,1,fp8,fp8,0,0.011776000261306763
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,32,40,1,64,0,1,float16,float16,0,0.008874666566650072
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,32,40,1,64,0,1,float16,fp8,0,0.0085333331177632
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,32,40,1,64,0,1,fp8,fp8,0,0.012117333710193634
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,32,40,2,64,0,1,float16,float16,0,0.009216000015536943
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,32,40,2,64,0,1,float16,fp8,0,0.008192000289758047
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,32,40,2,64,0,1,fp8,fp8,0,0.012117333710193634
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,32,40,4,64,0,1,float16,float16,0,0.008703999842206636
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,32,40,4,64,0,1,float16,fp8,0,0.008362666393319765
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,32,40,4,64,0,1,fp8,fp8,0,0.012117333710193634
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,32,40,8,64,0,1,float16,float16,0,0.0085333331177632
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,32,40,8,64,0,1,float16,fp8,0,0.008192000289758047
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,32,40,8,64,0,1,fp8,fp8,0,0.012117333710193634
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,16,40,1,64,0,1,float16,float16,0,0.09574400385220845
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,16,40,1,64,0,1,float16,fp8,0,0.09574400385220845
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,16,40,2,64,0,1,float16,float16,0,0.10717866818110149
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,16,40,1,64,0,1,fp8,fp8,0,0.37887998421986896
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,16,40,2,64,0,1,float16,fp8,0,0.09659733374913533
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,16,40,2,64,0,1,fp8,fp8,0,0.37956265608469647
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,16,40,4,64,0,1,float16,float16,0,0.09727999567985535
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,16,40,4,64,0,1,float16,fp8,0,0.09710933764775594
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,16,40,8,64,0,1,float16,float16,0,0.09796266754468282
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,16,40,4,64,0,1,fp8,fp8,0,0.3804159959157308
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,16,40,8,64,0,1,float16,fp8,0,0.09796266754468282
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,16,40,40,64,0,1,float16,float16,0,0.05358933409055074
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,16,40,8,64,0,1,fp8,fp8,0,0.3821226755777995
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,16,40,40,64,0,1,float16,fp8,0,0.05205333232879639
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,16,40,1,64,0,1,float16,float16,0,0.052906667192777
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,16,40,40,64,0,1,fp8,fp8,0,0.20155733823776245
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,16,40,1,64,0,1,float16,fp8,0,0.053247998158137
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,16,40,1,64,0,1,fp8,fp8,0,0.1976319948832194
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,16,40,2,64,0,1,float16,float16,0,0.05341866612434387
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,16,40,2,64,0,1,float16,fp8,0,0.053077335158983864
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,16,40,2,64,0,1,fp8,fp8,0,0.1986560026804606
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,16,40,4,64,0,1,float16,float16,0,0.05358933409055074
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,16,40,4,64,0,1,float16,fp8,0,0.054272000988324486
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,16,40,4,64,0,1,fp8,fp8,0,0.19848533471425375
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,16,40,8,64,0,1,float16,float16,0,0.054272000988324486
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,16,40,8,64,0,1,float16,fp8,0,0.05444266895453135
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,16,40,8,64,0,1,fp8,fp8,0,0.1991680065790812
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,16,40,40,64,0,1,float16,float16,0,0.031744000812371574
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,16,40,40,64,0,1,float16,fp8,0,0.031231999397277832
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,16,40,1,64,0,1,float16,float16,0,0.031231999397277832
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,16,40,40,64,0,1,fp8,fp8,0,0.10820266604423523
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,16,40,1,64,0,1,float16,fp8,0,0.031231999397277832
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,16,40,1,64,0,1,fp8,fp8,0,0.1083733340104421
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,16,40,2,64,0,1,float16,float16,0,0.031914666295051575
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,16,40,2,64,0,1,float16,fp8,0,0.032085334261258446
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,16,40,2,64,0,1,fp8,fp8,0,0.10854400197664897
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,16,40,4,64,0,1,float16,float16,0,0.032085334261258446
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,16,40,4,64,0,1,float16,fp8,0,0.031914666295051575
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,16,40,4,64,0,1,fp8,fp8,0,0.1083733340104421
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,16,40,8,64,0,1,float16,float16,0,0.031744000812371574
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,16,40,8,64,0,1,float16,fp8,0,0.032085334261258446
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,16,40,8,64,0,1,fp8,fp8,0,0.10803199807802837
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,16,40,40,64,0,1,float16,float16,0,0.020479999482631683
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,16,40,40,64,0,1,float16,fp8,0,0.020138667275508244
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,16,40,40,64,0,1,fp8,fp8,0,0.06263466676076253
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,16,40,1,64,0,1,float16,float16,0,0.019797333826621372
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,16,40,1,64,0,1,float16,fp8,0,0.019968000551064808
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,16,40,1,64,0,1,fp8,fp8,0,0.062463998794555664
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,16,40,2,64,0,1,float16,float16,0,0.019968000551064808
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,16,40,2,64,0,1,float16,fp8,0,0.020138667275508244
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,16,40,2,64,0,1,fp8,fp8,0,0.06263466676076253
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,16,40,4,64,0,1,float16,float16,0,0.020138667275508244
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,16,40,4,64,0,1,float16,fp8,0,0.020138667275508244
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,16,40,4,64,0,1,fp8,fp8,0,0.0628053347269694
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,16,40,8,64,0,1,float16,float16,0,0.020138667275508244
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,16,40,8,64,0,1,float16,fp8,0,0.020309332758188248
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,16,40,8,64,0,1,fp8,fp8,0,0.0628053347269694
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,16,40,40,64,0,1,float16,fp8,0,0.013482666263977686
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,16,40,40,64,0,1,float16,float16,0,0.01331199953953425
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,16,40,40,64,0,1,fp8,fp8,0,0.03822933385769526
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,16,40,1,64,0,1,float16,fp8,0,0.013823999712864557
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,16,40,1,64,0,1,fp8,fp8,0,0.03839999934037527
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,16,40,1,64,0,1,float16,float16,0,0.013141332815090815
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,16,40,2,64,0,1,float16,float16,0,0.013141332815090815
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,16,40,2,64,0,1,float16,fp8,0,0.013482666263977686
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,16,40,2,64,0,1,fp8,fp8,0,0.038912000755469
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,16,40,4,64,0,1,float16,float16,0,0.013141332815090815
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,16,40,4,64,0,1,float16,fp8,0,0.013482666263977686
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,16,40,4,64,0,1,fp8,fp8,0,0.03857066730658213
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,16,40,8,64,0,1,float16,float16,0,0.01331199953953425
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,16,40,8,64,0,1,fp8,fp8,0,0.03857066730658213
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,16,40,40,64,0,1,float16,float16,0,0.010069333637754122
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,16,40,8,64,0,1,float16,fp8,0,0.013482666263977686
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,16,40,40,64,0,1,float16,fp8,0,0.010239999741315842
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,16,40,40,64,0,1,fp8,fp8,0,0.0266239990790685
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,16,40,1,64,0,1,float16,float16,0,0.010410666465759277
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,16,40,1,64,0,1,float16,fp8,0,0.010922666639089584
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,16,40,1,64,0,1,fp8,fp8,0,0.02679466704527537
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,16,40,2,64,0,1,float16,float16,0,0.010410666465759277
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,16,40,2,64,0,1,float16,fp8,0,0.010581333190202713
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,16,40,2,64,0,1,fp8,fp8,0,0.0266239990790685
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,16,40,4,64,0,1,float16,float16,0,0.010069333637754122
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,16,40,4,64,0,1,float16,fp8,0,0.010581333190202713
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,16,40,4,64,0,1,fp8,fp8,0,0.026965332527955372
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,16,40,8,64,0,1,float16,float16,0,0.010581333190202713
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,16,40,40,64,0,1,float16,float16,0,0.008874666566650072
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,16,40,8,64,0,1,float16,fp8,0,0.010581333190202713
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,16,40,8,64,0,1,fp8,fp8,0,0.02679466704527537
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,16,40,40,64,0,1,float16,fp8,0,0.008703999842206636
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,16,40,40,64,0,1,fp8,fp8,0,0.01570133368174235
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,16,40,1,64,0,1,float16,float16,0,0.008362666393319765
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,16,40,1,64,0,1,float16,fp8,0,0.008703999842206636
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,16,40,1,64,0,1,fp8,fp8,0,0.016042667130629223
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,16,40,2,64,0,1,float16,float16,0,0.0085333331177632
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,16,40,2,64,0,1,fp8,fp8,0,0.01570133368174235
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,16,40,2,64,0,1,float16,fp8,0,0.009216000015536943
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,16,40,4,64,0,1,float16,fp8,0,0.008703999842206636
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,16,40,4,64,0,1,fp8,fp8,0,0.015872000406185787
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,16,40,4,64,0,1,float16,float16,0,0.008703999842206636
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,16,40,8,64,0,1,float16,float16,0,0.0085333331177632
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,16,40,8,64,0,1,float16,fp8,0,0.008874666566650072
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,16,40,8,64,0,1,fp8,fp8,0,0.016042667130629223
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,16,40,40,64,0,1,float16,float16,0,0.008021333565314611
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,16,40,40,64,0,1,float16,fp8,0,0.009045333291093508
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,16,40,40,64,0,1,fp8,fp8,0,0.012800000607967377
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,16,40,1,64,0,1,float16,float16,0,0.0085333331177632
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,16,40,1,64,0,1,fp8,fp8,0,0.013141332815090815
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,16,40,2,64,0,1,float16,float16,0,0.009045333291093508
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,16,40,1,64,0,1,float16,fp8,0,0.009503999724984169
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,16,40,2,64,0,1,float16,fp8,0,0.008874666566650072
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,16,40,2,64,0,1,fp8,fp8,0,0.012970666090647379
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,16,40,4,64,0,1,float16,float16,0,0.008874666566650072
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,16,40,4,64,0,1,float16,fp8,0,0.0085333331177632
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,16,40,4,64,0,1,fp8,fp8,0,0.012970666090647379
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,16,40,8,64,0,1,float16,float16,0,0.008021333565314611
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,16,40,8,64,0,1,float16,fp8,0,0.009045333291093508
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,16,40,8,64,0,1,fp8,fp8,0,0.012970666090647379
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,16,40,40,64,0,1,float16,float16,0,0.008703999842206636
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,16,40,40,64,0,1,float16,fp8,0,0.007850666840871176
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,16,40,40,64,0,1,fp8,fp8,0,0.011776000261306763
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,16,40,1,64,0,1,float16,float16,0,0.0085333331177632
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,16,40,1,64,0,1,float16,fp8,0,0.008362666393319765
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,16,40,1,64,0,1,fp8,fp8,0,0.011946666985750198
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,16,40,2,64,0,1,float16,float16,0,0.008874666566650072
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,16,40,2,64,0,1,fp8,fp8,0,0.012117333710193634
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,16,40,2,64,0,1,float16,fp8,0,0.008874666566650072
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,16,40,4,64,0,1,float16,float16,0,0.0085333331177632
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,16,40,4,64,0,1,float16,fp8,0,0.008853333070874214
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,16,40,4,64,0,1,fp8,fp8,0,0.011776000261306763
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,16,40,8,64,0,1,float16,float16,0,0.008362666393319765
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,16,40,8,64,0,1,fp8,fp8,0,0.011946666985750198
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,16,40,8,64,0,1,float16,fp8,0,0.008021333565314611
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,16384,32,1,64,0,1,float16,float16,0,61.34049987792969
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,16384,32,1,64,0,1,float16,fp8,0,60.7278086344401
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,16384,32,2,64,0,1,float16,float16,0,61.476521809895836
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,16384,32,2,64,0,1,float16,fp8,0,61.2150624593099
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,16384,32,4,64,0,1,float16,fp8,0,60.807169596354164
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,16384,32,4,64,0,1,float16,float16,0,62.8497060139974
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,16384,32,1,64,0,1,fp8,fp8,0,79.34020487467448
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,16384,32,2,64,0,1,fp8,fp8,0,79.44345601399739
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,16384,32,32,64,0,1,float16,float16,0,31.085909525553387
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,16384,32,32,64,0,1,float16,fp8,0,31.169024149576824
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,16384,32,1,64,0,1,float16,float16,0,30.349141438802082
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,16384,32,32,64,0,1,fp8,fp8,0,41.07298024495443
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,16384,32,8,64,0,1,float16,fp8,0,60.81058247884115
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,16384,32,8,64,0,1,float16,float16,0,60.776275634765625
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,16384,32,4,64,0,1,fp8,fp8,0,80.70639038085938
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,16384,32,1,64,0,1,float16,fp8,0,30.441983540852863
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,16384,32,8,64,0,1,fp8,fp8,0,81.1890360514323
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,16384,32,1,64,0,1,fp8,fp8,0,39.09700266520182
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,16384,32,2,64,0,1,float16,float16,0,30.078633626302082
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,16384,32,2,64,0,1,float16,fp8,0,30.22882080078125
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,16384,32,4,64,0,1,float16,float16,0,29.72979227701823
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,16384,32,2,64,0,1,fp8,fp8,0,38.60002136230469
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,16384,32,4,64,0,1,float16,fp8,0,29.79157257080078
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,16384,32,8,64,0,1,float16,float16,0,29.627220153808594
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,16384,32,32,64,0,1,float16,float16,0,15.753557840983072
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,16384,32,4,64,0,1,fp8,fp8,0,38.97412363688151
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,16384,32,8,64,0,1,float16,fp8,0,30.150484720865887
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,16384,32,32,64,0,1,float16,fp8,0,15.5687255859375
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,16384,32,32,64,0,1,fp8,fp8,0,20.240895589192707
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,16384,32,1,64,0,1,float16,float16,0,15.39959462483724
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,16384,32,8,64,0,1,fp8,fp8,0,39.2630615234375
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,16384,32,1,64,0,1,float16,fp8,0,15.814144134521484
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,16384,32,1,64,0,1,fp8,fp8,0,19.491840362548828
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,16384,32,2,64,0,1,float16,float16,0,15.286613464355469
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,16384,32,2,64,0,1,float16,fp8,0,15.478101094563803
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,16384,32,2,64,0,1,fp8,fp8,0,19.381418863932293
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,16384,32,4,64,0,1,float16,float16,0,15.096661885579428
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,16384,32,4,64,0,1,float16,fp8,0,15.343616485595703
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,16384,32,4,64,0,1,fp8,fp8,0,19.525973002115887
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,16384,32,8,64,0,1,float16,float16,0,15.236437479654947
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,16384,32,32,64,0,1,float16,float16,0,8.48520533243815
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,16384,32,8,64,0,1,float16,fp8,0,15.311018625895182
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,16384,32,32,64,0,1,float16,fp8,0,8.241151809692383
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,16384,32,8,64,0,1,fp8,fp8,0,19.664554595947266
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,16384,32,1,64,0,1,float16,float16,0,7.710378646850586
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,16384,32,32,64,0,1,fp8,fp8,0,10.193407694498697
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,16384,32,1,64,0,1,float16,fp8,0,7.615146636962891
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,16384,32,1,64,0,1,fp8,fp8,0,9.872383753458658
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,16384,32,2,64,0,1,float16,float16,0,7.472810745239258
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,16384,32,2,64,0,1,float16,fp8,0,7.825237274169922
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,16384,32,4,64,0,1,float16,fp8,0,7.692288080851237
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,16384,32,4,64,0,1,float16,float16,0,7.78871472676595
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,16384,32,2,64,0,1,fp8,fp8,0,9.899007797241211
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,16384,32,4,64,0,1,fp8,fp8,0,9.938943862915039
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,16384,32,8,64,0,1,float16,float16,0,7.563434600830078
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,16384,32,8,64,0,1,float16,fp8,0,7.450794855753581
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,16384,32,8,64,0,1,fp8,fp8,0,10.135381062825521
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,12288,32,1,64,0,1,float16,float16,0,35.04366811116537
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,12288,32,1,64,0,1,float16,fp8,0,35.10135396321615
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,12288,32,2,64,0,1,float16,float16,0,34.707115173339844
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,12288,32,2,64,0,1,float16,fp8,0,34.63048553466797
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,12288,32,1,64,0,1,fp8,fp8,0,44.10675048828125
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,12288,32,4,64,0,1,float16,float16,0,34.404693603515625
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,12288,32,4,64,0,1,float16,fp8,0,34.95816548665365
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,12288,32,2,64,0,1,fp8,fp8,0,44.552703857421875
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,12288,32,32,64,0,1,float16,float16,0,17.83722686767578
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,12288,32,32,64,0,1,float16,fp8,0,18.44991938273112
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,12288,32,1,64,0,1,float16,float16,0,17.92733891805013
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,12288,32,8,64,0,1,float16,float16,0,34.849108378092446
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,12288,32,32,64,0,1,fp8,fp8,0,23.755775451660156
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,12288,32,8,64,0,1,float16,fp8,0,34.5169932047526
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,12288,32,4,64,0,1,fp8,fp8,0,45.34801228841146
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,12288,32,8,64,0,1,fp8,fp8,0,45.361490885416664
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,12288,32,1,64,0,1,float16,fp8,0,17.50698725382487
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,12288,32,2,64,0,1,float16,float16,0,17.74677276611328
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,12288,32,1,64,0,1,fp8,fp8,0,22.092458089192707
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,12288,32,2,64,0,1,float16,fp8,0,17.591637929280598
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,12288,32,4,64,0,1,float16,float16,0,17.12179183959961
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,12288,32,4,64,0,1,float16,fp8,0,17.593685150146484
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,12288,32,2,64,0,1,fp8,fp8,0,22.40870412190755
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,12288,32,4,64,0,1,fp8,fp8,0,22.153897603352863
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,12288,32,32,64,0,1,float16,float16,0,9.33120028177897
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,12288,32,8,64,0,1,float16,float16,0,17.229141235351562
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,12288,32,8,64,0,1,float16,fp8,0,17.29587173461914
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,12288,32,32,64,0,1,float16,fp8,0,9.624917348225912
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,12288,32,1,64,0,1,float16,float16,0,9.315839767456055
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,12288,32,32,64,0,1,fp8,fp8,0,11.905194600423178
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,12288,32,8,64,0,1,fp8,fp8,0,22.442838033040363
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,12288,32,1,64,0,1,float16,fp8,0,8.892074584960938
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,12288,32,2,64,0,1,float16,float16,0,9.286997477213541
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,12288,32,1,64,0,1,fp8,fp8,0,11.123541514078775
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,12288,32,2,64,0,1,float16,fp8,0,8.905557632446289
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,12288,32,4,64,0,1,float16,float16,0,9.21992556254069
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,12288,32,4,64,0,1,float16,fp8,0,9.063423792521158
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,12288,32,2,64,0,1,fp8,fp8,0,11.101183573404947
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,12288,32,4,64,0,1,fp8,fp8,0,11.28277333577474
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,12288,32,8,64,0,1,float16,float16,0,9.023317337036133
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,12288,32,8,64,0,1,float16,fp8,0,9.010517120361328
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,12288,32,32,64,0,1,float16,float16,0,4.1057281494140625
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,12288,32,32,64,0,1,float16,fp8,0,4.66210142771403
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,12288,32,8,64,0,1,fp8,fp8,0,11.299669901529947
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,12288,32,1,64,0,1,float16,float16,0,4.036608060201009
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,12288,32,32,64,0,1,fp8,fp8,0,6.004735946655273
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,12288,32,1,64,0,1,float16,fp8,0,4.299263954162598
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,12288,32,1,64,0,1,fp8,fp8,0,5.637802759806315
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,12288,32,2,64,0,1,float16,float16,0,3.8787412643432617
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,12288,32,2,64,0,1,float16,fp8,0,4.164949417114258
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,12288,32,4,64,0,1,float16,float16,0,3.7234347661336265
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,12288,32,2,64,0,1,fp8,fp8,0,5.746517181396484
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,12288,32,4,64,0,1,float16,fp8,0,4.198741277058919
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,12288,32,8,64,0,1,float16,float16,0,3.8961493174235025
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,12288,32,4,64,0,1,fp8,fp8,0,5.7849171956380205
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,12288,32,8,64,0,1,float16,fp8,0,4.172458648681641
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,12288,32,8,64,0,1,fp8,fp8,0,5.686784108479817
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,10240,32,1,64,0,1,float16,float16,0,24.88268788655599
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,10240,32,1,64,0,1,float16,fp8,0,24.718505859375
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,10240,32,2,64,0,1,float16,float16,0,24.585044860839844
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,10240,32,2,64,0,1,float16,fp8,0,24.47820790608724
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,10240,32,1,64,0,1,fp8,fp8,0,31.246505737304688
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,10240,32,2,64,0,1,fp8,fp8,0,31.446187337239582
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,10240,32,4,64,0,1,float16,float16,0,24.387413024902344
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,10240,32,4,64,0,1,float16,fp8,0,24.353622436523438
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,10240,32,32,64,0,1,float16,float16,0,13.678421020507812
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,10240,32,32,64,0,1,float16,fp8,0,13.218816121419271
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,10240,32,8,64,0,1,float16,float16,0,24.67430369059245
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,10240,32,1,64,0,1,float16,float16,0,12.676949818929037
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,10240,32,4,64,0,1,fp8,fp8,0,31.407615661621094
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,10240,32,8,64,0,1,float16,fp8,0,24.322219848632812
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,10240,32,32,64,0,1,fp8,fp8,0,16.593407948811848
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,10240,32,8,64,0,1,fp8,fp8,0,32.068949381510414
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,10240,32,1,64,0,1,float16,fp8,0,13.528917948404947
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,10240,32,2,64,0,1,float16,float16,0,12.72610092163086
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,10240,32,1,64,0,1,fp8,fp8,0,15.653887430826822
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,10240,32,2,64,0,1,float16,fp8,0,12.498432159423828
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,10240,32,4,64,0,1,float16,fp8,0,12.472320556640625
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,10240,32,4,64,0,1,float16,float16,0,12.472148895263672
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,10240,32,2,64,0,1,fp8,fp8,0,15.488170623779297
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,10240,32,4,64,0,1,fp8,fp8,0,15.676586151123047
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,10240,32,8,64,0,1,float16,float16,0,12.924245198567709
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,10240,32,32,64,0,1,float16,float16,0,6.285312016805013
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,10240,32,32,64,0,1,float16,fp8,0,6.476970672607422
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,10240,32,8,64,0,1,float16,fp8,0,12.578815460205078
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,10240,32,1,64,0,1,float16,float16,0,5.29032548268636
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,10240,32,32,64,0,1,fp8,fp8,0,8.393215815226236
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,10240,32,8,64,0,1,fp8,fp8,0,15.944703420003256
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,10240,32,1,64,0,1,float16,fp8,0,6.315690358479817
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,10240,32,2,64,0,1,float16,float16,0,5.9311784108479815
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,10240,32,1,64,0,1,fp8,fp8,0,7.934975941975911
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,10240,32,2,64,0,1,float16,fp8,0,6.353578567504883
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,10240,32,4,64,0,1,float16,float16,0,6.316885630289714
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,10240,32,2,64,0,1,fp8,fp8,0,7.862272262573242
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,10240,32,4,64,0,1,float16,fp8,0,6.034261067708333
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,10240,32,8,64,0,1,float16,float16,0,6.321322758992513
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,10240,32,4,64,0,1,fp8,fp8,0,8.00716781616211
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,10240,32,32,64,0,1,float16,float16,0,2.994175910949707
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,10240,32,32,64,0,1,float16,fp8,0,2.97216002146403
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,10240,32,8,64,0,1,float16,fp8,0,6.218922932942708
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,10240,32,32,64,0,1,fp8,fp8,0,4.144810676574707
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,10240,32,1,64,0,1,float16,float16,0,2.72981325785319
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,10240,32,8,64,0,1,fp8,fp8,0,8.070144017537435
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,10240,32,1,64,0,1,float16,fp8,0,2.797909418741862
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,10240,32,1,64,0,1,fp8,fp8,0,3.917824109395345
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,10240,32,2,64,0,1,float16,float16,0,2.6871468226114907
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,10240,32,2,64,0,1,float16,fp8,0,2.6391894022623696
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,10240,32,2,64,0,1,fp8,fp8,0,3.9115091959635415
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,10240,32,4,64,0,1,float16,float16,0,2.8090025583902993
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,10240,32,4,64,0,1,float16,fp8,0,2.650965372721354
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,10240,32,4,64,0,1,fp8,fp8,0,4.000085194905599
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,10240,32,8,64,0,1,float16,float16,0,2.7984212239583335
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,10240,32,8,64,0,1,float16,fp8,0,2.86737060546875
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,10240,32,8,64,0,1,fp8,fp8,0,3.919189453125
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,8192,32,1,64,0,1,float16,float16,0,32.98235829671224
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,8192,32,1,64,0,1,float16,fp8,0,32.74615478515625
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,8192,32,2,64,0,1,float16,float16,0,33.083221435546875
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,8192,32,2,64,0,1,float16,fp8,0,33.54999542236328
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,8192,32,1,64,0,1,fp8,fp8,0,40.554667154947914
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,8192,32,4,64,0,1,float16,float16,0,33.01068878173828
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,8192,32,2,64,0,1,fp8,fp8,0,42.05994669596354
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,8192,32,4,64,0,1,float16,fp8,0,32.42735036214193
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,8192,32,32,64,0,1,float16,float16,0,17.88040542602539
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,8192,32,32,64,0,1,float16,fp8,0,17.59129587809245
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,8192,32,1,64,0,1,float16,float16,0,16.43349329630534
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,8192,32,32,64,0,1,fp8,fp8,0,22.173355102539062
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,8192,32,8,64,0,1,float16,float16,0,32.39816538492838
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,8192,32,8,64,0,1,float16,fp8,0,33.07520039876302
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,8192,32,4,64,0,1,fp8,fp8,0,42.47654469807943
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,8192,32,8,64,0,1,fp8,fp8,0,43.92345682779948
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,8192,32,1,64,0,1,float16,fp8,0,16.528554280598957
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,8192,32,2,64,0,1,float16,float16,0,16.346282958984375
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,8192,32,1,64,0,1,fp8,fp8,0,20.25881576538086
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,8192,32,2,64,0,1,float16,fp8,0,16.367956797281902
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,8192,32,4,64,0,1,float16,float16,0,15.959381103515625
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,8192,32,2,64,0,1,fp8,fp8,0,20.132010142008465
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,8192,32,4,64,0,1,float16,fp8,0,16.33945592244466
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,8192,32,4,64,0,1,fp8,fp8,0,20.69384511311849
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,8192,32,8,64,0,1,float16,float16,0,16.449535369873047
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,8192,32,32,64,0,1,float16,float16,0,8.951637268066406
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,8192,32,32,64,0,1,float16,fp8,0,8.909653345743815
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,8192,32,8,64,0,1,float16,fp8,0,16.207530975341797
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,8192,32,1,64,0,1,float16,float16,0,7.99078369140625
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,8192,32,32,64,0,1,fp8,fp8,0,11.056981404622396
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,8192,32,8,64,0,1,fp8,fp8,0,21.02084223429362
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,8192,32,1,64,0,1,float16,fp8,0,8.506879806518555
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,8192,32,1,64,0,1,fp8,fp8,0,10.077695846557617
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,8192,32,2,64,0,1,float16,float16,0,8.182101567586264
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,8192,32,2,64,0,1,float16,fp8,0,8.42086410522461
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,8192,32,4,64,0,1,float16,float16,0,8.109055836995443
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,8192,32,2,64,0,1,fp8,fp8,0,10.080256144205729
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,8192,32,4,64,0,1,float16,fp8,0,8.008021036783854
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,8192,32,4,64,0,1,fp8,fp8,0,10.271402359008789
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,8192,32,8,64,0,1,float16,float16,0,8.895999908447266
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,8192,32,8,64,0,1,float16,fp8,0,8.384000142415365
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,8192,32,32,64,0,1,float16,float16,0,4.087807973225911
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,8192,32,32,64,0,1,float16,fp8,0,4.0272213617960615
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,8192,32,1,64,0,1,float16,float16,0,3.4932053883870444
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,8192,32,8,64,0,1,fp8,fp8,0,10.28488540649414
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,8192,32,32,64,0,1,fp8,fp8,0,5.434026718139648
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,8192,32,1,64,0,1,float16,fp8,0,3.969365437825521
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,8192,32,1,64,0,1,fp8,fp8,0,5.084501266479492
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,8192,32,2,64,0,1,float16,float16,0,3.8111572265625
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,8192,32,2,64,0,1,float16,fp8,0,3.67633056640625
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,8192,32,4,64,0,1,float16,float16,0,3.4034347534179688
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,8192,32,4,64,0,1,float16,fp8,0,3.7253119150797525
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,8192,32,2,64,0,1,fp8,fp8,0,5.032448132832845
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,8192,32,4,64,0,1,fp8,fp8,0,5.042858759562175
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,8192,32,8,64,0,1,float16,float16,0,3.9314772288004556
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,8192,32,8,64,0,1,float16,fp8,0,3.9055360158284507
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,8192,32,32,64,0,1,float16,fp8,0,1.9522560437520344
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,8192,32,32,64,0,1,float16,float16,0,1.9937280019124348
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,8192,32,1,64,0,1,float16,float16,0,1.8549760182698567
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,8192,32,8,64,0,1,fp8,fp8,0,5.107199986775716
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,8192,32,32,64,0,1,fp8,fp8,0,2.695850690205892
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,8192,32,1,64,0,1,float16,fp8,0,1.8232320149739583
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,8192,32,1,64,0,1,fp8,fp8,0,2.533717314402262
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,8192,32,2,64,0,1,float16,float16,0,1.7846612930297852
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,8192,32,2,64,0,1,float16,fp8,0,1.835349400838216
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,8192,32,4,64,0,1,float16,float16,0,1.8846720059712727
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,8192,32,4,64,0,1,float16,fp8,0,1.848149299621582
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,8192,32,2,64,0,1,fp8,fp8,0,2.4990720748901367
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,8192,32,4,64,0,1,fp8,fp8,0,2.539519945780436
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,8192,32,8,64,0,1,float16,float16,0,1.8686292966206868
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,8192,32,8,64,0,1,float16,fp8,0,1.795072078704834
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,8192,32,8,64,0,1,fp8,fp8,0,2.5333760579427085
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,6144,32,1,64,0,1,float16,float16,0,19.297108968098957
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,6144,32,1,64,0,1,float16,fp8,0,19.487743377685547
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,6144,32,2,64,0,1,float16,fp8,0,18.970111846923828
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,6144,32,2,64,0,1,float16,float16,0,19.053909301757812
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,6144,32,1,64,0,1,fp8,fp8,0,23.205716451009113
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,6144,32,4,64,0,1,float16,float16,0,19.33687464396159
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,6144,32,2,64,0,1,fp8,fp8,0,23.52025604248047
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,6144,32,4,64,0,1,float16,fp8,0,18.83562723795573
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,6144,32,32,64,0,1,float16,float16,0,10.209791819254557
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,6144,32,32,64,0,1,float16,fp8,0,10.450431823730469
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,6144,32,8,64,0,1,float16,float16,0,19.462997436523438
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,6144,32,32,64,0,1,fp8,fp8,0,12.960768381754557
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,6144,32,8,64,0,1,float16,fp8,0,18.64789326985677
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,6144,32,1,64,0,1,float16,float16,0,10.19050661722819
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,6144,32,4,64,0,1,fp8,fp8,0,24.10632578531901
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,6144,32,8,64,0,1,fp8,fp8,0,24.618153889973957
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,6144,32,1,64,0,1,float16,fp8,0,9.549482981363932
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,6144,32,2,64,0,1,float16,float16,0,9.83296012878418
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,6144,32,1,64,0,1,fp8,fp8,0,11.650559743245443
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,6144,32,2,64,0,1,float16,fp8,0,9.885013580322266
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,6144,32,4,64,0,1,float16,float16,0,9.83193588256836
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,6144,32,2,64,0,1,fp8,fp8,0,11.643391927083334
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,6144,32,4,64,0,1,float16,fp8,0,9.609045028686523
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,6144,32,4,64,0,1,fp8,fp8,0,11.69595718383789
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,6144,32,8,64,0,1,float16,float16,0,10.032469431559244
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,6144,32,32,64,0,1,float16,float16,0,5.0135040283203125
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,6144,32,1,64,0,1,float16,float16,0,3.9772160847981772
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,6144,32,32,64,0,1,float16,fp8,0,5.007530530293782
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,6144,32,32,64,0,1,fp8,fp8,0,6.51690673828125
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,6144,32,8,64,0,1,float16,fp8,0,9.504597345987955
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,6144,32,8,64,0,1,fp8,fp8,0,12.053162892659506
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,6144,32,1,64,0,1,float16,fp8,0,4.549461364746094
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,6144,32,1,64,0,1,fp8,fp8,0,5.798378626505534
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,6144,32,2,64,0,1,float16,float16,0,4.448767979939778
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,6144,32,2,64,0,1,float16,fp8,0,4.489215850830078
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,6144,32,2,64,0,1,fp8,fp8,0,5.878954569498698
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,6144,32,4,64,0,1,float16,float16,0,4.234069188435872
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,6144,32,4,64,0,1,float16,fp8,0,4.053674697875977
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,6144,32,4,64,0,1,fp8,fp8,0,5.946197509765625
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,6144,32,8,64,0,1,float16,float16,0,4.168874740600586
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,6144,32,8,64,0,1,float16,fp8,0,4.15283203125
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,6144,32,32,64,0,1,float16,float16,0,2.437119960784912
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,6144,32,32,64,0,1,float16,fp8,0,2.429098606109619
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,6144,32,1,64,0,1,float16,float16,0,1.9833173751831055
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,6144,32,8,64,0,1,fp8,fp8,0,5.910869598388672
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,6144,32,32,64,0,1,fp8,fp8,0,3.1660372416178384
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,6144,32,1,64,0,1,float16,fp8,0,2.066943963368734
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,6144,32,1,64,0,1,fp8,fp8,0,2.789717356363932
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,6144,32,2,64,0,1,float16,float16,0,1.9483307202657063
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,6144,32,2,64,0,1,float16,fp8,0,1.9570345878601074
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,6144,32,4,64,0,1,float16,float16,0,1.9384320576985676
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,6144,32,2,64,0,1,fp8,fp8,0,2.846207936604818
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,6144,32,4,64,0,1,float16,fp8,0,1.938602606455485
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,6144,32,4,64,0,1,fp8,fp8,0,2.8282880783081055
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,6144,32,8,64,0,1,float16,float16,0,2.0770133336385093
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,6144,32,8,64,0,1,float16,fp8,0,1.979904015858968
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,6144,32,32,64,0,1,float16,float16,0,1.1006293296813965
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,6144,32,8,64,0,1,fp8,fp8,0,2.89740784962972
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,6144,32,32,64,0,1,float16,fp8,0,1.0489173730214436
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,6144,32,32,64,0,1,fp8,fp8,0,1.6203093528747559
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,6144,32,1,64,0,1,float16,float16,0,1.076906681060791
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,6144,32,1,64,0,1,float16,fp8,0,1.0707626342773438
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,6144,32,1,64,0,1,fp8,fp8,0,1.4713172912597656
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,6144,32,2,64,0,1,float16,float16,0,1.1356159845987956
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,6144,32,2,64,0,1,float16,fp8,0,1.1100160280863445
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,6144,32,2,64,0,1,fp8,fp8,0,1.46670929590861
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,6144,32,4,64,0,1,float16,float16,0,1.112063964207967
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,6144,32,4,64,0,1,float16,fp8,0,1.0516479810078938
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,6144,32,8,64,0,1,float16,float16,0,1.0381653308868408
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,6144,32,4,64,0,1,fp8,fp8,0,1.4624427159627278
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,6144,32,8,64,0,1,float16,fp8,0,1.0516479810078938
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,6144,32,8,64,0,1,fp8,fp8,0,1.4621013005574544
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,4096,32,1,64,0,1,float16,float16,0,18.349056243896484
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,4096,32,1,64,0,1,float16,fp8,0,18.91635258992513
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,4096,32,2,64,0,1,float16,fp8,0,19.3076909383138
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,4096,32,2,64,0,1,float16,float16,0,19.672405242919922
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,4096,32,1,64,0,1,fp8,fp8,0,21.984939575195312
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,4096,32,2,64,0,1,fp8,fp8,0,23.07720438639323
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,4096,32,4,64,0,1,float16,float16,0,19.202901204427082
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,4096,32,4,64,0,1,float16,fp8,0,18.993663787841797
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,4096,32,32,64,0,1,float16,float16,0,10.533717473347982
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,4096,32,32,64,0,1,float16,fp8,0,10.419541041056315
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,4096,32,1,64,0,1,float16,float16,0,8.930133183797201
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,4096,32,8,64,0,1,float16,float16,0,19.17678960164388
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,4096,32,32,64,0,1,fp8,fp8,0,12.946773529052734
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,4096,32,4,64,0,1,fp8,fp8,0,23.542442321777344
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,4096,32,8,64,0,1,float16,fp8,0,19.089920043945312
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,4096,32,8,64,0,1,fp8,fp8,0,24.87176513671875
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,4096,32,1,64,0,1,float16,fp8,0,9.105578740437826
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,4096,32,2,64,0,1,float16,float16,0,9.084927876790365
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,4096,32,1,64,0,1,fp8,fp8,0,10.680831909179688
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,4096,32,2,64,0,1,float16,fp8,0,9.389397303263346
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,4096,32,4,64,0,1,float16,float16,0,9.018709182739258
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,4096,32,2,64,0,1,fp8,fp8,0,10.979498545328775
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,4096,32,4,64,0,1,float16,fp8,0,9.430528004964193
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,4096,32,4,64,0,1,fp8,fp8,0,11.254955291748047
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,4096,32,8,64,0,1,float16,float16,0,9.128789265950521
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,4096,32,32,64,0,1,float16,float16,0,5.174101193745931
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,4096,32,32,64,0,1,float16,fp8,0,4.967082659403483
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,4096,32,1,64,0,1,float16,float16,0,4.003669420878093
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,4096,32,8,64,0,1,float16,fp8,0,9.432234446207682
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,4096,32,32,64,0,1,fp8,fp8,0,6.325248082478841
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,4096,32,8,64,0,1,fp8,fp8,0,11.31537119547526
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,4096,32,1,64,0,1,float16,fp8,0,4.418560028076172
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,4096,32,2,64,0,1,float16,float16,0,4.208810806274414
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,4096,32,1,64,0,1,fp8,fp8,0,5.316778818766276
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,4096,32,2,64,0,1,float16,fp8,0,4.195157368977864
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,4096,32,2,64,0,1,fp8,fp8,0,5.364565531412761
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,4096,32,4,64,0,1,float16,float16,0,4.0352427164713545
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,4096,32,4,64,0,1,float16,fp8,0,4.2968746821085615
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,4096,32,4,64,0,1,fp8,fp8,0,5.39784558614095
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,4096,32,8,64,0,1,float16,float16,0,4.484437306722005
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,4096,32,8,64,0,1,float16,fp8,0,4.306943893432617
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,4096,32,32,64,0,1,float16,float16,0,2.4901973406473794
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,4096,32,32,64,0,1,float16,fp8,0,2.4437759717305503
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,4096,32,8,64,0,1,fp8,fp8,0,5.540181477864583
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,4096,32,1,64,0,1,float16,float16,0,1.894741376241048
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,4096,32,32,64,0,1,fp8,fp8,0,3.0259199142456055
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,4096,32,1,64,0,1,float16,fp8,0,1.8257919947306316
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,4096,32,1,64,0,1,fp8,fp8,0,2.6072746912638345
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,4096,32,2,64,0,1,float16,float16,0,1.8706773122151692
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,4096,32,2,64,0,1,float16,fp8,0,1.8286933898925781
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,4096,32,2,64,0,1,fp8,fp8,0,2.598911921183268
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,4096,32,4,64,0,1,float16,float16,0,1.9092480341593425
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,4096,32,4,64,0,1,float16,fp8,0,1.9083946545918782
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,4096,32,4,64,0,1,fp8,fp8,0,2.6243413289388022
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,4096,32,8,64,0,1,float16,float16,0,1.951573371887207
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,4096,32,8,64,0,1,float16,fp8,0,1.9945813814798992
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,4096,32,8,64,0,1,fp8,fp8,0,2.689706802368164
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,4096,32,32,64,0,1,float16,float16,0,1.1801599661509197
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,4096,32,32,64,0,1,float16,fp8,0,1.150976022084554
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,4096,32,1,64,0,1,float16,float16,0,0.959658702214559
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,4096,32,32,64,0,1,fp8,fp8,0,1.5489706993103027
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,4096,32,1,64,0,1,float16,fp8,0,0.9292799631754557
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,4096,32,1,64,0,1,fp8,fp8,0,1.2868266900380452
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,4096,32,2,64,0,1,float16,float16,0,0.9451519648234049
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,4096,32,2,64,0,1,float16,fp8,0,0.9241600036621094
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,4096,32,2,64,0,1,fp8,fp8,0,1.296895980834961
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,4096,32,4,64,0,1,float16,float16,0,0.9335467020670573
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,4096,32,4,64,0,1,float16,fp8,0,0.942250649134318
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,4096,32,4,64,0,1,fp8,fp8,0,1.2963840166727703
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,4096,32,8,64,0,1,float16,float16,0,0.9111893177032471
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,4096,32,8,64,0,1,float16,fp8,0,0.9057280222574869
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,4096,32,8,64,0,1,fp8,fp8,0,1.330858627955119
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,4096,32,32,64,0,1,float16,float16,0,0.5167786677678426
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,4096,32,32,64,0,1,float16,fp8,0,0.5164373318354288
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,4096,32,32,64,0,1,fp8,fp8,0,0.7551999886830648
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,4096,32,1,64,0,1,float16,float16,0,0.5336746772130331
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,4096,32,1,64,0,1,float16,fp8,0,0.5382826725641886
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,4096,32,1,64,0,1,fp8,fp8,0,0.711680014928182
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,4096,32,2,64,0,1,float16,float16,0,0.5476693312327067
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,4096,32,2,64,0,1,float16,fp8,0,0.5312853256861368
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,4096,32,2,64,0,1,fp8,fp8,0,0.718506654103597
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,4096,32,4,64,0,1,float16,float16,0,0.5555200179417928
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,4096,32,4,64,0,1,float16,fp8,0,0.5463039875030518
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,4096,32,8,64,0,1,float16,float16,0,0.5232640107472738
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,4096,32,4,64,0,1,fp8,fp8,0,0.7191893259684244
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,4096,32,8,64,0,1,float16,fp8,0,0.5251413186391195
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,4096,32,8,64,0,1,fp8,fp8,0,0.7128746509552002
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,3072,32,1,64,0,1,float16,float16,0,11.179008483886719
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,3072,32,1,64,0,1,float16,fp8,0,11.148970286051432
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,3072,32,1,64,0,1,fp8,fp8,0,12.700501759847006
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,3072,32,2,64,0,1,float16,float16,0,11.298133850097656
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,3072,32,2,64,0,1,float16,fp8,0,10.845354715983072
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,3072,32,4,64,0,1,float16,float16,0,10.843648274739584
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,3072,32,2,64,0,1,fp8,fp8,0,13.312511444091797
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,3072,32,4,64,0,1,float16,fp8,0,11.153748830159506
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,3072,32,4,64,0,1,fp8,fp8,0,13.688660939534506
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,3072,32,32,64,0,1,float16,float16,0,6.752255757649739
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,3072,32,8,64,0,1,float16,float16,0,11.502763112386068
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,3072,32,32,64,0,1,float16,fp8,0,6.385663986206055
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,3072,32,1,64,0,1,float16,float16,0,5.108053207397461
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,3072,32,32,64,0,1,fp8,fp8,0,7.753557205200195
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,3072,32,8,64,0,1,float16,fp8,0,11.236863454182943
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,3072,32,8,64,0,1,fp8,fp8,0,14.162943522135416
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,3072,32,1,64,0,1,float16,fp8,0,5.060437202453613
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,3072,32,1,64,0,1,fp8,fp8,0,6.234282811482747
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,3072,32,2,64,0,1,float16,float16,0,5.230762799580892
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,3072,32,2,64,0,1,float16,fp8,0,5.238442738850911
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,3072,32,2,64,0,1,fp8,fp8,0,6.290773391723633
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,3072,32,4,64,0,1,float16,float16,0,5.318655967712402
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,3072,32,4,64,0,1,float16,fp8,0,5.251584053039551
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,3072,32,4,64,0,1,fp8,fp8,0,6.452735900878906
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,3072,32,8,64,0,1,float16,float16,0,5.36251703898112
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,3072,32,32,64,0,1,float16,float16,0,3.2494932810465493
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,3072,32,8,64,0,1,float16,fp8,0,5.0728960037231445
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,3072,32,1,64,0,1,float16,float16,0,2.2671359380086265
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,3072,32,32,64,0,1,float16,fp8,0,3.102378527323405
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,3072,32,8,64,0,1,fp8,fp8,0,6.642005284627278
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,3072,32,32,64,0,1,fp8,fp8,0,3.747669219970703
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,3072,32,1,64,0,1,float16,fp8,0,2.3504212697347007
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,3072,32,1,64,0,1,fp8,fp8,0,3.063466707865397
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,3072,32,2,64,0,1,float16,float16,0,2.3215786616007485
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,3072,32,2,64,0,1,float16,fp8,0,2.3031466801961265
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,3072,32,2,64,0,1,fp8,fp8,0,3.0353066126505532
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,3072,32,4,64,0,1,float16,float16,0,2.3540053367614746
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,3072,32,4,64,0,1,float16,fp8,0,2.3362560272216797
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,3072,32,4,64,0,1,fp8,fp8,0,3.1102294921875
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,3072,32,8,64,0,1,float16,float16,0,2.562730630238851
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,3072,32,32,64,0,1,float16,float16,0,1.5419732729593914
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,3072,32,1,64,0,1,float16,float16,0,1.071786642074585
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,3072,32,8,64,0,1,float16,fp8,0,2.5425920486450195
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,3072,32,32,64,0,1,float16,fp8,0,1.4580052693684895
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,3072,32,32,64,0,1,fp8,fp8,0,1.8529280026753743
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,3072,32,8,64,0,1,fp8,fp8,0,3.1955626805623374
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,3072,32,1,64,0,1,float16,fp8,0,1.094655990600586
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,3072,32,1,64,0,1,fp8,fp8,0,1.4962347348531086
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,3072,32,2,64,0,1,float16,float16,0,1.0830506483713787
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,3072,32,2,64,0,1,float16,fp8,0,1.05949862798055
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,3072,32,4,64,0,1,float16,float16,0,1.07042129834493
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,3072,32,2,64,0,1,fp8,fp8,0,1.507157325744629
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,3072,32,4,64,0,1,float16,fp8,0,1.0629119873046875
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,3072,32,8,64,0,1,float16,float16,0,1.1572906970977783
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,3072,32,4,64,0,1,fp8,fp8,0,1.522175947825114
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,3072,32,8,64,0,1,float16,fp8,0,1.1234986782073975
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,3072,32,32,64,0,1,float16,float16,0,0.6596266825993856
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,3072,32,8,64,0,1,fp8,fp8,0,1.5802027384440105
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,3072,32,32,64,0,1,float16,fp8,0,0.5935786565144857
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,3072,32,32,64,0,1,fp8,fp8,0,0.951807975769043
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,3072,32,1,64,0,1,float16,float16,0,0.5582506656646729
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,3072,32,1,64,0,1,float16,fp8,0,0.570026675860087
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,3072,32,1,64,0,1,fp8,fp8,0,0.7760213216145834
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,3072,32,2,64,0,1,float16,float16,0,0.5655893484751383
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,3072,32,2,64,0,1,float16,fp8,0,0.5454506476720175
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,3072,32,4,64,0,1,float16,float16,0,0.5524479945500692
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,3072,32,2,64,0,1,fp8,fp8,0,0.7751680215199789
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,3072,32,4,64,0,1,float16,fp8,0,0.5623466571172079
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,3072,32,8,64,0,1,float16,float16,0,0.5690026680628458
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,3072,32,4,64,0,1,fp8,fp8,0,0.7746559778849283
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,3072,32,8,64,0,1,float16,fp8,0,0.5524479945500692
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,3072,32,8,64,0,1,fp8,fp8,0,0.788821299870809
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,3072,32,32,64,0,1,float16,float16,0,0.3206826647122701
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,3072,32,32,64,0,1,float16,fp8,0,0.31249066193898517
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,3072,32,32,64,0,1,fp8,fp8,0,0.45073068141937256
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,3072,32,1,64,0,1,float16,float16,0,0.3165866732597351
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,3072,32,1,64,0,1,float16,fp8,0,0.31829333305358887
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,3072,32,1,64,0,1,fp8,fp8,0,0.4363946517308553
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,3072,32,2,64,0,1,float16,float16,0,0.31641600529352826
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,3072,32,2,64,0,1,float16,fp8,0,0.3131733338038127
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,3072,32,2,64,0,1,fp8,fp8,0,0.4307626485824585
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,3072,32,4,64,0,1,float16,float16,0,0.3256319959958394
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,3072,32,4,64,0,1,float16,fp8,0,0.32358400026957196
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,3072,32,8,64,0,1,float16,float16,0,0.3141973416010539
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,3072,32,4,64,0,1,fp8,fp8,0,0.43775999546051025
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,3072,32,8,64,0,1,float16,fp8,0,0.31112533807754517
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,3072,32,8,64,0,1,fp8,fp8,0,0.43537068367004395
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,2048,32,1,64,0,1,float16,float16,0,10.78442637125651
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,2048,32,1,64,0,1,float16,fp8,0,11.070976257324219
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,2048,32,2,64,0,1,float16,float16,0,11.128490447998047
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,2048,32,1,64,0,1,fp8,fp8,0,12.71176528930664
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,2048,32,2,64,0,1,float16,fp8,0,11.41418711344401
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,2048,32,4,64,0,1,float16,float16,0,11.22918446858724
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,2048,32,4,64,0,1,float16,fp8,0,11.304618835449219
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,2048,32,2,64,0,1,fp8,fp8,0,13.81341807047526
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,2048,32,4,64,0,1,fp8,fp8,0,14.090410868326822
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,2048,32,1,64,0,1,float16,float16,0,5.281279881795247
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,2048,32,32,64,0,1,float16,float16,0,7.5391998291015625
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,2048,32,32,64,0,1,float16,fp8,0,7.018325169881185
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,2048,32,32,64,0,1,fp8,fp8,0,8.128512064615885
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,2048,32,8,64,0,1,float16,float16,0,12.105728149414062
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,2048,32,8,64,0,1,float16,fp8,0,11.654655456542969
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,2048,32,8,64,0,1,fp8,fp8,0,14.968149820963541
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,2048,32,1,64,0,1,float16,fp8,0,5.0022398630778
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,2048,32,1,64,0,1,fp8,fp8,0,6.078293482462565
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,2048,32,2,64,0,1,float16,fp8,0,5.0119679768880205
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,2048,32,2,64,0,1,float16,float16,0,5.227519989013672
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,2048,32,4,64,0,1,float16,float16,0,5.219840049743652
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,2048,32,2,64,0,1,fp8,fp8,0,6.147071838378906
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,2048,32,4,64,0,1,float16,fp8,0,5.113514582316081
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,2048,32,4,64,0,1,fp8,fp8,0,6.358869552612305
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,2048,32,8,64,0,1,float16,float16,0,5.503146489461263
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,2048,32,32,64,0,1,float16,float16,0,3.6160853703816733
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,2048,32,32,64,0,1,float16,fp8,0,3.374762535095215
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,2048,32,8,64,0,1,float16,fp8,0,5.497173309326172
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,2048,32,1,64,0,1,float16,float16,0,2.346325397491455
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,2048,32,32,64,0,1,fp8,fp8,0,3.8347094853719077
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,2048,32,8,64,0,1,fp8,fp8,0,6.562986373901367
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,2048,32,1,64,0,1,float16,fp8,0,2.313216050465902
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,2048,32,1,64,0,1,fp8,fp8,0,2.896042823791504
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,2048,32,2,64,0,1,float16,fp8,0,2.3780694007873535
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,2048,32,2,64,0,1,float16,float16,0,2.434559981028239
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,2048,32,4,64,0,1,float16,float16,0,2.490880012512207
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,2048,32,2,64,0,1,fp8,fp8,0,2.9637972513834634
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,2048,32,4,64,0,1,float16,fp8,0,2.407423973083496
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,2048,32,4,64,0,1,fp8,fp8,0,3.0003201166788735
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,2048,32,8,64,0,1,float16,float16,0,2.601642608642578
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,2048,32,8,64,0,1,float16,fp8,0,2.5866239865620932
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,2048,32,32,64,0,1,float16,float16,0,1.7218559583028157
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,2048,32,1,64,0,1,float16,float16,0,1.057792027791341
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,2048,32,32,64,0,1,float16,fp8,0,1.6008532842000325
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,2048,32,32,64,0,1,fp8,fp8,0,1.9217066764831543
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,2048,32,8,64,0,1,fp8,fp8,0,3.116373380025228
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,2048,32,1,64,0,1,float16,fp8,0,1.0492586294809978
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,2048,32,1,64,0,1,fp8,fp8,0,1.425920009613037
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,2048,32,2,64,0,1,float16,float16,0,1.0953386624654133
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,2048,32,2,64,0,1,float16,fp8,0,1.0967040061950684
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,2048,32,4,64,0,1,float16,float16,0,1.1141119798024495
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,2048,32,2,64,0,1,fp8,fp8,0,1.4378666877746582
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,2048,32,4,64,0,1,float16,fp8,0,1.1217919985453289
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,2048,32,8,64,0,1,float16,float16,0,1.2072959740956624
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,2048,32,4,64,0,1,fp8,fp8,0,1.4885546366373699
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,2048,32,8,64,0,1,float16,fp8,0,1.1618986924489338
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,2048,32,8,64,0,1,fp8,fp8,0,1.5561386744181316
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,2048,32,32,64,0,1,float16,fp8,0,0.7181653181711832
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,2048,32,32,64,0,1,float16,float16,0,0.7811413606007894
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,2048,32,1,64,0,1,float16,float16,0,0.53111465771993
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,2048,32,32,64,0,1,fp8,fp8,0,0.9716053009033203
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,2048,32,1,64,0,1,float16,fp8,0,0.5198506514231364
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,2048,32,1,64,0,1,fp8,fp8,0,0.7099733352661133
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,2048,32,2,64,0,1,float16,float16,0,0.5073920090993246
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,2048,32,2,64,0,1,float16,fp8,0,0.5307733217875162
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,2048,32,2,64,0,1,fp8,fp8,0,0.7113386789957682
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,2048,32,4,64,0,1,float16,float16,0,0.5309439897537231
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,2048,32,4,64,0,1,float16,fp8,0,0.5247999827067057
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,2048,32,4,64,0,1,fp8,fp8,0,0.718506654103597
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,2048,32,8,64,0,1,float16,fp8,0,0.5167786677678426
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,2048,32,8,64,0,1,float16,float16,0,0.5167786677678426
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,2048,32,8,64,0,1,fp8,fp8,0,0.7488853136698405
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,2048,32,32,64,0,1,float16,float16,0,0.29576534032821655
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,2048,32,32,64,0,1,float16,fp8,0,0.2921813329060872
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,2048,32,32,64,0,1,fp8,fp8,0,0.45977600415547687
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,2048,32,1,64,0,1,float16,float16,0,0.27323732773462933
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,2048,32,1,64,0,1,float16,fp8,0,0.26436267296473187
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,2048,32,1,64,0,1,fp8,fp8,0,0.3839999834696452
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,2048,32,2,64,0,1,float16,float16,0,0.26282666126887005
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,2048,32,2,64,0,1,float16,fp8,0,0.26828799645106
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,2048,32,2,64,0,1,fp8,fp8,0,0.3860479990641276
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,2048,32,4,64,0,1,float16,float16,0,0.2769920031229655
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,2048,32,4,64,0,1,float16,fp8,0,0.2769920031229655
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,2048,32,4,64,0,1,fp8,fp8,0,0.39048532644907635
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,2048,32,8,64,0,1,float16,float16,0,0.274944007396698
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,2048,32,8,64,0,1,float16,fp8,0,0.2701653242111206
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,2048,32,8,64,0,1,fp8,fp8,0,0.3877546787261963
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,2048,32,32,64,0,1,float16,float16,0,0.16503467162450156
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,2048,32,32,64,0,1,float16,fp8,0,0.16554666558901468
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,2048,32,32,64,0,1,fp8,fp8,0,0.2379093368848165
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,2048,32,1,64,0,1,float16,fp8,0,0.169813334941864
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,2048,32,1,64,0,1,float16,float16,0,0.17066667477289835
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,2048,32,1,64,0,1,fp8,fp8,0,0.2326186696688334
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,2048,32,2,64,0,1,float16,float16,0,0.16742400328318277
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,2048,32,2,64,0,1,fp8,fp8,0,0.23347200949986777
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,2048,32,2,64,0,1,float16,fp8,0,0.16810667514801025
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,2048,32,4,64,0,1,float16,float16,0,0.169813334941864
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,2048,32,4,64,0,1,float16,fp8,0,0.1687893271446228
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,2048,32,4,64,0,1,fp8,fp8,0,0.23227733373641968
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,2048,32,8,64,0,1,float16,fp8,0,0.16691199938456217
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,2048,32,8,64,0,1,fp8,fp8,0,0.2326186696688334
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,2048,32,8,64,0,1,float16,float16,0,0.16725333531697592
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,1536,32,1,64,0,1,float16,float16,0,6.322175979614258
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,1536,32,1,64,0,1,float16,fp8,0,6.315690358479817
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,1536,32,1,64,0,1,fp8,fp8,0,7.460010528564453
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,1536,32,2,64,0,1,float16,float16,0,6.6298878987630205
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,1536,32,2,64,0,1,float16,fp8,0,6.704469045003255
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,1536,32,4,64,0,1,float16,float16,0,6.756181081136067
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,1536,32,4,64,0,1,float16,fp8,0,6.802773157755534
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,1536,32,2,64,0,1,fp8,fp8,0,7.832746505737305
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,1536,32,4,64,0,1,fp8,fp8,0,8.17134920756022
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,1536,32,8,64,0,1,float16,float16,0,7.298218409220378
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,1536,32,8,64,0,1,float16,fp8,0,7.354368209838867
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,1536,32,1,64,0,1,float16,float16,0,2.9798399607340493
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,1536,32,32,64,0,1,float16,fp8,0,4.55236275990804
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,1536,32,32,64,0,1,float16,float16,0,4.895061175028483
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,1536,32,32,64,0,1,fp8,fp8,0,5.084842681884766
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,1536,32,8,64,0,1,fp8,fp8,0,8.84394645690918
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,1536,32,1,64,0,1,float16,fp8,0,2.9598719278971353
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,1536,32,1,64,0,1,fp8,fp8,0,3.495253245035807
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,1536,32,2,64,0,1,float16,float16,0,3.1086934407552085
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,1536,32,2,64,0,1,float16,fp8,0,3.0344533920288086
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,1536,32,2,64,0,1,fp8,fp8,0,3.6017494201660156
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,1536,32,4,64,0,1,float16,float16,0,3.221162796020508
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,1536,32,4,64,0,1,float16,fp8,0,3.199146588643392
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,1536,32,4,64,0,1,fp8,fp8,0,3.7340161005655923
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,1536,32,8,64,0,1,float16,float16,0,3.395925203959147
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,1536,32,8,64,0,1,float16,fp8,0,3.321173350016276
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,1536,32,8,64,0,1,fp8,fp8,0,3.9161173502604165
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,1536,32,32,64,0,1,float16,float16,0,2.302805264790853
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,1536,32,32,64,0,1,float16,fp8,0,2.169343948364258
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,1536,32,1,64,0,1,float16,float16,0,1.43394136428833
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,1536,32,32,64,0,1,fp8,fp8,0,2.4683519999186196
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,1536,32,1,64,0,1,float16,fp8,0,1.3987840016682942
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,1536,32,1,64,0,1,fp8,fp8,0,1.7268053690592449
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,1536,32,2,64,0,1,float16,float16,0,1.4242134094238281
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,1536,32,2,64,0,1,float16,fp8,0,1.4399147033691406
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,1536,32,4,64,0,1,float16,float16,0,1.488042672475179
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,1536,32,2,64,0,1,fp8,fp8,0,1.7483092943827312
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,1536,32,4,64,0,1,float16,fp8,0,1.501695950826009
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,1536,32,8,64,0,1,float16,float16,0,1.622869332631429
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,1536,32,4,64,0,1,fp8,fp8,0,1.819818655649821
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,1536,32,8,64,0,1,float16,fp8,0,1.5672319730122883
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,1536,32,8,64,0,1,fp8,fp8,0,1.8880853652954102
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,1536,32,32,64,0,1,float16,float16,0,1.087999979654948
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,1536,32,32,64,0,1,float16,fp8,0,0.9985706806182861
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,1536,32,32,64,0,1,fp8,fp8,0,1.222314675649007
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,1536,32,1,64,0,1,float16,float16,0,0.6338560183842977
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,1536,32,1,64,0,1,float16,fp8,0,0.6142293214797974
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,1536,32,1,64,0,1,fp8,fp8,0,0.8616960048675537
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,1536,32,2,64,0,1,float16,float16,0,0.618837316830953
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,1536,32,2,64,0,1,float16,fp8,0,0.6159360011418661
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,1536,32,2,64,0,1,fp8,fp8,0,0.86135466893514
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,1536,32,4,64,0,1,float16,float16,0,0.6331733465194702
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,1536,32,4,64,0,1,float16,fp8,0,0.6287360191345215
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,1536,32,8,64,0,1,float16,float16,0,0.7026346524556478
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,1536,32,4,64,0,1,fp8,fp8,0,0.9011200269063314
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,1536,32,8,64,0,1,float16,fp8,0,0.6848853429158529
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,1536,32,8,64,0,1,fp8,fp8,0,0.9521493117014567
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,1536,32,32,64,0,1,float16,float16,0,0.4322986602783203
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,1536,32,32,64,0,1,float16,fp8,0,0.3701759974161784
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,1536,32,32,64,0,1,fp8,fp8,0,0.6191786527633667
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,1536,32,1,64,0,1,float16,float16,0,0.31539199749628705
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,1536,32,1,64,0,1,float16,fp8,0,0.3184640010197957
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,1536,32,1,64,0,1,fp8,fp8,0,0.4358826478322347
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,1536,32,2,64,0,1,float16,float16,0,0.31692800919214886
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,1536,32,2,64,0,1,float16,fp8,0,0.3104426662127177
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,1536,32,4,64,0,1,float16,float16,0,0.32665600379308063
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,1536,32,2,64,0,1,fp8,fp8,0,0.43110398451487225
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,1536,32,4,64,0,1,float16,fp8,0,0.3141973416010539
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,1536,32,4,64,0,1,fp8,fp8,0,0.4370773235956828
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,1536,32,8,64,0,1,float16,float16,0,0.32153600454330444
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,1536,32,8,64,0,1,float16,fp8,0,0.32665600379308063
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,1536,32,8,64,0,1,fp8,fp8,0,0.4427093267440796
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,1536,32,32,64,0,1,float16,float16,0,0.18158932526906332
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,1536,32,32,64,0,1,float16,fp8,0,0.18227199713389078
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,1536,32,32,64,0,1,fp8,fp8,0,0.250709335009257
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,1536,32,1,64,0,1,float16,float16,0,0.16964266697565714
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,1536,32,1,64,0,1,float16,fp8,0,0.16827734311421713
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,1536,32,1,64,0,1,fp8,fp8,0,0.23637332518895468
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,1536,32,2,64,0,1,float16,float16,0,0.16810667514801025
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,1536,32,2,64,0,1,float16,fp8,0,0.16708266735076904
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,1536,32,2,64,0,1,fp8,fp8,0,0.23569067319234213
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,1536,32,4,64,0,1,float16,float16,0,0.16793600718180338
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,1536,32,4,64,0,1,float16,fp8,0,0.16810667514801025
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,1536,32,4,64,0,1,fp8,fp8,0,0.23705599705378214
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,1536,32,8,64,0,1,float16,float16,0,0.16554666558901468
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,1536,32,8,64,0,1,float16,fp8,0,0.16605866948763529
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,1536,32,8,64,0,1,fp8,fp8,0,0.2397866646448771
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,1536,32,32,64,0,1,float16,float16,0,0.11246933539708455
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,1536,32,32,64,0,1,float16,fp8,0,0.11161599556605022
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,1536,32,32,64,0,1,fp8,fp8,0,0.14813866217931113
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,1536,32,1,64,0,1,float16,float16,0,0.1153706709543864
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,1536,32,1,64,0,1,float16,fp8,0,0.11468799908955891
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,1536,32,1,64,0,1,fp8,fp8,0,0.14830933014551798
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,1536,32,2,64,0,1,float16,float16,0,0.11417599519093831
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,1536,32,2,64,0,1,fp8,fp8,0,0.14882133404413858
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,1536,32,2,64,0,1,float16,fp8,0,0.11468799908955891
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,1536,32,4,64,0,1,float16,float16,0,0.11400533715883891
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,1536,32,4,64,0,1,float16,fp8,0,0.11400533715883891
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,1536,32,4,64,0,1,fp8,fp8,0,0.144896000623703
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,1536,32,8,64,0,1,float16,float16,0,0.11246933539708455
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,1536,32,8,64,0,1,fp8,fp8,0,0.14830933014551798
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,1536,32,8,64,0,1,float16,fp8,0,0.1129813293615977
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,1024,32,1,64,0,1,float16,float16,0,6.799530665079753
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,1024,32,1,64,0,1,float16,fp8,0,6.8075517018636065
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,1024,32,1,64,0,1,fp8,fp8,0,7.45796267191569
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,1024,32,2,64,0,1,float16,float16,0,7.1058775583903
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,1024,32,2,64,0,1,float16,fp8,0,6.88162104288737
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,1024,32,2,64,0,1,fp8,fp8,0,7.712767918904622
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,1024,32,4,64,0,1,float16,fp8,0,6.84663454691569
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,1024,32,4,64,0,1,float16,float16,0,7.004330952962239
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,1024,32,4,64,0,1,fp8,fp8,0,7.969962437947591
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,1024,32,8,64,0,1,float16,float16,0,7.791445414225261
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,1024,32,8,64,0,1,float16,fp8,0,7.440042495727539
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,1024,32,32,64,0,1,float16,fp8,0,5.333845138549805
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,1024,32,32,64,0,1,float16,float16,0,5.618005116780599
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,1024,32,1,64,0,1,float16,float16,0,3.221162796020508
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,1024,32,8,64,0,1,fp8,fp8,0,8.310954411824545
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,1024,32,32,64,0,1,fp8,fp8,0,5.509461085001628
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,1024,32,1,64,0,1,float16,fp8,0,3.227135976155599
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,1024,32,1,64,0,1,fp8,fp8,0,3.610111872355143
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,1024,32,2,64,0,1,float16,float16,0,3.319978713989258
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,1024,32,2,64,0,1,float16,fp8,0,3.2948907216389975
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,1024,32,4,64,0,1,float16,float16,0,3.4510507583618164
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,1024,32,2,64,0,1,fp8,fp8,0,3.705002784729004
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,1024,32,4,64,0,1,float16,fp8,0,3.435178756713867
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,1024,32,4,64,0,1,fp8,fp8,0,3.8292481104532876
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,1024,32,8,64,0,1,float16,float16,0,3.779072125752767
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,1024,32,8,64,0,1,float16,fp8,0,3.7067092259724936
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,1024,32,32,64,0,1,float16,float16,0,2.758485476175944
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,1024,32,1,64,0,1,float16,float16,0,1.5423146883646648
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,1024,32,32,64,0,1,float16,fp8,0,2.610688050587972
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,1024,32,32,64,0,1,fp8,fp8,0,2.689706802368164
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,1024,32,8,64,0,1,fp8,fp8,0,4.016127904256185
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,1024,32,1,64,0,1,float16,fp8,0,1.5276373227437336
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,1024,32,1,64,0,1,fp8,fp8,0,1.7517226537068684
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,1024,32,2,64,0,1,float16,float16,0,1.5912960370381672
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,1024,32,2,64,0,1,float16,fp8,0,1.5465812683105469
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,1024,32,2,64,0,1,fp8,fp8,0,1.7978026072184246
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,1024,32,4,64,0,1,float16,float16,0,1.6457386016845703
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,1024,32,4,64,0,1,float16,fp8,0,1.6208213170369465
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,1024,32,4,64,0,1,fp8,fp8,0,1.8589013417561848
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,1024,32,8,64,0,1,float16,float16,0,1.7759572664896648
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,1024,32,8,64,0,1,float16,fp8,0,1.7448959350585938
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,1024,32,32,64,0,1,float16,float16,0,1.3119146823883057
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,1024,32,8,64,0,1,fp8,fp8,0,1.9752960205078125
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,1024,32,32,64,0,1,float16,fp8,0,1.2361386617024739
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,1024,32,1,64,0,1,float16,fp8,0,0.6662826538085938
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,1024,32,1,64,0,1,float16,float16,0,0.668842633565267
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,1024,32,32,64,0,1,fp8,fp8,0,1.3349547386169434
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,1024,32,1,64,0,1,fp8,fp8,0,0.8656213283538818
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,1024,32,2,64,0,1,float16,float16,0,0.696832021077474
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,1024,32,2,64,0,1,float16,fp8,0,0.682154655456543
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,1024,32,4,64,0,1,float16,float16,0,0.7338666915893555
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,1024,32,4,64,0,1,float16,fp8,0,0.7080960273742676
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,1024,32,2,64,0,1,fp8,fp8,0,0.891050656636556
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,1024,32,8,64,0,1,float16,float16,0,0.8096426328023275
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,1024,32,4,64,0,1,fp8,fp8,0,0.9246719678243002
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,1024,32,8,64,0,1,float16,fp8,0,0.7746559778849283
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,1024,32,8,64,0,1,fp8,fp8,0,0.9958399931589762
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,1024,32,32,64,0,1,float16,float16,0,0.5724159876505533
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,1024,32,32,64,0,1,float16,fp8,0,0.5246293147404989
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,1024,32,32,64,0,1,fp8,fp8,0,0.6804479757944742
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,1024,32,1,64,0,1,float16,fp8,0,0.32255999247233075
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,1024,32,1,64,0,1,float16,float16,0,0.32870399951934814
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,1024,32,1,64,0,1,fp8,fp8,0,0.4184746742248535
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,1024,32,2,64,0,1,float16,float16,0,0.3083946704864502
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,1024,32,2,64,0,1,float16,fp8,0,0.3176106611887614
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,1024,32,2,64,0,1,fp8,fp8,0,0.4184746742248535
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,1024,32,4,64,0,1,float16,fp8,0,0.31249066193898517
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,1024,32,4,64,0,1,fp8,fp8,0,0.4246186812718709
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,1024,32,4,64,0,1,float16,float16,0,0.32716800769170123
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,1024,32,8,64,0,1,float16,fp8,0,0.31112533807754517
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,1024,32,8,64,0,1,float16,float16,0,0.3269973397254944
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,1024,32,8,64,0,1,fp8,fp8,0,0.45721598466237384
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,1024,32,32,64,0,1,float16,float16,0,0.17169066270192465
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,1024,32,32,64,0,1,float16,fp8,0,0.16657066345214844
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,1024,32,32,64,0,1,fp8,fp8,0,0.29764266808827716
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,1024,32,1,64,0,1,float16,float16,0,0.1609386702378591
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,1024,32,1,64,0,1,float16,fp8,0,0.16315733393033346
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,1024,32,1,64,0,1,fp8,fp8,0,0.22016000747680664
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,1024,32,2,64,0,1,float16,float16,0,0.16264533003171286
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,1024,32,2,64,0,1,float16,fp8,0,0.16622933745384216
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,1024,32,2,64,0,1,fp8,fp8,0,0.22050132354100546
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,1024,32,4,64,0,1,float16,float16,0,0.16025599837303162
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,1024,32,4,64,0,1,float16,fp8,0,0.16674133141835532
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,1024,32,4,64,0,1,fp8,fp8,0,0.2198186715443929
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,1024,32,8,64,0,1,float16,float16,0,0.16008533040682474
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,1024,32,8,64,0,1,float16,fp8,0,0.159061332543691
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,1024,32,8,64,0,1,fp8,fp8,0,0.2230613430341085
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,1024,32,32,64,0,1,float16,float16,0,0.09847467144330342
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,1024,32,32,64,0,1,float16,fp8,0,0.09864532947540283
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,1024,32,32,64,0,1,fp8,fp8,0,0.1302186648050944
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,1024,32,1,64,0,1,float16,float16,0,0.0962559978167216
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,1024,32,1,64,0,1,float16,fp8,0,0.09762133161226909
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,1024,32,1,64,0,1,fp8,fp8,0,0.12800000111262003
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,1024,32,2,64,0,1,float16,float16,0,0.09642666578292847
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,1024,32,2,64,0,1,float16,fp8,0,0.09659733374913533
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,1024,32,2,64,0,1,fp8,fp8,0,0.12526933352152506
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,1024,32,4,64,0,1,float16,float16,0,0.09676800171534221
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,1024,32,4,64,0,1,float16,fp8,0,0.09608532985051473
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,1024,32,8,64,0,1,float16,float16,0,0.09574400385220845
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,1024,32,4,64,0,1,fp8,fp8,0,0.12782933314641318
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,1024,32,8,64,0,1,float16,fp8,0,0.09540266791979472
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,1024,32,8,64,0,1,fp8,fp8,0,0.12851199507713318
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,1024,32,32,64,0,1,float16,float16,0,0.0580266664425532
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,1024,32,32,64,0,1,float16,fp8,0,0.05819733440876007
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,1024,32,32,64,0,1,fp8,fp8,0,0.07031466563542683
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,1024,32,1,64,0,1,float16,float16,0,0.0580266664425532
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,1024,32,1,64,0,1,float16,fp8,0,0.05853866537412008
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,1024,32,1,64,0,1,fp8,fp8,0,0.07065600156784058
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,1024,32,2,64,0,1,float16,float16,0,0.057002668579419456
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,1024,32,2,64,0,1,float16,fp8,0,0.05614933371543884
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,1024,32,2,64,0,1,fp8,fp8,0,0.07031466563542683
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,1024,32,4,64,0,1,float16,float16,0,0.05819733440876007
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,1024,32,4,64,0,1,fp8,fp8,0,0.07014399766921997
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,1024,32,4,64,0,1,float16,fp8,0,0.05819733440876007
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,1024,32,8,64,0,1,float16,float16,0,0.05563733478387197
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,1024,32,8,64,0,1,float16,fp8,0,0.05649066468079885
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,1024,32,8,64,0,1,fp8,fp8,0,0.07048533360163371
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,512,32,1,64,0,1,float16,float16,0,4.951039950052897
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,512,32,1,64,0,1,float16,fp8,0,5.072042783101399
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,512,32,1,64,0,1,fp8,fp8,0,5.007701237996419
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,512,32,2,64,0,1,float16,float16,0,5.25875186920166
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,512,32,2,64,0,1,float16,fp8,0,5.284864107767741
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,512,32,2,64,0,1,fp8,fp8,0,5.274112065633138
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,512,32,4,64,0,1,float16,float16,0,5.398357391357422
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,512,32,4,64,0,1,float16,fp8,0,5.305856068929036
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,512,32,4,64,0,1,fp8,fp8,0,5.457237243652344
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,512,32,8,64,0,1,float16,float16,0,6.1542402903238935
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,512,32,8,64,0,1,float16,fp8,0,5.972650527954102
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,512,32,1,64,0,1,float16,float16,0,2.3594667116800943
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,512,32,32,64,0,1,float16,float16,0,5.151914596557617
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,512,32,8,64,0,1,fp8,fp8,0,5.976064046223958
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,512,32,32,64,0,1,float16,fp8,0,4.818431854248047
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,512,32,32,64,0,1,fp8,fp8,0,4.478122711181641
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,512,32,1,64,0,1,float16,fp8,0,2.3162879943847656
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,512,32,1,64,0,1,fp8,fp8,0,2.4316587448120117
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,512,32,2,64,0,1,float16,float16,0,2.4799572626749673
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,512,32,2,64,0,1,float16,fp8,0,2.4780799547831216
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,512,32,2,64,0,1,fp8,fp8,0,2.6072746912638345
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,512,32,4,64,0,1,float16,float16,0,2.6876586278279624
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,512,32,4,64,0,1,float16,fp8,0,2.580479939778646
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,512,32,4,64,0,1,fp8,fp8,0,2.701141357421875
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,512,32,8,64,0,1,float16,float16,0,3.0011733373006186
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,512,32,8,64,0,1,fp8,fp8,0,2.885631879170736
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,512,32,8,64,0,1,float16,fp8,0,2.9062827428181968
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,512,32,32,64,0,1,float16,float16,0,2.5088000297546387
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,512,32,32,64,0,1,float16,fp8,0,2.358784039815267
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,512,32,1,64,0,1,float16,float16,0,1.1190613110860188
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,512,32,32,64,0,1,fp8,fp8,0,2.1585920651753745
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,512,32,1,64,0,1,float16,fp8,0,1.084928035736084
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,512,32,1,64,0,1,fp8,fp8,0,1.216000000635783
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,512,32,2,64,0,1,float16,float16,0,1.182207981745402
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,512,32,2,64,0,1,float16,fp8,0,1.177770694096883
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,512,32,2,64,0,1,fp8,fp8,0,1.264469305674235
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,512,32,4,64,0,1,float16,float16,0,1.2941653728485107
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,512,32,4,64,0,1,float16,fp8,0,1.2243626912434895
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,512,32,8,64,0,1,float16,float16,0,1.4235307375590007
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,512,32,4,64,0,1,fp8,fp8,0,1.298261324564616
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,512,32,8,64,0,1,float16,fp8,0,1.3818880716959636
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,512,32,8,64,0,1,fp8,fp8,0,1.415679931640625
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,512,32,32,64,0,1,float16,float16,0,1.2001279989878337
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,512,32,32,64,0,1,float16,fp8,0,1.1151359875996907
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,512,32,1,64,0,1,float16,float16,0,0.47121067841847736
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,512,32,32,64,0,1,fp8,fp8,0,1.087829351425171
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,512,32,1,64,0,1,fp8,fp8,0,0.5887999931971232
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,512,32,1,64,0,1,float16,fp8,0,0.4613120158513387
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,512,32,2,64,0,1,float16,float16,0,0.49237334728240967
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,512,32,2,64,0,1,float16,fp8,0,0.4795733292897542
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,512,32,2,64,0,1,fp8,fp8,0,0.6075733502705892
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,512,32,4,64,0,1,float16,float16,0,0.5382826725641886
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,512,32,4,64,0,1,float16,fp8,0,0.5220693349838257
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,512,32,4,64,0,1,fp8,fp8,0,0.6517759958902994
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,512,32,8,64,0,1,float16,float16,0,0.6306133270263672
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,512,32,8,64,0,1,float16,fp8,0,0.5973333517710367
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,512,32,8,64,0,1,fp8,fp8,0,0.7053653399149576
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,512,32,32,64,0,1,float16,float16,0,0.49322664737701416
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,512,32,32,64,0,1,float16,fp8,0,0.44014934698740643
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,512,32,32,64,0,1,fp8,fp8,0,0.5374293327331543
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,512,32,1,64,0,1,float16,float16,0,0.2182826598485311
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,512,32,1,64,0,1,float16,fp8,0,0.21230934063593546
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,512,32,2,64,0,1,float16,float16,0,0.20872533321380615
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,512,32,1,64,0,1,fp8,fp8,0,0.26436267296473187
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,512,32,2,64,0,1,float16,fp8,0,0.2121386726697286
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,512,32,2,64,0,1,fp8,fp8,0,0.27033599217732746
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,512,32,4,64,0,1,float16,float16,0,0.21691733598709106
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,512,32,4,64,0,1,float16,fp8,0,0.21504000822703043
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,512,32,4,64,0,1,fp8,fp8,0,0.2725546757380168
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,512,32,8,64,0,1,float16,float16,0,0.21640533208847046
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,512,32,8,64,0,1,float16,fp8,0,0.20974934101104736
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,512,32,8,64,0,1,fp8,fp8,0,0.31470932563145954
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,512,32,32,64,0,1,float16,float16,0,0.1250986655553182
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,512,32,32,64,0,1,float16,fp8,0,0.11161599556605022
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,512,32,32,64,0,1,fp8,fp8,0,0.2249386707941691
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,512,32,1,64,0,1,float16,float16,0,0.13738666971524557
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,512,32,1,64,0,1,float16,fp8,0,0.11127466956774394
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,512,32,1,64,0,1,fp8,fp8,0,0.1418239971001943
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,512,32,2,64,0,1,float16,float16,0,0.11110400160153706
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,512,32,2,64,0,1,float16,fp8,0,0.1114453375339508
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,512,32,2,64,0,1,fp8,fp8,0,0.14216533303260803
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,512,32,4,64,0,1,float16,float16,0,0.11195733149846394
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,512,32,4,64,0,1,float16,fp8,0,0.10990933577219646
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,512,32,4,64,0,1,fp8,fp8,0,0.14387200276056925
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,512,32,8,64,0,1,float16,float16,0,0.10905599594116211
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,512,32,8,64,0,1,float16,fp8,0,0.10803199807802837
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,512,32,32,64,0,1,float16,float16,0,0.06673066814740498
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,512,32,8,64,0,1,fp8,fp8,0,0.14284800489743552
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,512,32,32,64,0,1,fp8,fp8,0,0.0846506655216217
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,512,32,32,64,0,1,float16,fp8,0,0.06638933221499126
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,512,32,1,64,0,1,float16,fp8,0,0.06519466638565063
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,512,32,1,64,0,1,float16,float16,0,0.06502399841944377
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,512,32,1,64,0,1,fp8,fp8,0,0.0820906658967336
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,512,32,2,64,0,1,float16,float16,0,0.06553600231806438
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,512,32,2,64,0,1,float16,fp8,0,0.06400000055631001
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,512,32,2,64,0,1,fp8,fp8,0,0.08243200182914734
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,512,32,4,64,0,1,float16,float16,0,0.06451199948787689
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,512,32,4,64,0,1,float16,fp8,0,0.06400000055631001
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,512,32,4,64,0,1,fp8,fp8,0,0.08191999793052673
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,512,32,8,64,0,1,float16,float16,0,0.06468266745408376
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,512,32,8,64,0,1,float16,fp8,0,0.06434133152167003
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,512,32,8,64,0,1,fp8,fp8,0,0.08277333279450734
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,512,32,32,64,0,1,float16,float16,0,0.03839999934037527
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,512,32,32,64,0,1,float16,fp8,0,0.03874133278926214
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,512,32,32,64,0,1,fp8,fp8,0,0.04386133452256521
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,512,32,1,64,0,1,float16,float16,0,0.03754666695992152
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,512,32,1,64,0,1,float16,fp8,0,0.03839999934037527
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,512,32,1,64,0,1,fp8,fp8,0,0.044031997521718345
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,512,32,2,64,0,1,float16,float16,0,0.037717332442601524
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,512,32,2,64,0,1,float16,fp8,0,0.038912000755469
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,512,32,2,64,0,1,fp8,fp8,0,0.04334933559099833
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,512,32,4,64,0,1,float16,float16,0,0.03839999934037527
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,512,32,4,64,0,1,float16,fp8,0,0.03839999934037527
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,512,32,4,64,0,1,fp8,fp8,0,0.04334933559099833
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,512,32,8,64,0,1,float16,float16,0,0.037205333511034645
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,512,32,8,64,0,1,float16,fp8,0,0.037717332442601524
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,512,32,8,64,0,1,fp8,fp8,0,0.04351999859015147
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,512,32,32,64,0,1,float16,fp8,0,0.02372266600529353
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,512,32,32,64,0,1,float16,float16,0,0.024234667420387268
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,512,32,32,64,0,1,fp8,fp8,0,0.0314026673634847
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,512,32,1,64,0,1,float16,float16,0,0.023210667073726654
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,512,32,1,64,0,1,float16,fp8,0,0.023893333971500397
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,512,32,1,64,0,1,fp8,fp8,0,0.0314026673634847
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,512,32,2,64,0,1,float16,fp8,0,0.023039999107519787
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,512,32,2,64,0,1,float16,float16,0,0.023210667073726654
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,512,32,2,64,0,1,fp8,fp8,0,0.031231999397277832
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,512,32,4,64,0,1,float16,float16,0,0.023039999107519787
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,512,32,4,64,0,1,float16,fp8,0,0.023210667073726654
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,512,32,4,64,0,1,fp8,fp8,0,0.031061333914597828
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,512,32,8,64,0,1,float16,fp8,0,0.023210667073726654
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,512,32,8,64,0,1,float16,float16,0,0.023210667073726654
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,512,32,8,64,0,1,fp8,fp8,0,0.031061333914597828
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,256,32,1,64,0,1,float16,float16,0,2.0582399368286133
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,256,32,1,64,0,1,float16,fp8,0,2.0577279726664224
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,256,32,1,64,0,1,fp8,fp8,0,1.8771626154581706
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,256,32,2,64,0,1,float16,float16,0,2.2529706954956055
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,256,32,2,64,0,1,float16,fp8,0,2.2248106002807617
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,256,32,2,64,0,1,fp8,fp8,0,2.044586658477783
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,256,32,4,64,0,1,float16,float16,0,2.3877973556518555
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,256,32,4,64,0,1,float16,fp8,0,2.330453395843506
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,256,32,4,64,0,1,fp8,fp8,0,2.157909393310547
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,256,32,8,64,0,1,float16,float16,0,2.775210698445638
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,256,32,8,64,0,1,float16,fp8,0,2.6755412419637046
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,256,32,8,64,0,1,fp8,fp8,0,2.335573355356852
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,256,32,1,64,0,1,float16,float16,0,0.9553919633229574
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,256,32,32,64,0,1,float16,fp8,0,2.350933392842611
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,256,32,32,64,0,1,float16,float16,0,2.500096003214518
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,256,32,32,64,0,1,fp8,fp8,0,1.9109546343485515
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,256,32,1,64,0,1,float16,fp8,0,0.9492479960123698
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,256,32,1,64,0,1,fp8,fp8,0,0.912384033203125
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,256,32,2,64,0,1,float16,float16,0,1.0589866638183594
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,256,32,2,64,0,1,float16,fp8,0,1.0359466870625813
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,256,32,2,64,0,1,fp8,fp8,0,0.9680213133494059
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,256,32,4,64,0,1,float16,float16,0,1.118890682856242
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,256,32,4,64,0,1,float16,fp8,0,1.0953386624654133
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,256,32,4,64,0,1,fp8,fp8,0,1.0221227010091145
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,256,32,8,64,0,1,float16,float16,0,1.312597354253133
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,256,32,8,64,0,1,float16,fp8,0,1.2651519775390625
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,256,32,32,64,0,1,float16,float16,0,1.207808017730713
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,256,32,8,64,0,1,fp8,fp8,0,1.1501226425170898
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,256,32,32,64,0,1,fp8,fp8,0,0.9412266413370768
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,256,32,1,64,0,1,float16,float16,0,0.3957759936650594
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,256,32,32,64,0,1,float16,fp8,0,1.102847973505656
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,256,32,1,64,0,1,float16,fp8,0,0.38843735059102374
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,256,32,1,64,0,1,fp8,fp8,0,0.4657493432362874
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,256,32,2,64,0,1,float16,float16,0,0.4092586835225423
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,256,32,2,64,0,1,float16,fp8,0,0.3983360131581624
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,256,32,2,64,0,1,fp8,fp8,0,0.47274665037790936
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,256,32,4,64,0,1,float16,float16,0,0.4720640182495117
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,256,32,4,64,0,1,float16,fp8,0,0.4369066556294759
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,256,32,4,64,0,1,fp8,fp8,0,0.5104639927546183
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,256,32,8,64,0,1,float16,float16,0,0.5642240047454834
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,256,32,8,64,0,1,float16,fp8,0,0.5283840099970499
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,256,32,8,64,0,1,fp8,fp8,0,0.5722453196843466
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,256,32,32,64,0,1,float16,float16,0,0.4949333270390828
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,256,32,32,64,0,1,float16,fp8,0,0.4456106821695964
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,256,32,32,64,0,1,fp8,fp8,0,0.466261347134908
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,256,32,1,64,0,1,float16,float16,0,0.14882133404413858
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,256,32,1,64,0,1,float16,fp8,0,0.14967466394106546
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,256,32,1,64,0,1,fp8,fp8,0,0.18363734086354574
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,256,32,2,64,0,1,float16,float16,0,0.15633066495259604
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,256,32,2,64,0,1,float16,fp8,0,0.15052800377209982
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,256,32,2,64,0,1,fp8,fp8,0,0.18858667214711508
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,256,32,4,64,0,1,float16,float16,0,0.15155200163523355
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,256,32,4,64,0,1,float16,fp8,0,0.15581867098808289
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,256,32,8,64,0,1,float16,float16,0,0.1641813317934672
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,256,32,4,64,0,1,fp8,fp8,0,0.19234132766723633
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,256,32,8,64,0,1,float16,fp8,0,0.1518933375676473
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,256,32,32,64,0,1,float16,float16,0,0.09523199995358785
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,256,32,8,64,0,1,fp8,fp8,0,0.24064000447591147
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,256,32,32,64,0,1,float16,fp8,0,0.08430932958920796
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,256,32,32,64,0,1,fp8,fp8,0,0.17937066157658896
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,256,32,1,64,0,1,float16,float16,0,0.07918933530648549
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,256,32,1,64,0,1,float16,fp8,0,0.07816533247629802
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,256,32,1,64,0,1,fp8,fp8,0,0.10086400310198466
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,256,32,2,64,0,1,float16,float16,0,0.07867733140786488
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,256,32,2,64,0,1,float16,fp8,0,0.07987200220425923
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,256,32,2,64,0,1,fp8,fp8,0,0.10052266716957092
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,256,32,4,64,0,1,float16,float16,0,0.08072533210118611
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,256,32,4,64,0,1,float16,fp8,0,0.08055466910203297
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,256,32,4,64,0,1,fp8,fp8,0,0.10154666503270467
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,256,32,8,64,0,1,float16,float16,0,0.08072533210118611
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,256,32,8,64,0,1,float16,fp8,0,0.0820906658967336
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,256,32,8,64,0,1,fp8,fp8,0,0.10120532910029094
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,256,32,32,64,0,1,float16,float16,0,0.04983466863632202
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,256,32,32,64,0,1,float16,fp8,0,0.04966400067011515
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,256,32,32,64,0,1,fp8,fp8,0,0.06178133189678192
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,256,32,1,64,0,1,float16,float16,0,0.04642133414745331
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,256,32,1,64,0,1,float16,fp8,0,0.045909335215886436
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,256,32,1,64,0,1,fp8,fp8,0,0.05905066430568695
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,256,32,2,64,0,1,float16,float16,0,0.04625066618124644
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,256,32,2,64,0,1,float16,fp8,0,0.04471466441949209
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,256,32,2,64,0,1,fp8,fp8,0,0.059562668204307556
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,256,32,4,64,0,1,float16,fp8,0,0.04625066618124644
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,256,32,4,64,0,1,float16,float16,0,0.045738667249679565
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,256,32,4,64,0,1,fp8,fp8,0,0.059392000238100685
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,256,32,8,64,0,1,float16,float16,0,0.04625066618124644
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,256,32,8,64,0,1,float16,fp8,0,0.04607999821503957
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,256,32,8,64,0,1,fp8,fp8,0,0.06058666606744131
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,256,32,32,64,0,1,float16,float16,0,0.02867199977238973
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,256,32,32,64,0,1,float16,fp8,0,0.0288426677385966
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,256,32,32,64,0,1,fp8,fp8,0,0.0339626669883728
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,256,32,1,64,0,1,float16,float16,0,0.028160000840822857
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,256,32,1,64,0,1,float16,fp8,0,0.028501334289709728
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,256,32,2,64,0,1,float16,float16,0,0.027477333943049114
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,256,32,1,64,0,1,fp8,fp8,0,0.03362133353948593
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,256,32,2,64,0,1,float16,fp8,0,0.028160000840822857
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,256,32,2,64,0,1,fp8,fp8,0,0.03310933212439219
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,256,32,4,64,0,1,float16,float16,0,0.027818667391935985
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,256,32,4,64,0,1,float16,fp8,0,0.028160000840822857
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,256,32,4,64,0,1,fp8,fp8,0,0.03293866664171219
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,256,32,8,64,0,1,float16,float16,0,0.027989332874615986
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,256,32,8,64,0,1,float16,fp8,0,0.028160000840822857
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,256,32,8,64,0,1,fp8,fp8,0,0.03345066557327906
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,256,32,32,64,0,1,float16,float16,0,0.018602666755517323
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,256,32,32,64,0,1,fp8,fp8,0,0.022357332209746044
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,256,32,32,64,0,1,float16,fp8,0,0.018090666582187016
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,256,32,1,64,0,1,float16,float16,0,0.018090666582187016
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,256,32,1,64,0,1,fp8,fp8,0,0.02218666672706604
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,256,32,1,64,0,1,float16,fp8,0,0.01791999985774358
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,256,32,2,64,0,1,float16,float16,0,0.017407999684413273
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,256,32,2,64,0,1,float16,fp8,0,0.017407999684413273
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,256,32,2,64,0,1,fp8,fp8,0,0.02218666672706604
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,256,32,4,64,0,1,float16,float16,0,0.017407999684413273
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,256,32,4,64,0,1,float16,fp8,0,0.017407999684413273
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,256,32,4,64,0,1,fp8,fp8,0,0.02218666672706604
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,256,32,8,64,0,1,float16,float16,0,0.01757866640885671
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,256,32,8,64,0,1,float16,fp8,0,0.01757866640885671
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,256,32,8,64,0,1,fp8,fp8,0,0.02184533327817917
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,256,32,32,64,0,1,float16,float16,0,0.013653332988421122
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,256,32,32,64,0,1,float16,fp8,0,0.013823999712864557
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,256,32,32,64,0,1,fp8,fp8,0,0.01826133330663045
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,256,32,1,64,0,1,float16,float16,0,0.013823999712864557
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,256,32,1,64,0,1,float16,fp8,0,0.014165333161751429
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,256,32,1,64,0,1,fp8,fp8,0,0.01826133330663045
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,256,32,2,64,0,1,float16,float16,0,0.013482666263977686
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,256,32,2,64,0,1,float16,fp8,0,0.013823999712864557
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,256,32,2,64,0,1,fp8,fp8,0,0.01791999985774358
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,256,32,4,64,0,1,float16,float16,0,0.013482666263977686
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,256,32,4,64,0,1,float16,fp8,0,0.013653332988421122
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,256,32,4,64,0,1,fp8,fp8,0,0.01791999985774358
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,256,32,8,64,0,1,float16,float16,0,0.013653332988421122
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,256,32,8,64,0,1,float16,fp8,0,0.013482666263977686
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,256,32,8,64,0,1,fp8,fp8,0,0.018090666582187016
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,128,32,1,64,0,1,float16,float16,0,0.9480533599853516
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,128,32,1,64,0,1,float16,fp8,0,0.9413973490397135
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,128,32,1,64,0,1,fp8,fp8,0,0.8043519655863444
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,128,32,2,64,0,1,float16,float16,0,1.021781365076701
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,128,32,2,64,0,1,float16,fp8,0,1.00437331199646
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,128,32,2,64,0,1,fp8,fp8,0,0.8386560281117758
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,128,32,4,64,0,1,float16,float16,0,1.1356159845987956
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,128,32,4,64,0,1,float16,fp8,0,1.1033600171407063
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,128,32,4,64,0,1,fp8,fp8,0,0.8920746644337972
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,128,32,8,64,0,1,float16,float16,0,1.3380266825358074
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,128,32,8,64,0,1,float16,fp8,0,1.2898986339569092
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,128,32,8,64,0,1,fp8,fp8,0,1.0221227010091145
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,128,32,32,64,0,1,float16,float16,0,1.1832319895426433
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,128,32,32,64,0,1,float16,fp8,0,1.1122346719106038
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,128,32,32,64,0,1,fp8,fp8,0,0.9241600036621094
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,128,32,1,64,0,1,float16,float16,0,0.339626669883728
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,128,32,1,64,0,1,float16,fp8,0,0.33553067843119305
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,128,32,1,64,0,1,fp8,fp8,0,0.39031465848286945
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,128,32,2,64,0,1,float16,float16,0,0.3860479990641276
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,128,32,2,64,0,1,float16,fp8,0,0.3778560161590576
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,128,32,2,64,0,1,fp8,fp8,0,0.3998719851175944
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,128,32,4,64,0,1,float16,float16,0,0.4608000119527181
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,128,32,4,64,0,1,float16,fp8,0,0.4394666751225789
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,128,32,4,64,0,1,fp8,fp8,0,0.44458667437235516
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,128,32,8,64,0,1,float16,float16,0,0.5893119970957438
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,128,32,8,64,0,1,float16,fp8,0,0.555178682009379
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,128,32,32,64,0,1,float16,float16,0,0.5036373138427734
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,128,32,8,64,0,1,fp8,fp8,0,0.5116586685180664
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,128,32,32,64,0,1,float16,fp8,0,0.44339199860890705
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,128,32,1,64,0,1,float16,float16,0,0.11673600474993388
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,128,32,32,64,0,1,fp8,fp8,0,0.45585068066914874
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,128,32,1,64,0,1,float16,fp8,0,0.1237333317597707
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,128,32,1,64,0,1,fp8,fp8,0,0.1469439963499705
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,128,32,2,64,0,1,float16,float16,0,0.12782933314641318
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,128,32,2,64,0,1,float16,fp8,0,0.1160533328851064
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,128,32,2,64,0,1,fp8,fp8,0,0.14847999811172485
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,128,32,4,64,0,1,float16,float16,0,0.12117333213488261
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,128,32,4,64,0,1,fp8,fp8,0,0.15172266960144043
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,128,32,4,64,0,1,float16,fp8,0,0.13056000073750815
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,128,32,8,64,0,1,float16,float16,0,0.13516799608866373
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,128,32,8,64,0,1,float16,fp8,0,0.12800000111262003
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,128,32,8,64,0,1,fp8,fp8,0,0.20974934101104736
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,128,32,32,64,0,1,float16,float16,0,0.08430932958920796
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,128,32,32,64,0,1,float16,fp8,0,0.06621866424878438
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,128,32,32,64,0,1,fp8,fp8,0,0.1621333360671997
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,128,32,1,64,0,1,float16,float16,0,0.059903999169667564
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,128,32,1,64,0,1,float16,fp8,0,0.060415998101234436
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,128,32,1,64,0,1,fp8,fp8,0,0.07816533247629802
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,128,32,2,64,0,1,float16,float16,0,0.0602453351020813
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,128,32,2,64,0,1,float16,fp8,0,0.05973333120346069
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,128,32,2,64,0,1,fp8,fp8,0,0.07987200220425923
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,128,32,4,64,0,1,float16,float16,0,0.06229333579540253
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,128,32,4,64,0,1,fp8,fp8,0,0.07935999830563863
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,128,32,4,64,0,1,float16,fp8,0,0.060415998101234436
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,128,32,8,64,0,1,float16,float16,0,0.06229333579540253
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,128,32,8,64,0,1,float16,fp8,0,0.06126933296521505
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,128,32,8,64,0,1,fp8,fp8,0,0.07987200220425923
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,128,32,32,64,0,1,float16,float16,0,0.03976533313592275
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,128,32,32,64,0,1,float16,fp8,0,0.03908266623814901
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,128,32,32,64,0,1,fp8,fp8,0,0.04795733094215393
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,128,32,1,64,0,1,float16,float16,0,0.03618133316437403
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,128,32,1,64,0,1,float16,fp8,0,0.03601066768169403
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,128,32,1,64,0,1,fp8,fp8,0,0.045738667249679565
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,128,32,2,64,0,1,float16,float16,0,0.03669333209594091
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,128,32,2,64,0,1,float16,fp8,0,0.0365226666132609
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,128,32,2,64,0,1,fp8,fp8,0,0.04539733131726583
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,128,32,4,64,0,1,float16,float16,0,0.0365226666132609
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,128,32,4,64,0,1,float16,fp8,0,0.0363520011305809
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,128,32,4,64,0,1,fp8,fp8,0,0.045738667249679565
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,128,32,8,64,0,1,float16,fp8,0,0.03669333209594091
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,128,32,8,64,0,1,float16,float16,0,0.03703466554482778
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,128,32,8,64,0,1,fp8,fp8,0,0.045738667249679565
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,128,32,32,64,0,1,float16,float16,0,0.023893333971500397
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,128,32,32,64,0,1,float16,fp8,0,0.024234667420387268
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,128,32,32,64,0,1,fp8,fp8,0,0.027306665976842243
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,128,32,1,64,0,1,float16,float16,0,0.023039999107519787
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,128,32,1,64,0,1,float16,fp8,0,0.023210667073726654
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,128,32,1,64,0,1,fp8,fp8,0,0.026965332527955372
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,128,32,2,64,0,1,float16,float16,0,0.022698665658632915
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,128,32,2,64,0,1,float16,fp8,0,0.023210667073726654
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,128,32,2,64,0,1,fp8,fp8,0,0.0266239990790685
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,128,32,4,64,0,1,float16,float16,0,0.022698665658632915
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,128,32,4,64,0,1,float16,fp8,0,0.023381332556406658
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,128,32,4,64,0,1,fp8,fp8,0,0.05341866612434387
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,128,32,8,64,0,1,float16,float16,0,0.023552000522613525
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,128,32,8,64,0,1,float16,fp8,0,0.02372266600529353
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,128,32,8,64,0,1,fp8,fp8,0,0.02679466704527537
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,128,32,32,64,0,1,float16,float16,0,0.015872000406185787
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,128,32,32,64,0,1,float16,fp8,0,0.015530666957298914
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,128,32,32,64,0,1,fp8,fp8,0,0.01791999985774358
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,128,32,1,64,0,1,float16,float16,0,0.014677333335081736
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,128,32,1,64,0,1,float16,fp8,0,0.014848000059525171
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,128,32,1,64,0,1,fp8,fp8,0,0.01791999985774358
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,128,32,2,64,0,1,float16,float16,0,0.014848000059525171
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,128,32,2,64,0,1,float16,fp8,0,0.014677333335081736
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,128,32,2,64,0,1,fp8,fp8,0,0.01757866640885671
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,128,32,4,64,0,1,float16,float16,0,0.014848000059525171
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,128,32,4,64,0,1,float16,fp8,0,0.014677333335081736
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,128,32,4,64,0,1,fp8,fp8,0,0.01757866640885671
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,128,32,8,64,0,1,float16,float16,0,0.015018666783968607
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,128,32,8,64,0,1,float16,fp8,0,0.014848000059525171
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,128,32,8,64,0,1,fp8,fp8,0,0.017749333133300144
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,128,32,32,64,0,1,float16,float16,0,0.011605333536863327
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,128,32,32,64,0,1,float16,fp8,0,0.011605333536863327
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,128,32,32,64,0,1,fp8,fp8,0,0.014335999886194864
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,128,32,1,64,0,1,float16,fp8,0,0.011434666812419891
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,128,32,1,64,0,1,float16,float16,0,0.011434666812419891
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,128,32,1,64,0,1,fp8,fp8,0,0.014165333161751429
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,128,32,2,64,0,1,float16,float16,0,0.011434666812419891
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,128,32,2,64,0,1,float16,fp8,0,0.011605333536863327
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,128,32,2,64,0,1,fp8,fp8,0,0.014165333161751429
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,128,32,4,64,0,1,float16,float16,0,0.011264000087976456
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,128,32,4,64,0,1,float16,fp8,0,0.011434666812419891
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,128,32,4,64,0,1,fp8,fp8,0,0.014335999886194864
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,128,32,8,64,0,1,float16,float16,0,0.011264000087976456
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,128,32,8,64,0,1,float16,fp8,0,0.011434666812419891
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,128,32,8,64,0,1,fp8,fp8,0,0.014165333161751429
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,128,32,32,64,0,1,float16,float16,0,0.010239999741315842
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,128,32,32,64,0,1,float16,fp8,0,0.010410666465759277
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,128,32,32,64,0,1,fp8,fp8,0,0.012800000607967377
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,128,32,1,64,0,1,float16,float16,0,0.010239999741315842
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,128,32,1,64,0,1,float16,fp8,0,0.010410666465759277
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,128,32,1,64,0,1,fp8,fp8,0,0.012800000607967377
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,128,32,2,64,0,1,float16,float16,0,0.010239999741315842
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,128,32,2,64,0,1,float16,fp8,0,0.010239999741315842
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,128,32,2,64,0,1,fp8,fp8,0,0.012800000607967377
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,128,32,4,64,0,1,float16,float16,0,0.010239999741315842
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,128,32,4,64,0,1,float16,fp8,0,0.010239999741315842
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,128,32,4,64,0,1,fp8,fp8,0,0.012800000607967377
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,128,32,8,64,0,1,float16,float16,0,0.010239999741315842
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,128,32,8,64,0,1,float16,fp8,0,0.010410666465759277
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,128,32,8,64,0,1,fp8,fp8,0,0.012970666090647379
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,64,32,1,64,0,1,float16,float16,0,0.37034666538238525
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,64,32,1,64,0,1,float16,fp8,0,0.36181334654490155
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,64,32,1,64,0,1,fp8,fp8,0,0.5060266653696696
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,64,32,2,64,0,1,float16,float16,0,0.400383989016215
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,64,32,2,64,0,1,float16,fp8,0,0.3928746779759725
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,64,32,2,64,0,1,fp8,fp8,0,0.5333333412806193
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,64,32,4,64,0,1,float16,float16,0,0.4647253354390462
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,64,32,4,64,0,1,float16,fp8,0,0.44492801030476886
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,64,32,4,64,0,1,fp8,fp8,0,0.5727573235829672
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,64,32,8,64,0,1,float16,float16,0,0.5802666743596395
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,64,32,8,64,0,1,float16,fp8,0,0.558079997698466
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,64,32,8,64,0,1,fp8,fp8,0,0.6336853504180908
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,64,32,32,64,0,1,float16,float16,0,0.5140479803085327
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,64,32,32,64,0,1,float16,fp8,0,0.452949325243632
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,64,32,32,64,0,1,fp8,fp8,0,0.493397315343221
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,64,32,1,64,0,1,float16,float16,0,0.09198932846387227
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,64,32,1,64,0,1,float16,fp8,0,0.09215999643007915
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,64,32,1,64,0,1,fp8,fp8,0,0.20087466637293497
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,64,32,2,64,0,1,float16,float16,0,0.09301333626111348
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,64,32,2,64,0,1,float16,fp8,0,0.09523199995358785
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,64,32,4,64,0,1,float16,float16,0,0.10035199920336406
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,64,32,4,64,0,1,float16,fp8,0,0.09591466188430786
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,64,32,2,64,0,1,fp8,fp8,0,0.20087466637293497
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,64,32,4,64,0,1,fp8,fp8,0,0.211626668771108
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,64,32,8,64,0,1,float16,float16,0,0.13209600249926248
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,64,32,8,64,0,1,float16,fp8,0,0.11878400047620137
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,64,32,8,64,0,1,fp8,fp8,0,0.2739199995994568
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,64,32,32,64,0,1,float16,float16,0,0.07918933530648549
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,64,32,32,64,0,1,float16,fp8,0,0.05649066468079885
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,64,32,32,64,0,1,fp8,fp8,0,0.19694934288660684
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,64,32,1,64,0,1,float16,float16,0,0.04983466863632202
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,64,32,1,64,0,1,float16,fp8,0,0.05000533163547516
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,64,32,1,64,0,1,fp8,fp8,0,0.10683733224868774
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,64,32,2,64,0,1,float16,float16,0,0.051370665431022644
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,64,32,2,64,0,1,float16,fp8,0,0.0506879985332489
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,64,32,4,64,0,1,float16,float16,0,0.05120000243186951
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,64,32,2,64,0,1,fp8,fp8,0,0.10734933614730835
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,64,32,4,64,0,1,float16,fp8,0,0.0506879985332489
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,64,32,4,64,0,1,fp8,fp8,0,0.10769066214561462
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,64,32,8,64,0,1,float16,float16,0,0.05205333232879639
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,64,32,8,64,0,1,float16,fp8,0,0.05171200136343638
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,64,32,8,64,0,1,fp8,fp8,0,0.10803199807802837
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,64,32,32,64,0,1,float16,float16,0,0.034304000437259674
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,64,32,32,64,0,1,float16,fp8,0,0.03362133353948593
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,64,32,32,64,0,1,fp8,fp8,0,0.06468266745408376
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,64,32,1,64,0,1,float16,float16,0,0.03054933249950409
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,64,32,1,64,0,1,float16,fp8,0,0.03054933249950409
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,64,32,1,64,0,1,fp8,fp8,0,0.06126933296521505
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,64,32,2,64,0,1,float16,float16,0,0.030720000465710957
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,64,32,2,64,0,1,float16,fp8,0,0.031231999397277832
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,64,32,4,64,0,1,float16,float16,0,0.031744000812371574
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,64,32,2,64,0,1,fp8,fp8,0,0.061610668897628784
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,64,32,4,64,0,1,fp8,fp8,0,0.062463998794555664
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,64,32,4,64,0,1,float16,fp8,0,0.031914666295051575
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,64,32,8,64,0,1,float16,float16,0,0.0314026673634847
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,64,32,8,64,0,1,float16,fp8,0,0.031744000812371574
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,64,32,8,64,0,1,fp8,fp8,0,0.0628053347269694
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,64,32,32,64,0,1,float16,float16,0,0.021162666380405426
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,64,32,32,64,0,1,float16,fp8,0,0.020992000897725422
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,64,32,32,64,0,1,fp8,fp8,0,0.03703466554482778
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,64,32,1,64,0,1,float16,float16,0,0.019626667102177937
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,64,32,1,64,0,1,float16,fp8,0,0.019797333826621372
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,64,32,1,64,0,1,fp8,fp8,0,0.03618133316437403
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,64,32,2,64,0,1,float16,float16,0,0.019968000551064808
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,64,32,2,64,0,1,float16,fp8,0,0.019968000551064808
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,64,32,2,64,0,1,fp8,fp8,0,0.037205333511034645
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,64,32,4,64,0,1,float16,float16,0,0.019968000551064808
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,64,32,4,64,0,1,fp8,fp8,0,0.03669333209594091
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,64,32,4,64,0,1,float16,fp8,0,0.019968000551064808
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,64,32,8,64,0,1,float16,fp8,0,0.019968000551064808
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,64,32,8,64,0,1,float16,float16,0,0.020138667275508244
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,64,32,8,64,0,1,fp8,fp8,0,0.0363520011305809
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,64,32,32,64,0,1,float16,float16,0,0.013823999712864557
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,64,32,32,64,0,1,float16,fp8,0,0.013823999712864557
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,64,32,32,64,0,1,fp8,fp8,0,0.022015998760859173
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,64,32,1,64,0,1,float16,float16,0,0.012970666090647379
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,64,32,1,64,0,1,float16,fp8,0,0.012800000607967377
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,64,32,1,64,0,1,fp8,fp8,0,0.021503999829292297
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,64,32,2,64,0,1,float16,float16,0,0.012800000607967377
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,64,32,2,64,0,1,float16,fp8,0,0.013141332815090815
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,64,32,2,64,0,1,fp8,fp8,0,0.021333334346612293
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,64,32,4,64,0,1,float16,float16,0,0.012970666090647379
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,64,32,4,64,0,1,float16,fp8,0,0.013141332815090815
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,64,32,4,64,0,1,fp8,fp8,0,0.021503999829292297
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,64,32,8,64,0,1,float16,fp8,0,0.012970666090647379
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,64,32,8,64,0,1,float16,float16,0,0.012800000607967377
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,64,32,8,64,0,1,fp8,fp8,0,0.021333334346612293
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,64,32,32,64,0,1,float16,float16,0,0.010410666465759277
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,64,32,32,64,0,1,float16,fp8,0,0.010751999914646149
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,64,32,32,64,0,1,fp8,fp8,0,0.015360000232855478
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,64,32,1,64,0,1,float16,float16,0,0.010239999741315842
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,64,32,1,64,0,1,float16,fp8,0,0.010239999741315842
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,64,32,1,64,0,1,fp8,fp8,0,0.015189333508412043
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,64,32,2,64,0,1,float16,float16,0,0.010069333637754122
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,64,32,2,64,0,1,float16,fp8,0,0.010069333637754122
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,64,32,2,64,0,1,fp8,fp8,0,0.015018666783968607
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,64,32,4,64,0,1,float16,float16,0,0.010069333637754122
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,64,32,4,64,0,1,float16,fp8,0,0.010239999741315842
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,64,32,4,64,0,1,fp8,fp8,0,0.015189333508412043
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,64,32,8,64,0,1,float16,fp8,0,0.010239999741315842
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,64,32,8,64,0,1,float16,float16,0,0.010069333637754122
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,64,32,8,64,0,1,fp8,fp8,0,0.015189333508412043
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,64,32,32,64,0,1,float16,float16,0,0.009045333291093508
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,64,32,32,64,0,1,float16,fp8,0,0.00938666673998038
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,64,32,32,64,0,1,fp8,fp8,0,0.012970666090647379
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,64,32,1,64,0,1,float16,float16,0,0.009216000015536943
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,64,32,1,64,0,1,float16,fp8,0,0.00938666673998038
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,64,32,1,64,0,1,fp8,fp8,0,0.012970666090647379
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,64,32,2,64,0,1,float16,float16,0,0.010069333637754122
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,64,32,2,64,0,1,float16,fp8,0,0.009216000015536943
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,64,32,2,64,0,1,fp8,fp8,0,0.012970666090647379
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,64,32,4,64,0,1,float16,float16,0,0.008874666566650072
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,64,32,4,64,0,1,float16,fp8,0,0.009216000015536943
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,64,32,4,64,0,1,fp8,fp8,0,0.012970666090647379
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,64,32,8,64,0,1,float16,float16,0,0.009216000015536943
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,64,32,8,64,0,1,float16,fp8,0,0.00938666673998038
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,64,32,8,64,0,1,fp8,fp8,0,0.013141332815090815
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,64,32,32,64,0,1,float16,float16,0,0.0085333331177632
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,64,32,32,64,0,1,float16,fp8,0,0.008874666566650072
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,64,32,32,64,0,1,fp8,fp8,0,0.011946666985750198
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,64,32,1,64,0,1,float16,float16,0,0.008703999842206636
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,64,32,1,64,0,1,float16,fp8,0,0.008874666566650072
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,64,32,1,64,0,1,fp8,fp8,0,0.01228800043463707
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,64,32,2,64,0,1,float16,float16,0,0.0085333331177632
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,64,32,2,64,0,1,float16,fp8,0,0.009216000015536943
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,64,32,2,64,0,1,fp8,fp8,0,0.01228800043463707
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,64,32,4,64,0,1,float16,float16,0,0.008703999842206636
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,64,32,4,64,0,1,float16,fp8,0,0.008874666566650072
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,64,32,4,64,0,1,fp8,fp8,0,0.01228800043463707
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,64,32,8,64,0,1,float16,float16,0,0.008703999842206636
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,64,32,8,64,0,1,float16,fp8,0,0.009045333291093508
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,64,32,8,64,0,1,fp8,fp8,0,0.01228800043463707
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,32,32,1,64,0,1,float16,float16,0,0.10410666465759277
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,32,32,1,64,0,1,float16,fp8,0,0.10359467069307964
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,32,32,1,64,0,1,fp8,fp8,0,0.3326293428738912
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,32,32,2,64,0,1,float16,fp8,0,0.10513066252072652
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,32,32,2,64,0,1,float16,float16,0,0.1053013304869334
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,32,32,2,64,0,1,fp8,fp8,0,0.33382399876912433
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,32,32,4,64,0,1,float16,float16,0,0.1088853379090627
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,32,32,4,64,0,1,float16,fp8,0,0.1083733340104421
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,32,32,4,64,0,1,fp8,fp8,0,0.3423573176066081
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,32,32,8,64,0,1,float16,float16,0,0.1295360028743744
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,32,32,8,64,0,1,float16,fp8,0,0.11758933464686076
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,32,32,32,64,0,1,float16,float16,0,0.08703999718030293
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,32,32,8,64,0,1,fp8,fp8,0,0.3990186850229899
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,32,32,32,64,0,1,float16,fp8,0,0.06007466713587443
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,32,32,32,64,0,1,fp8,fp8,0,0.2616320053736369
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,32,32,1,64,0,1,float16,float16,0,0.055125330885251365
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,32,32,1,64,0,1,float16,fp8,0,0.05495466788609823
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,32,32,1,64,0,1,fp8,fp8,0,0.17322667439778647
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,32,32,2,64,0,1,float16,float16,0,0.055125330885251365
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,32,32,2,64,0,1,float16,fp8,0,0.0554666668176651
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,32,32,2,64,0,1,fp8,fp8,0,0.1730560064315796
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,32,32,4,64,0,1,float16,float16,0,0.05597866574923197
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,32,32,4,64,0,1,float16,fp8,0,0.05580799778302511
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,32,32,4,64,0,1,fp8,fp8,0,0.1728853384653727
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,32,32,8,64,0,1,float16,float16,0,0.056320001681645714
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,32,32,8,64,0,1,fp8,fp8,0,0.17373865842819214
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,32,32,8,64,0,1,float16,fp8,0,0.05580799778302511
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,32,32,32,64,0,1,float16,float16,0,0.03549866626660029
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,32,32,32,64,0,1,float16,fp8,0,0.0341333324710528
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,32,32,32,64,0,1,fp8,fp8,0,0.0981333355108897
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,32,32,1,64,0,1,float16,float16,0,0.03293866664171219
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,32,32,1,64,0,1,float16,fp8,0,0.03293866664171219
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,32,32,1,64,0,1,fp8,fp8,0,0.09523199995358785
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,32,32,2,64,0,1,float16,float16,0,0.03345066557327906
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,32,32,2,64,0,1,float16,fp8,0,0.03310933212439219
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,32,32,4,64,0,1,float16,float16,0,0.0339626669883728
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,32,32,2,64,0,1,fp8,fp8,0,0.09523199995358785
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,32,32,4,64,0,1,float16,fp8,0,0.03362133353948593
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,32,32,4,64,0,1,fp8,fp8,0,0.09608532985051473
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,32,32,8,64,0,1,float16,float16,0,0.03362133353948593
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,32,32,8,64,0,1,float16,fp8,0,0.03345066557327906
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,32,32,8,64,0,1,fp8,fp8,0,0.09591466188430786
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,32,32,32,64,0,1,float16,float16,0,0.021333334346612293
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,32,32,32,64,0,1,float16,fp8,0,0.021333334346612293
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,32,32,32,64,0,1,fp8,fp8,0,0.05580799778302511
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,32,32,1,64,0,1,float16,float16,0,0.020992000897725422
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,32,32,1,64,0,1,float16,fp8,0,0.021333334346612293
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,32,32,1,64,0,1,fp8,fp8,0,0.05563733478387197
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,32,32,2,64,0,1,float16,float16,0,0.020992000897725422
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,32,32,2,64,0,1,float16,fp8,0,0.020821332931518555
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,32,32,2,64,0,1,fp8,fp8,0,0.05563733478387197
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,32,32,4,64,0,1,float16,float16,0,0.020821332931518555
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,32,32,4,64,0,1,float16,fp8,0,0.020992000897725422
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,32,32,8,64,0,1,float16,float16,0,0.021333334346612293
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,32,32,4,64,0,1,fp8,fp8,0,0.0554666668176651
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,32,32,8,64,0,1,float16,fp8,0,0.020821332931518555
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,32,32,8,64,0,1,fp8,fp8,0,0.05580799778302511
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,32,32,32,64,0,1,float16,float16,0,0.014335999886194864
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,32,32,32,64,0,1,float16,fp8,0,0.014165333161751429
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,32,32,32,64,0,1,fp8,fp8,0,0.03242666771014532
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,32,32,1,64,0,1,float16,float16,0,0.013823999712864557
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,32,32,1,64,0,1,float16,fp8,0,0.013653332988421122
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,32,32,1,64,0,1,fp8,fp8,0,0.032255999743938446
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,32,32,2,64,0,1,float16,float16,0,0.013482666263977686
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,32,32,2,64,0,1,float16,fp8,0,0.013653332988421122
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,32,32,2,64,0,1,fp8,fp8,0,0.032085334261258446
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,32,32,4,64,0,1,float16,float16,0,0.013653332988421122
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,32,32,4,64,0,1,float16,fp8,0,0.013653332988421122
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,32,32,4,64,0,1,fp8,fp8,0,0.03242666771014532
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,32,32,8,64,0,1,float16,float16,0,0.013994666437307993
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,32,32,8,64,0,1,float16,fp8,0,0.013994666437307993
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,32,32,8,64,0,1,fp8,fp8,0,0.03242666771014532
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,32,32,32,64,0,1,float16,float16,0,0.010581333190202713
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,32,32,32,64,0,1,float16,fp8,0,0.010581333190202713
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,32,32,32,64,0,1,fp8,fp8,0,0.019797333826621372
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,32,32,1,64,0,1,float16,float16,0,0.010239999741315842
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,32,32,1,64,0,1,fp8,fp8,0,0.019968000551064808
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,32,32,1,64,0,1,float16,fp8,0,0.010410666465759277
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,32,32,2,64,0,1,float16,float16,0,0.010069333637754122
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,32,32,2,64,0,1,float16,fp8,0,0.010410666465759277
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,32,32,2,64,0,1,fp8,fp8,0,0.019797333826621372
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,32,32,4,64,0,1,float16,fp8,0,0.010239999741315842
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,32,32,4,64,0,1,float16,float16,0,0.010239999741315842
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,32,32,4,64,0,1,fp8,fp8,0,0.019797333826621372
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,32,32,8,64,0,1,float16,float16,0,0.010239999741315842
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,32,32,8,64,0,1,float16,fp8,0,0.010581333190202713
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,32,32,8,64,0,1,fp8,fp8,0,0.019797333826621372
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,32,32,32,64,0,1,float16,float16,0,0.008874666566650072
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,32,32,32,64,0,1,float16,fp8,0,0.009216000015536943
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,32,32,32,64,0,1,fp8,fp8,0,0.0145066666106383
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,32,32,1,64,0,1,float16,float16,0,0.008874666566650072
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,32,32,1,64,0,1,float16,fp8,0,0.009045333291093508
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,32,32,1,64,0,1,fp8,fp8,0,0.014848000059525171
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,32,32,2,64,0,1,float16,float16,0,0.008874666566650072
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,32,32,2,64,0,1,float16,fp8,0,0.009216000015536943
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,32,32,2,64,0,1,fp8,fp8,0,0.0145066666106383
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,32,32,4,64,0,1,float16,float16,0,0.008874666566650072
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,32,32,4,64,0,1,float16,fp8,0,0.009216000015536943
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,32,32,8,64,0,1,float16,fp8,0,0.009216000015536943
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,32,32,4,64,0,1,fp8,fp8,0,0.014677333335081736
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,32,32,8,64,0,1,float16,float16,0,0.009045333291093508
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,32,32,8,64,0,1,fp8,fp8,0,0.014677333335081736
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,32,32,32,64,0,1,float16,float16,0,0.009045333291093508
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,32,32,32,64,0,1,float16,fp8,0,0.008192000289758047
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,32,32,32,64,0,1,fp8,fp8,0,0.01228800043463707
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,32,32,1,64,0,1,float16,fp8,0,0.008362666393319765
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,32,32,1,64,0,1,float16,float16,0,0.008192000289758047
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,32,32,1,64,0,1,fp8,fp8,0,0.012629333883523941
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,32,32,2,64,0,1,float16,float16,0,0.008192000289758047
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,32,32,2,64,0,1,float16,fp8,0,0.009216000015536943
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,32,32,2,64,0,1,fp8,fp8,0,0.012800000607967377
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,32,32,4,64,0,1,float16,float16,0,0.008192000289758047
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,32,32,4,64,0,1,float16,fp8,0,0.008362666393319765
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,32,32,4,64,0,1,fp8,fp8,0,0.012629333883523941
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,32,32,8,64,0,1,float16,float16,0,0.0085333331177632
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,32,32,8,64,0,1,float16,fp8,0,0.0085333331177632
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,32,32,8,64,0,1,fp8,fp8,0,0.012629333883523941
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,32,32,32,64,0,1,float16,float16,0,0.008703999842206636
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,32,32,32,64,0,1,float16,fp8,0,0.008703999842206636
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,32,32,32,64,0,1,fp8,fp8,0,0.011776000261306763
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,32,32,1,64,0,1,float16,float16,0,0.008703999842206636
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,32,32,1,64,0,1,float16,fp8,0,0.009365333244204521
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,32,32,1,64,0,1,fp8,fp8,0,0.012117333710193634
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,32,32,2,64,0,1,float16,fp8,0,0.0085333331177632
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,32,32,2,64,0,1,float16,float16,0,0.007850666840871176
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,32,32,2,64,0,1,fp8,fp8,0,0.012117333710193634
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,32,32,4,64,0,1,float16,float16,0,0.009045333291093508
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,32,32,4,64,0,1,float16,fp8,0,0.008192000289758047
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,32,32,4,64,0,1,fp8,fp8,0,0.011946666985750198
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,32,32,8,64,0,1,float16,float16,0,0.008874666566650072
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,32,32,8,64,0,1,float16,fp8,0,0.008362666393319765
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,32,32,8,64,0,1,fp8,fp8,0,0.012117333710193634
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,16,32,1,64,0,1,float16,float16,0,0.07918933530648549
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,16,32,1,64,0,1,fp8,fp8,0,0.30668799082438153
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,16,32,1,64,0,1,float16,fp8,0,0.07901866734027863
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,16,32,2,64,0,1,float16,float16,0,0.0795306662718455
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,16,32,2,64,0,1,float16,fp8,0,0.07918933530648549
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,16,32,2,64,0,1,fp8,fp8,0,0.30702932675679523
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,16,32,4,64,0,1,float16,fp8,0,0.07987200220425923
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,16,32,4,64,0,1,float16,float16,0,0.08004266520341237
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,16,32,8,64,0,1,float16,float16,0,0.08038400113582611
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,16,32,4,64,0,1,fp8,fp8,0,0.3056640028953552
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,16,32,8,64,0,1,float16,fp8,0,0.07987200220425923
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,16,32,8,64,0,1,fp8,fp8,0,0.3068586587905884
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,16,32,32,64,0,1,float16,float16,0,0.04471466441949209
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,16,32,32,64,0,1,float16,fp8,0,0.04369066655635834
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,16,32,32,64,0,1,fp8,fp8,0,0.16383999586105347
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,16,32,1,64,0,1,float16,float16,0,0.04454400142033895
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,16,32,1,64,0,1,float16,fp8,0,0.04488533238569895
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,16,32,2,64,0,1,float16,float16,0,0.04471466441949209
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,16,32,1,64,0,1,fp8,fp8,0,0.1616213321685791
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,16,32,2,64,0,1,float16,fp8,0,0.04454400142033895
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,16,32,2,64,0,1,fp8,fp8,0,0.16179200013478598
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,16,32,4,64,0,1,float16,float16,0,0.04505600035190582
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,16,32,4,64,0,1,float16,fp8,0,0.04488533238569895
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,16,32,4,64,0,1,fp8,fp8,0,0.16264533003171286
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,16,32,8,64,0,1,float16,float16,0,0.04539733131726583
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,16,32,8,64,0,1,float16,fp8,0,0.04488533238569895
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,16,32,32,64,0,1,float16,float16,0,0.02611200014750163
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,16,32,8,64,0,1,fp8,fp8,0,0.16196266810099283
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,16,32,32,64,0,1,float16,fp8,0,0.025941332181294758
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,16,32,32,64,0,1,fp8,fp8,0,0.08874666690826416
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,16,32,1,64,0,1,float16,float16,0,0.0264533335963885
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,16,32,1,64,0,1,float16,fp8,0,0.0266239990790685
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,16,32,1,64,0,1,fp8,fp8,0,0.0885759989420573
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,16,32,2,64,0,1,float16,float16,0,0.0264533335963885
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,16,32,2,64,0,1,float16,fp8,0,0.0264533335963885
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,16,32,2,64,0,1,fp8,fp8,0,0.08806399504343669
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,16,32,4,64,0,1,float16,float16,0,0.0266239990790685
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,16,32,4,64,0,1,float16,fp8,0,0.02679466704527537
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,16,32,4,64,0,1,fp8,fp8,0,0.08874666690826416
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,16,32,8,64,0,1,float16,float16,0,0.02679466704527537
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,16,32,8,64,0,1,float16,fp8,0,0.02679466704527537
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,16,32,32,64,0,1,float16,float16,0,0.016554666062196095
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,16,32,8,64,0,1,fp8,fp8,0,0.08891733487447102
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,16,32,32,64,0,1,float16,fp8,0,0.016384000579516094
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,16,32,32,64,0,1,fp8,fp8,0,0.051541333397229515
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,16,32,1,64,0,1,float16,float16,0,0.016554666062196095
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,16,32,1,64,0,1,float16,fp8,0,0.016384000579516094
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,16,32,1,64,0,1,fp8,fp8,0,0.051541333397229515
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,16,32,2,64,0,1,float16,float16,0,0.016554666062196095
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,16,32,2,64,0,1,float16,fp8,0,0.016384000579516094
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,16,32,2,64,0,1,fp8,fp8,0,0.051029334465662636
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,16,32,4,64,0,1,float16,float16,0,0.01621333385507266
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,16,32,4,64,0,1,float16,fp8,0,0.016554666062196095
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,16,32,4,64,0,1,fp8,fp8,0,0.051370665431022644
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,16,32,8,64,0,1,float16,float16,0,0.016554666062196095
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,16,32,8,64,0,1,float16,fp8,0,0.01672533278663953
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,16,32,32,64,0,1,float16,float16,0,0.011434666812419891
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,16,32,8,64,0,1,fp8,fp8,0,0.051882664362589516
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,16,32,32,64,0,1,float16,fp8,0,0.011605333536863327
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,16,32,32,64,0,1,fp8,fp8,0,0.030207999050617218
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,16,32,1,64,0,1,float16,float16,0,0.011434666812419891
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,16,32,1,64,0,1,float16,fp8,0,0.011605333536863327
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,16,32,1,64,0,1,fp8,fp8,0,0.03054933249950409
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,16,32,2,64,0,1,float16,float16,0,0.011434666812419891
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,16,32,2,64,0,1,float16,fp8,0,0.011434666812419891
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,16,32,2,64,0,1,fp8,fp8,0,0.030720000465710957
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,16,32,4,64,0,1,float16,float16,0,0.011434666812419891
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,16,32,4,64,0,1,float16,fp8,0,0.011434666812419891
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,16,32,4,64,0,1,fp8,fp8,0,0.030378667016824085
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,16,32,8,64,0,1,float16,float16,0,0.011434666812419891
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,16,32,8,64,0,1,float16,fp8,0,0.011605333536863327
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,16,32,32,64,0,1,float16,float16,0,0.0085333331177632
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,16,32,8,64,0,1,fp8,fp8,0,0.030720000465710957
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,16,32,32,64,0,1,float16,fp8,0,0.009216000015536943
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,16,32,32,64,0,1,fp8,fp8,0,0.01826133330663045
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,16,32,1,64,0,1,float16,float16,0,0.008874666566650072
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,16,32,1,64,0,1,float16,fp8,0,0.009045333291093508
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,16,32,1,64,0,1,fp8,fp8,0,0.01877333347996076
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,16,32,2,64,0,1,float16,float16,0,0.008874666566650072
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,16,32,2,64,0,1,float16,fp8,0,0.008874666566650072
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,16,32,2,64,0,1,fp8,fp8,0,0.018944000204404194
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,16,32,4,64,0,1,float16,float16,0,0.009045333291093508
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,16,32,4,64,0,1,float16,fp8,0,0.009045333291093508
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,16,32,4,64,0,1,fp8,fp8,0,0.018432000031073887
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,16,32,8,64,0,1,float16,float16,0,0.008874666566650072
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,16,32,8,64,0,1,float16,fp8,0,0.009045333291093508
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,16,32,8,64,0,1,fp8,fp8,0,0.01911466692884763
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,16,32,32,64,0,1,float16,float16,0,0.008021333565314611
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,16,32,32,64,0,1,float16,fp8,0,0.0085333331177632
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,16,32,1,64,0,1,float16,float16,0,0.009205333267649015
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,16,32,32,64,0,1,fp8,fp8,0,0.013994666437307993
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,16,32,1,64,0,1,fp8,fp8,0,0.013994666437307993
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,16,32,1,64,0,1,float16,fp8,0,0.008703999842206636
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,16,32,2,64,0,1,float16,float16,0,0.008874666566650072
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,16,32,2,64,0,1,float16,fp8,0,0.008362666393319765
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,16,32,2,64,0,1,fp8,fp8,0,0.013823999712864557
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,16,32,4,64,0,1,float16,float16,0,0.008362666393319765
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,16,32,4,64,0,1,fp8,fp8,0,0.014165333161751429
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,16,32,4,64,0,1,float16,fp8,0,0.008192000289758047
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,16,32,8,64,0,1,float16,fp8,0,0.008703999842206636
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,16,32,8,64,0,1,float16,float16,0,0.0085333331177632
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,16,32,8,64,0,1,fp8,fp8,0,0.013994666437307993
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,16,32,32,64,0,1,float16,float16,0,0.007850666840871176
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,16,32,32,64,0,1,float16,fp8,0,0.007850666840871176
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,16,32,1,64,0,1,float16,float16,0,0.008874666566650072
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,16,32,32,64,0,1,fp8,fp8,0,0.01228800043463707
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,16,32,1,64,0,1,float16,fp8,0,0.009045333291093508
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,16,32,1,64,0,1,fp8,fp8,0,0.012458667159080505
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,16,32,2,64,0,1,float16,float16,0,0.009045333291093508
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,16,32,2,64,0,1,float16,fp8,0,0.008874666566650072
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,16,32,2,64,0,1,fp8,fp8,0,0.012629333883523941
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,16,32,4,64,0,1,float16,float16,0,0.007680000116427739
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,16,32,4,64,0,1,float16,fp8,0,0.009045333291093508
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,16,32,4,64,0,1,fp8,fp8,0,0.012458667159080505
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,16,32,8,64,0,1,float16,float16,0,0.008826666822036108
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,16,32,8,64,0,1,float16,fp8,0,0.008192000289758047
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,16,32,8,64,0,1,fp8,fp8,0,0.012458667159080505
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,16,32,32,64,0,1,float16,float16,0,0.008362666393319765
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,16,32,32,64,0,1,float16,fp8,0,0.007850666840871176
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,16,32,32,64,0,1,fp8,fp8,0,0.011605333536863327
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,16,32,1,64,0,1,float16,float16,0,0.008703999842206636
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,16,32,1,64,0,1,float16,fp8,0,0.008837333569924036
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,16,32,1,64,0,1,fp8,fp8,0,0.011776000261306763
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,16,32,2,64,0,1,float16,float16,0,0.008192000289758047
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,16,32,2,64,0,1,float16,fp8,0,0.008874666566650072
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,16,32,2,64,0,1,fp8,fp8,0,0.011946666985750198
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,16,32,4,64,0,1,float16,float16,0,0.007680000116427739
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,16,32,4,64,0,1,float16,fp8,0,0.008874666566650072
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,16,32,4,64,0,1,fp8,fp8,0,0.011946666985750198
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,16,32,8,64,0,1,float16,float16,0,0.0085333331177632
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,16,32,8,64,0,1,float16,fp8,0,0.008362666393319765
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,16,32,8,64,0,1,fp8,fp8,0,0.011946666985750198
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,16384,24,1,64,0,1,float16,float16,0,45.5726064046224
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,16384,24,1,64,0,1,float16,fp8,0,45.3043212890625
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,16384,24,2,64,0,1,float16,float16,0,44.7805430094401
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,16384,24,4,64,0,1,float16,float16,0,44.72712707519531
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,16384,24,2,64,0,1,float16,fp8,0,46.69712829589844
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,16384,24,4,64,0,1,float16,fp8,0,45.281280517578125
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,16384,24,1,64,0,1,fp8,fp8,0,58.57911682128906
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,16384,24,2,64,0,1,fp8,fp8,0,58.96857706705729
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,16384,24,24,64,0,1,float16,float16,0,23.61292775472005
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,16384,24,24,64,0,1,float16,fp8,0,23.23046366373698
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,16384,24,1,64,0,1,float16,float16,0,22.662485758463543
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,16384,24,8,64,0,1,float16,float16,0,45.34971618652344
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,16384,24,24,64,0,1,fp8,fp8,0,30.65514628092448
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,16384,24,8,64,0,1,float16,fp8,0,44.723368326822914
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,16384,24,4,64,0,1,fp8,fp8,0,60.042582194010414
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,16384,24,8,64,0,1,fp8,fp8,0,60.79761250813802
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,16384,24,1,64,0,1,float16,fp8,0,23.275179545084637
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,16384,24,1,64,0,1,fp8,fp8,0,29.271209716796875
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,16384,24,2,64,0,1,float16,float16,0,22.603434244791668
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,16384,24,2,64,0,1,float16,fp8,0,22.876500447591145
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,16384,24,4,64,0,1,float16,float16,0,22.145535786946613
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,16384,24,2,64,0,1,fp8,fp8,0,29.143211364746094
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,16384,24,4,64,0,1,float16,fp8,0,22.550015767415363
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,16384,24,24,64,0,1,float16,float16,0,11.815252939860025
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,16384,24,8,64,0,1,float16,float16,0,22.768641153971355
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,16384,24,4,64,0,1,fp8,fp8,0,29.315414428710938
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,16384,24,24,64,0,1,float16,fp8,0,11.8558718363444
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,16384,24,8,64,0,1,float16,fp8,0,22.488576253255207
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,16384,24,24,64,0,1,fp8,fp8,0,15.30282719930013
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,16384,24,1,64,0,1,float16,float16,0,11.75005849202474
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,16384,24,8,64,0,1,fp8,fp8,0,29.454505920410156
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,16384,24,1,64,0,1,float16,fp8,0,11.763712565104166
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,16384,24,2,64,0,1,float16,float16,0,11.35974375406901
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,16384,24,1,64,0,1,fp8,fp8,0,14.843221028645834
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,16384,24,2,64,0,1,float16,fp8,0,11.64578119913737
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,16384,24,2,64,0,1,fp8,fp8,0,14.755327860514322
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,16384,24,4,64,0,1,float16,float16,0,11.484500885009766
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,16384,24,4,64,0,1,float16,fp8,0,11.690666198730469
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,16384,24,4,64,0,1,fp8,fp8,0,14.566399892171225
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,16384,24,8,64,0,1,float16,float16,0,11.71438980102539
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,16384,24,24,64,0,1,float16,float16,0,5.954389572143555
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,16384,24,8,64,0,1,float16,fp8,0,11.43057123819987
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,16384,24,1,64,0,1,float16,float16,0,5.519018809000651
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,16384,24,24,64,0,1,float16,fp8,0,5.600255966186523
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,16384,24,24,64,0,1,fp8,fp8,0,7.856128056844075
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,16384,24,8,64,0,1,fp8,fp8,0,15.071744283040365
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,16384,24,1,64,0,1,float16,fp8,0,5.214378674825032
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,16384,24,2,64,0,1,float16,float16,0,5.491029103597005
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,16384,24,1,64,0,1,fp8,fp8,0,7.488170623779297
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,16384,24,2,64,0,1,float16,fp8,0,5.649920145670573
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,16384,24,4,64,0,1,float16,float16,0,5.742080052693685
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,16384,24,2,64,0,1,fp8,fp8,0,7.461034774780273
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,16384,24,4,64,0,1,float16,fp8,0,5.568341573079427
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,16384,24,4,64,0,1,fp8,fp8,0,7.564629236857097
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,16384,24,8,64,0,1,float16,float16,0,5.590698877970378
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,16384,24,8,64,0,1,float16,fp8,0,5.928106943766276
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,16384,24,8,64,0,1,fp8,fp8,0,7.6503041585286455
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,12288,24,1,64,0,1,float16,float16,0,25.996971130371094
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,12288,24,1,64,0,1,float16,fp8,0,26.73749287923177
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,12288,24,2,64,0,1,float16,float16,0,26.251263936360676
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,12288,24,2,64,0,1,float16,fp8,0,26.413909912109375
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,12288,24,1,64,0,1,fp8,fp8,0,33.17913564046224
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,12288,24,4,64,0,1,float16,float16,0,25.777493794759113
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,12288,24,2,64,0,1,fp8,fp8,0,33.34502410888672
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,12288,24,4,64,0,1,float16,fp8,0,26.225494384765625
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,12288,24,24,64,0,1,float16,float16,0,13.986986796061197
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,12288,24,8,64,0,1,float16,float16,0,26.164395650227863
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,12288,24,24,64,0,1,float16,fp8,0,13.7260373433431
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,12288,24,8,64,0,1,float16,fp8,0,25.999359130859375
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,12288,24,1,64,0,1,float16,float16,0,13.395968119303385
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,12288,24,4,64,0,1,fp8,fp8,0,33.74267832438151
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,12288,24,24,64,0,1,fp8,fp8,0,17.5817387898763
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,12288,24,8,64,0,1,fp8,fp8,0,34.99537150065104
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,12288,24,1,64,0,1,float16,fp8,0,13.42361577351888
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,12288,24,2,64,0,1,float16,float16,0,13.606229146321615
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,12288,24,2,64,0,1,float16,fp8,0,13.309440612792969
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,12288,24,4,64,0,1,float16,float16,0,13.033983866373697
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,12288,24,1,64,0,1,fp8,fp8,0,16.778069814046223
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,12288,24,4,64,0,1,float16,fp8,0,13.117440541585287
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,12288,24,2,64,0,1,fp8,fp8,0,16.65399424235026
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,12288,24,4,64,0,1,fp8,fp8,0,16.94037373860677
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,12288,24,8,64,0,1,float16,float16,0,13.485909779866537
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,12288,24,24,64,0,1,float16,float16,0,6.605653127034505
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,12288,24,24,64,0,1,float16,fp8,0,7.169877370198567
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,12288,24,1,64,0,1,float16,float16,0,6.506837209065755
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,12288,24,8,64,0,1,float16,fp8,0,13.209087371826172
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,12288,24,24,64,0,1,fp8,fp8,0,8.86408551534017
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,12288,24,8,64,0,1,fp8,fp8,0,17.029120127360027
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,12288,24,1,64,0,1,float16,fp8,0,6.395392100016276
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,12288,24,1,64,0,1,fp8,fp8,0,8.408917109171549
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,12288,24,2,64,0,1,float16,float16,0,5.9043839772542315
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,12288,24,2,64,0,1,float16,fp8,0,6.833322525024414
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,12288,24,4,64,0,1,float16,float16,0,6.3633066813151045
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,12288,24,2,64,0,1,fp8,fp8,0,8.308565139770508
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,12288,24,4,64,0,1,float16,fp8,0,6.6099198659261065
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,12288,24,4,64,0,1,fp8,fp8,0,8.518826802571615
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,12288,24,8,64,0,1,float16,float16,0,7.091541290283203
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,12288,24,24,64,0,1,float16,float16,0,3.260416030883789
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,12288,24,8,64,0,1,float16,fp8,0,6.822912216186523
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,12288,24,24,64,0,1,float16,fp8,0,3.1102294921875
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,12288,24,1,64,0,1,float16,float16,0,2.998784065246582
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,12288,24,8,64,0,1,fp8,fp8,0,8.553301493326822
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,12288,24,24,64,0,1,fp8,fp8,0,4.4347734451293945
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,12288,24,1,64,0,1,float16,fp8,0,2.971989313761393
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,12288,24,1,64,0,1,fp8,fp8,0,4.22877852121989
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,12288,24,2,64,0,1,float16,float16,0,3.0097068150838218
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,12288,24,2,64,0,1,float16,fp8,0,2.993493398030599
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,12288,24,2,64,0,1,fp8,fp8,0,4.187306722005208
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,12288,24,4,64,0,1,float16,float16,0,3.0984532038370767
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,12288,24,4,64,0,1,float16,fp8,0,2.9974187215169272
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,12288,24,4,64,0,1,fp8,fp8,0,4.129621187845866
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,12288,24,8,64,0,1,float16,float16,0,3.0745598475138345
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,12288,24,8,64,0,1,float16,fp8,0,2.946218808492025
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,12288,24,8,64,0,1,fp8,fp8,0,4.317354520161946
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,10240,24,1,64,0,1,float16,float16,0,18.91549809773763
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,10240,24,1,64,0,1,float16,fp8,0,18.74841562906901
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,10240,24,2,64,0,1,float16,float16,0,18.456063588460285
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,10240,24,2,64,0,1,float16,fp8,0,18.456576029459637
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,10240,24,1,64,0,1,fp8,fp8,0,23.059796651204426
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,10240,24,2,64,0,1,fp8,fp8,0,23.424171447753906
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,10240,24,4,64,0,1,float16,float16,0,18.88392512003581
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,10240,24,4,64,0,1,float16,fp8,0,18.947755177815754
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,10240,24,24,64,0,1,float16,float16,0,10.033493041992188
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,10240,24,8,64,0,1,float16,float16,0,18.911914825439453
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,10240,24,8,64,0,1,float16,fp8,0,18.266624450683594
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,10240,24,24,64,0,1,float16,fp8,0,10.060458501180014
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,10240,24,4,64,0,1,fp8,fp8,0,23.728810628255207
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,10240,24,24,64,0,1,fp8,fp8,0,12.687872568766275
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,10240,24,1,64,0,1,float16,float16,0,9.536853154500326
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,10240,24,8,64,0,1,fp8,fp8,0,24.245760599772137
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,10240,24,1,64,0,1,float16,fp8,0,9.409535725911459
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,10240,24,2,64,0,1,float16,float16,0,9.343829472859701
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,10240,24,2,64,0,1,float16,fp8,0,9.336149215698242
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,10240,24,1,64,0,1,fp8,fp8,0,11.617621103922525
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,10240,24,2,64,0,1,fp8,fp8,0,11.709781646728516
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,10240,24,4,64,0,1,float16,float16,0,9.604437510172525
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,10240,24,4,64,0,1,float16,fp8,0,9.810944239298502
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,10240,24,4,64,0,1,fp8,fp8,0,11.775146484375
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,10240,24,8,64,0,1,float16,float16,0,9.541802724202475
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,10240,24,24,64,0,1,float16,float16,0,4.828672091166179
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,10240,24,8,64,0,1,float16,fp8,0,9.334101359049479
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,10240,24,24,64,0,1,float16,fp8,0,4.77781327565511
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,10240,24,1,64,0,1,float16,float16,0,4.106069246927897
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,10240,24,24,64,0,1,fp8,fp8,0,6.359210968017578
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,10240,24,8,64,0,1,fp8,fp8,0,12.011861165364584
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,10240,24,1,64,0,1,float16,fp8,0,3.9703893661499023
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,10240,24,2,64,0,1,float16,float16,0,3.9273811976114907
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,10240,24,1,64,0,1,fp8,fp8,0,5.79857063293457
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,10240,24,2,64,0,1,float16,fp8,0,4.445866584777832
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,10240,24,2,64,0,1,fp8,fp8,0,5.884415944417317
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,10240,24,4,64,0,1,float16,float16,0,4.5354665120442705
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,10240,24,4,64,0,1,float16,fp8,0,4.602197329203288
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,10240,24,8,64,0,1,float16,float16,0,4.528469403584798
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,10240,24,4,64,0,1,fp8,fp8,0,5.889535903930664
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,10240,24,8,64,0,1,float16,fp8,0,4.035072008768718
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,10240,24,24,64,0,1,float16,float16,0,2.2219093640645347
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,10240,24,24,64,0,1,float16,fp8,0,2.2205440203348794
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,10240,24,8,64,0,1,fp8,fp8,0,5.975722630818685
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,10240,24,24,64,0,1,fp8,fp8,0,3.118250528971354
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,10240,24,1,64,0,1,float16,float16,0,2.0787199338277182
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,10240,24,1,64,0,1,float16,fp8,0,2.098858674367269
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,10240,24,1,64,0,1,fp8,fp8,0,2.9161812464396157
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,10240,24,2,64,0,1,float16,float16,0,2.032298723856608
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,10240,24,2,64,0,1,float16,fp8,0,2.1601279576619468
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,10240,24,2,64,0,1,fp8,fp8,0,2.910208066304525
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,10240,24,8,64,0,1,float16,float16,0,2.078378677368164
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,10240,24,4,64,0,1,float16,float16,0,2.1456212997436523
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,10240,24,4,64,0,1,float16,fp8,0,2.0643839836120605
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,10240,24,4,64,0,1,fp8,fp8,0,2.9518505732218423
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,10240,24,8,64,0,1,float16,fp8,0,2.135551929473877
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,10240,24,8,64,0,1,fp8,fp8,0,2.9143040974934897
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,8192,24,1,64,0,1,float16,float16,0,24.58862813313802
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,8192,24,1,64,0,1,float16,fp8,0,24.82909901936849
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,8192,24,2,64,0,1,float16,float16,0,24.740010579427082
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,8192,24,2,64,0,1,float16,fp8,0,25.138516743977863
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,8192,24,4,64,0,1,float16,float16,0,24.49390920003255
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,8192,24,1,64,0,1,fp8,fp8,0,30.607872009277344
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,8192,24,2,64,0,1,fp8,fp8,0,31.912277221679688
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,8192,24,4,64,0,1,float16,fp8,0,24.596651713053387
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,8192,24,24,64,0,1,float16,float16,0,12.931925455729166
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,8192,24,24,64,0,1,float16,fp8,0,13.318826039632162
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,8192,24,1,64,0,1,float16,float16,0,12.482388814290365
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,8192,24,8,64,0,1,float16,float16,0,24.94890594482422
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,8192,24,24,64,0,1,fp8,fp8,0,16.973653157552082
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,8192,24,8,64,0,1,float16,fp8,0,24.464553833007812
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,8192,24,4,64,0,1,fp8,fp8,0,31.744171142578125
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,8192,24,8,64,0,1,fp8,fp8,0,33.050453186035156
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,8192,24,1,64,0,1,float16,fp8,0,12.114261627197266
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,8192,24,1,64,0,1,fp8,fp8,0,15.049898783365885
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,8192,24,2,64,0,1,float16,float16,0,12.521984100341797
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,8192,24,2,64,0,1,float16,fp8,0,13.027498881022135
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,8192,24,4,64,0,1,float16,float16,0,12.459178924560547
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,8192,24,2,64,0,1,fp8,fp8,0,15.406932830810547
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,8192,24,4,64,0,1,float16,fp8,0,12.26803207397461
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,8192,24,4,64,0,1,fp8,fp8,0,15.487658182779947
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,8192,24,8,64,0,1,float16,float16,0,12.46020253499349
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,8192,24,24,64,0,1,float16,float16,0,6.621866861979167
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,8192,24,24,64,0,1,float16,fp8,0,6.593706766764323
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,8192,24,8,64,0,1,float16,fp8,0,12.64401117960612
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,8192,24,1,64,0,1,float16,float16,0,5.676885604858398
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,8192,24,24,64,0,1,fp8,fp8,0,8.375125249226889
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,8192,24,8,64,0,1,fp8,fp8,0,15.675904591878256
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,8192,24,1,64,0,1,float16,fp8,0,6.33958371480306
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,8192,24,1,64,0,1,fp8,fp8,0,7.63050651550293
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,8192,24,2,64,0,1,float16,float16,0,6.234794616699219
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,8192,24,2,64,0,1,float16,fp8,0,5.9450028737386065
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,8192,24,4,64,0,1,float16,float16,0,6.085631688435872
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,8192,24,2,64,0,1,fp8,fp8,0,7.60866101582845
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,8192,24,4,64,0,1,float16,fp8,0,5.913941065470378
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,8192,24,4,64,0,1,fp8,fp8,0,7.608320236206055
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,8192,24,8,64,0,1,float16,float16,0,6.3663787841796875
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,8192,24,8,64,0,1,float16,fp8,0,6.2124373118082685
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,8192,24,24,64,0,1,float16,float16,0,2.9943466186523438
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,8192,24,24,64,0,1,float16,fp8,0,2.967551867167155
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,8192,24,8,64,0,1,fp8,fp8,0,7.826261520385742
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,8192,24,24,64,0,1,fp8,fp8,0,4.074154535929362
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,8192,24,1,64,0,1,float16,float16,0,2.7136001586914062
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,8192,24,1,64,0,1,float16,fp8,0,2.6494293212890625
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,8192,24,1,64,0,1,fp8,fp8,0,3.755690574645996
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,8192,24,2,64,0,1,float16,float16,0,2.5292800267537436
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,8192,24,2,64,0,1,float16,fp8,0,2.5151146252950034
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,8192,24,2,64,0,1,fp8,fp8,0,3.7500588099161782
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,8192,24,4,64,0,1,float16,float16,0,2.70523738861084
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,8192,24,4,64,0,1,float16,fp8,0,2.566314697265625
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,8192,24,4,64,0,1,fp8,fp8,0,3.805525461832682
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,8192,24,8,64,0,1,float16,float16,0,2.648746649424235
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,8192,24,8,64,0,1,float16,fp8,0,2.721280097961426
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,8192,24,24,64,0,1,float16,float16,0,1.3895680109659831
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,8192,24,8,64,0,1,fp8,fp8,0,3.856383959452311
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,8192,24,24,64,0,1,float16,fp8,0,1.3364906311035156
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,8192,24,1,64,0,1,float16,float16,0,1.4141440391540527
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,8192,24,24,64,0,1,fp8,fp8,0,2.05567995707194
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,8192,24,1,64,0,1,float16,fp8,0,1.3702826499938965
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,8192,24,1,64,0,1,fp8,fp8,0,1.9111253420511882
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,8192,24,2,64,0,1,float16,float16,0,1.3791573842366536
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,8192,24,2,64,0,1,float16,fp8,0,1.3907626469930012
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,8192,24,2,64,0,1,fp8,fp8,0,1.8979840278625488
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,8192,24,4,64,0,1,float16,fp8,0,1.3975893656412761
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,8192,24,4,64,0,1,float16,float16,0,1.459712028503418
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,8192,24,4,64,0,1,fp8,fp8,0,1.8967893918355305
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,8192,24,8,64,0,1,float16,float16,0,1.337173302968343
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,8192,24,8,64,0,1,float16,fp8,0,1.3230079809824626
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,8192,24,8,64,0,1,fp8,fp8,0,1.9271680514017742
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,6144,24,1,64,0,1,float16,float16,0,14.618282318115234
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,6144,24,1,64,0,1,float16,fp8,0,14.35699208577474
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,6144,24,2,64,0,1,float16,float16,0,14.621354420979818
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,6144,24,2,64,0,1,float16,fp8,0,14.489259084065756
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,6144,24,1,64,0,1,fp8,fp8,0,17.43445332845052
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,6144,24,4,64,0,1,float16,float16,0,14.516223907470703
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,6144,24,2,64,0,1,fp8,fp8,0,17.996458689371746
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,6144,24,4,64,0,1,float16,fp8,0,14.80618667602539
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,6144,24,24,64,0,1,float16,float16,0,7.717887878417969
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,6144,24,24,64,0,1,float16,fp8,0,7.686826705932617
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,6144,24,8,64,0,1,float16,float16,0,15.209643046061197
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,6144,24,24,64,0,1,fp8,fp8,0,9.79694938659668
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,6144,24,8,64,0,1,float16,fp8,0,14.51810073852539
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,6144,24,4,64,0,1,fp8,fp8,0,18.03332265218099
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,6144,24,1,64,0,1,float16,float16,0,7.134549458821614
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,6144,24,8,64,0,1,fp8,fp8,0,18.747051239013672
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,6144,24,1,64,0,1,float16,fp8,0,6.931797027587891
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,6144,24,1,64,0,1,fp8,fp8,0,8.64802106221517
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,6144,24,2,64,0,1,float16,float16,0,6.87172253926595
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,6144,24,2,64,0,1,float16,fp8,0,7.283882776896159
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,6144,24,2,64,0,1,fp8,fp8,0,8.714239756266275
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,6144,24,4,64,0,1,float16,float16,0,6.596096038818359
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,6144,24,4,64,0,1,float16,fp8,0,7.326037089029948
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,6144,24,4,64,0,1,fp8,fp8,0,8.912384033203125
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,6144,24,8,64,0,1,float16,float16,0,7.090688069661458
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,6144,24,24,64,0,1,float16,float16,0,3.800405184427897
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,6144,24,8,64,0,1,float16,fp8,0,6.863360087076823
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,6144,24,24,64,0,1,float16,fp8,0,3.6297388076782227
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,6144,24,24,64,0,1,fp8,fp8,0,4.753066698710124
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,6144,24,1,64,0,1,float16,float16,0,3.2150185902913413
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,6144,24,8,64,0,1,fp8,fp8,0,9.127082824707031
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,6144,24,1,64,0,1,float16,fp8,0,3.1279786427815757
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,6144,24,1,64,0,1,fp8,fp8,0,4.260693232218425
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,6144,24,2,64,0,1,float16,float16,0,3.03872013092041
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,6144,24,2,64,0,1,float16,fp8,0,3.0972585678100586
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,6144,24,2,64,0,1,fp8,fp8,0,4.321791966756185
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,6144,24,4,64,0,1,float16,float16,0,3.162282625834147
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,6144,24,4,64,0,1,float16,fp8,0,3.303936004638672
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,6144,24,4,64,0,1,fp8,fp8,0,4.364458719889323
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,6144,24,8,64,0,1,float16,float16,0,3.258709271748861
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,6144,24,8,64,0,1,float16,fp8,0,3.2547839482625327
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,6144,24,24,64,0,1,float16,float16,0,1.7846612930297852
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,6144,24,8,64,0,1,fp8,fp8,0,4.532906532287598
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,6144,24,24,64,0,1,float16,fp8,0,1.7257812817891438
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,6144,24,1,64,0,1,float16,float16,0,1.516544024149577
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,6144,24,24,64,0,1,fp8,fp8,0,2.370901266733805
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,6144,24,1,64,0,1,float16,fp8,0,1.462783972422282
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,6144,24,1,64,0,1,fp8,fp8,0,2.0968106587727866
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,6144,24,2,64,0,1,float16,float16,0,1.4626132647196453
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,6144,24,2,64,0,1,float16,fp8,0,1.4646612803141277
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,6144,24,2,64,0,1,fp8,fp8,0,2.1034666697184243
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,6144,24,4,64,0,1,float16,float16,0,1.4549333254496257
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,6144,24,4,64,0,1,float16,fp8,0,1.4953813552856445
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,6144,24,8,64,0,1,float16,float16,0,1.4532267252604167
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,6144,24,4,64,0,1,fp8,fp8,0,2.1070507367451987
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,6144,24,8,64,0,1,float16,fp8,0,1.48633607228597
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,6144,24,8,64,0,1,fp8,fp8,0,2.18180259068807
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,6144,24,24,64,0,1,float16,float16,0,0.8053759733835856
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,6144,24,24,64,0,1,float16,fp8,0,0.8101546764373779
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,6144,24,24,64,0,1,fp8,fp8,0,1.2096853256225586
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,6144,24,1,64,0,1,float16,float16,0,0.8314879735310873
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,6144,24,1,64,0,1,float16,fp8,0,0.8657920360565186
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,6144,24,1,64,0,1,fp8,fp8,0,1.1289599736531575
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,6144,24,2,64,0,1,float16,float16,0,0.8256853421529134
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,6144,24,2,64,0,1,float16,fp8,0,0.8637440204620361
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,6144,24,2,64,0,1,fp8,fp8,0,1.1127466360727947
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,6144,24,4,64,0,1,float16,float16,0,0.8096426328023275
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,6144,24,4,64,0,1,float16,fp8,0,0.8797866503397623
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,6144,24,8,64,0,1,float16,float16,0,0.8410453001658121
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,6144,24,4,64,0,1,fp8,fp8,0,1.1279359658559163
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,6144,24,8,64,0,1,float16,fp8,0,0.8029867013295492
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,6144,24,8,64,0,1,fp8,fp8,0,1.1373226642608643
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,4096,24,1,64,0,1,float16,float16,0,14.097578684488932
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,4096,24,1,64,0,1,float16,fp8,0,13.775360107421875
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,4096,24,2,64,0,1,float16,float16,0,14.240596771240234
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,4096,24,2,64,0,1,float16,fp8,0,14.144341786702475
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,4096,24,1,64,0,1,fp8,fp8,0,16.833194732666016
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,4096,24,4,64,0,1,float16,fp8,0,14.207829793294271
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,4096,24,4,64,0,1,float16,float16,0,14.210047403971354
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,4096,24,2,64,0,1,fp8,fp8,0,17.617237091064453
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,4096,24,24,64,0,1,float16,float16,0,7.937023798624675
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,4096,24,24,64,0,1,float16,fp8,0,7.6571305592854815
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,4096,24,1,64,0,1,float16,float16,0,6.835029602050781
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,4096,24,8,64,0,1,float16,float16,0,14.236501057942709
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,4096,24,24,64,0,1,fp8,fp8,0,9.522005081176758
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,4096,24,8,64,0,1,float16,fp8,0,14.11037826538086
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,4096,24,4,64,0,1,fp8,fp8,0,17.823572794596355
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,4096,24,8,64,0,1,fp8,fp8,0,18.809173583984375
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,4096,24,1,64,0,1,float16,fp8,0,6.713514963785808
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,4096,24,1,64,0,1,fp8,fp8,0,8.092501322428385
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,4096,24,2,64,0,1,float16,float16,0,6.472192128499349
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,4096,24,2,64,0,1,float16,fp8,0,6.740479787190755
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,4096,24,2,64,0,1,fp8,fp8,0,8.134143829345703
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,4096,24,4,64,0,1,float16,fp8,0,6.346752166748047
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,4096,24,4,64,0,1,float16,float16,0,6.716074625651042
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,4096,24,4,64,0,1,fp8,fp8,0,8.435541152954102
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,4096,24,24,64,0,1,float16,float16,0,3.8828372955322266
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,4096,24,8,64,0,1,float16,float16,0,6.624767939249675
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,4096,24,1,64,0,1,float16,float16,0,2.9371732076009116
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,4096,24,24,64,0,1,float16,fp8,0,3.6747945149739585
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,4096,24,8,64,0,1,float16,fp8,0,7.181311925252278
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,4096,24,24,64,0,1,fp8,fp8,0,4.6595414479573565
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,4096,24,8,64,0,1,fp8,fp8,0,8.713045120239258
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,4096,24,1,64,0,1,float16,fp8,0,2.827434539794922
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,4096,24,2,64,0,1,float16,float16,0,3.075584093729655
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,4096,24,2,64,0,1,float16,fp8,0,2.94161065419515
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,4096,24,1,64,0,1,fp8,fp8,0,3.925333340962728
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,4096,24,4,64,0,1,float16,float16,0,3.043498675028483
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,4096,24,4,64,0,1,float16,fp8,0,3.0237013498942056
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,4096,24,2,64,0,1,fp8,fp8,0,3.965269406636556
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,4096,24,4,64,0,1,fp8,fp8,0,4.046506563822429
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,4096,24,8,64,0,1,float16,float16,0,3.1368532180786133
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,4096,24,24,64,0,1,float16,float16,0,1.82152525583903
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,4096,24,24,64,0,1,float16,fp8,0,1.7882453600565593
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,4096,24,1,64,0,1,float16,float16,0,1.3501440684000652
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,4096,24,8,64,0,1,float16,fp8,0,3.2435201009114585
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,4096,24,24,64,0,1,fp8,fp8,0,2.288128058115641
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,4096,24,8,64,0,1,fp8,fp8,0,4.142933209737142
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,4096,24,1,64,0,1,float16,fp8,0,1.3986132939656575
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,4096,24,1,64,0,1,fp8,fp8,0,1.9087360699971516
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,4096,24,2,64,0,1,float16,float16,0,1.3573120435078938
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,4096,24,2,64,0,1,float16,fp8,0,1.3733545939127605
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,4096,24,4,64,0,1,float16,fp8,0,1.358847935994466
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,4096,24,2,64,0,1,fp8,fp8,0,1.9162453015645344
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,4096,24,4,64,0,1,float16,float16,0,1.4054400126139324
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,4096,24,4,64,0,1,fp8,fp8,0,1.9590826034545898
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,4096,24,8,64,0,1,float16,float16,0,1.5008427302042644
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,4096,24,8,64,0,1,float16,fp8,0,1.4225066502888997
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,4096,24,24,64,0,1,float16,float16,0,0.8229546546936035
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,4096,24,8,64,0,1,fp8,fp8,0,2.0466346740722656
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,4096,24,24,64,0,1,float16,fp8,0,0.7401813666025797
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,4096,24,1,64,0,1,float16,float16,0,0.7244799931844076
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,4096,24,24,64,0,1,fp8,fp8,0,1.1595093409220378
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,4096,24,1,64,0,1,float16,fp8,0,0.7043413321177164
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,4096,24,1,64,0,1,fp8,fp8,0,1.0002773602803547
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,4096,24,2,64,0,1,float16,float16,0,0.6941013336181641
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,4096,24,2,64,0,1,float16,fp8,0,0.7159466743469238
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,4096,24,4,64,0,1,float16,float16,0,0.6971733570098877
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,4096,24,2,64,0,1,fp8,fp8,0,0.9915733337402344
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,4096,24,4,64,0,1,float16,fp8,0,0.7115093072255453
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,4096,24,8,64,0,1,float16,float16,0,0.6992213726043701
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,4096,24,4,64,0,1,fp8,fp8,0,0.9886720180511475
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,4096,24,8,64,0,1,float16,fp8,0,0.7150932947794596
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,4096,24,8,64,0,1,fp8,fp8,0,1.0076159636179607
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,4096,24,24,64,0,1,float16,float16,0,0.40447998046875
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,4096,24,24,64,0,1,float16,fp8,0,0.39560532569885254
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,4096,24,24,64,0,1,fp8,fp8,0,0.5654186805089315
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,4096,24,1,64,0,1,float16,float16,0,0.400383989016215
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,4096,24,1,64,0,1,float16,fp8,0,0.4164266586303711
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,4096,24,1,64,0,1,fp8,fp8,0,0.5618346532185873
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,4096,24,2,64,0,1,float16,float16,0,0.38860801855723065
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,4096,24,2,64,0,1,float16,fp8,0,0.3979946772257487
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,4096,24,2,64,0,1,fp8,fp8,0,0.5490346749623617
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,4096,24,4,64,0,1,float16,float16,0,0.3942399819691976
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,4096,24,4,64,0,1,float16,fp8,0,0.3990186850229899
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,4096,24,4,64,0,1,fp8,fp8,0,0.5560319821039835
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,4096,24,8,64,0,1,float16,float16,0,0.3978240092595418
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,4096,24,8,64,0,1,float16,fp8,0,0.3916800022125244
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,4096,24,8,64,0,1,fp8,fp8,0,0.5587626695632935
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,3072,24,1,64,0,1,float16,float16,0,7.9148375193278
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,3072,24,1,64,0,1,float16,fp8,0,8.235008239746094
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,3072,24,2,64,0,1,float16,float16,0,7.974912007649739
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,3072,24,1,64,0,1,fp8,fp8,0,9.61843172709147
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,3072,24,2,64,0,1,float16,fp8,0,8.661845525105795
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,3072,24,4,64,0,1,float16,float16,0,8.313173294067383
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,3072,24,2,64,0,1,fp8,fp8,0,9.908053080240885
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,3072,24,4,64,0,1,float16,fp8,0,8.695637385050455
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,3072,24,4,64,0,1,fp8,fp8,0,10.276693344116211
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,3072,24,24,64,0,1,float16,float16,0,5.064703941345215
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,3072,24,24,64,0,1,float16,fp8,0,4.756650606791179
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,3072,24,1,64,0,1,float16,float16,0,3.678720156351725
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,3072,24,8,64,0,1,float16,float16,0,8.584704081217447
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,3072,24,24,64,0,1,fp8,fp8,0,5.734399795532227
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,3072,24,8,64,0,1,float16,fp8,0,8.355498631795248
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,3072,24,8,64,0,1,fp8,fp8,0,10.893311818440756
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,3072,24,1,64,0,1,float16,fp8,0,3.627690633138021
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,3072,24,1,64,0,1,fp8,fp8,0,4.7078399658203125
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,3072,24,2,64,0,1,float16,float16,0,3.7041492462158203
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,3072,24,2,64,0,1,float16,fp8,0,3.7606401443481445
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,3072,24,4,64,0,1,float16,float16,0,3.727701187133789
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,3072,24,2,64,0,1,fp8,fp8,0,4.727295875549316
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,3072,24,4,64,0,1,float16,fp8,0,3.67906125386556
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,3072,24,4,64,0,1,fp8,fp8,0,4.842325210571289
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,3072,24,8,64,0,1,float16,float16,0,3.94871457417806
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,3072,24,24,64,0,1,float16,float16,0,2.4250027338663735
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,3072,24,8,64,0,1,float16,fp8,0,4.102314631144206
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,3072,24,24,64,0,1,float16,fp8,0,2.265087922414144
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,3072,24,1,64,0,1,float16,float16,0,1.6767999331156414
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,3072,24,24,64,0,1,fp8,fp8,0,2.8074668248494468
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,3072,24,8,64,0,1,fp8,fp8,0,4.991317431131999
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,3072,24,1,64,0,1,float16,fp8,0,1.6529067357381184
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,3072,24,1,64,0,1,fp8,fp8,0,2.2478507359822593
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,3072,24,2,64,0,1,float16,float16,0,1.726293404897054
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,3072,24,2,64,0,1,float16,fp8,0,1.7257812817891438
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,3072,24,4,64,0,1,float16,float16,0,1.7348267237345378
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,3072,24,4,64,0,1,float16,fp8,0,1.757354736328125
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,3072,24,2,64,0,1,fp8,fp8,0,2.2580906550089517
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,3072,24,4,64,0,1,fp8,fp8,0,2.3026347160339355
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,3072,24,8,64,0,1,float16,float16,0,1.8865493138631184
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,3072,24,8,64,0,1,float16,fp8,0,1.8283519744873047
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,3072,24,24,64,0,1,float16,float16,0,1.1240106423695881
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,3072,24,24,64,0,1,float16,fp8,0,1.0542079607645671
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,3072,24,1,64,0,1,float16,float16,0,0.8040106296539307
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,3072,24,24,64,0,1,fp8,fp8,0,1.4185813268025715
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,3072,24,8,64,0,1,fp8,fp8,0,2.4418986638387046
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,3072,24,1,64,0,1,float16,fp8,0,0.7982079982757568
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,3072,24,1,64,0,1,fp8,fp8,0,1.1194026470184326
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,3072,24,2,64,0,1,float16,float16,0,0.7881386280059814
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,3072,24,2,64,0,1,float16,fp8,0,0.8166399796803793
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,3072,24,2,64,0,1,fp8,fp8,0,1.1129173437754314
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,3072,24,4,64,0,1,float16,float16,0,0.7877973715464274
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,3072,24,4,64,0,1,float16,fp8,0,0.8029867013295492
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,3072,24,8,64,0,1,float16,fp8,0,0.8045226732889811
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,3072,24,4,64,0,1,fp8,fp8,0,1.154901345570882
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,3072,24,8,64,0,1,float16,float16,0,0.8133973280588785
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,3072,24,8,64,0,1,fp8,fp8,0,1.2177066802978516
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,3072,24,24,64,0,1,float16,float16,0,0.4495360056559245
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,3072,24,24,64,0,1,float16,fp8,0,0.4379306634267171
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,3072,24,1,64,0,1,float16,float16,0,0.41915734608968097
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,3072,24,24,64,0,1,fp8,fp8,0,0.7017813523610433
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,3072,24,1,64,0,1,float16,fp8,0,0.4140373468399048
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,3072,24,1,64,0,1,fp8,fp8,0,0.6085973183314005
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,3072,24,2,64,0,1,float16,float16,0,0.4135253429412842
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,3072,24,2,64,0,1,float16,fp8,0,0.41147732734680176
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,3072,24,4,64,0,1,float16,float16,0,0.4254719813664754
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,3072,24,2,64,0,1,fp8,fp8,0,0.6016000111897787
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,3072,24,4,64,0,1,float16,fp8,0,0.4222293297449748
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,3072,24,4,64,0,1,fp8,fp8,0,0.6014293432235718
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,3072,24,8,64,0,1,float16,float16,0,0.4253013531366984
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,3072,24,8,64,0,1,float16,fp8,0,0.4312746524810791
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,3072,24,8,64,0,1,fp8,fp8,0,0.606549342473348
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,3072,24,24,64,0,1,float16,float16,0,0.24183466037114462
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,3072,24,24,64,0,1,float16,fp8,0,0.24371200799942017
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,3072,24,24,64,0,1,fp8,fp8,0,0.3619840145111084
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,3072,24,1,64,0,1,float16,float16,0,0.24968532721201578
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,3072,24,1,64,0,1,float16,fp8,0,0.24883200724919638
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,3072,24,1,64,0,1,fp8,fp8,0,0.34986666838328045
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,3072,24,2,64,0,1,float16,float16,0,0.24576000372568765
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,3072,24,2,64,0,1,float16,fp8,0,0.24302933613459268
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,3072,24,2,64,0,1,fp8,fp8,0,0.34406399726867676
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,3072,24,4,64,0,1,float16,float16,0,0.24388267596562704
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,3072,24,4,64,0,1,float16,fp8,0,0.2469546596209208
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,3072,24,4,64,0,1,fp8,fp8,0,0.3466240167617798
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,3072,24,8,64,0,1,float16,fp8,0,0.24302933613459268
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,3072,24,8,64,0,1,float16,float16,0,0.24183466037114462
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,3072,24,8,64,0,1,fp8,fp8,0,0.3524266481399536
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,2048,24,1,64,0,1,float16,float16,0,8.040447870890299
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,2048,24,1,64,0,1,float16,fp8,0,7.935146967569987
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,2048,24,2,64,0,1,float16,float16,0,8.68505605061849
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,2048,24,1,64,0,1,fp8,fp8,0,9.544874827067057
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,2048,24,2,64,0,1,float16,fp8,0,8.342869440714518
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,2048,24,4,64,0,1,float16,float16,0,8.535210927327475
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,2048,24,2,64,0,1,fp8,fp8,0,10.251946767171225
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,2048,24,4,64,0,1,float16,fp8,0,8.552277247111002
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,2048,24,4,64,0,1,fp8,fp8,0,10.596181233723959
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,2048,24,8,64,0,1,float16,float16,0,9.224021275838217
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,2048,24,24,64,0,1,float16,float16,0,5.568341573079427
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,2048,24,24,64,0,1,float16,fp8,0,5.217962582906087
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,2048,24,1,64,0,1,float16,float16,0,3.726506551106771
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,2048,24,24,64,0,1,fp8,fp8,0,6.1312001546223955
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,2048,24,8,64,0,1,float16,fp8,0,9.12554677327474
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,2048,24,8,64,0,1,fp8,fp8,0,11.378859202067057
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,2048,24,1,64,0,1,float16,fp8,0,3.6164267857869468
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,2048,24,2,64,0,1,float16,float16,0,3.8348798751831055
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,2048,24,1,64,0,1,fp8,fp8,0,4.461909294128418
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,2048,24,2,64,0,1,float16,fp8,0,3.7434027989705405
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,2048,24,2,64,0,1,fp8,fp8,0,4.644522666931152
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,2048,24,4,64,0,1,float16,float16,0,3.9041706720987954
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,2048,24,4,64,0,1,float16,fp8,0,3.9166294733683267
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,2048,24,4,64,0,1,fp8,fp8,0,4.79965877532959
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,2048,24,8,64,0,1,float16,float16,0,4.195157368977864
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,2048,24,24,64,0,1,float16,float16,0,2.610858599344889
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,2048,24,8,64,0,1,float16,fp8,0,4.176042556762695
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,2048,24,24,64,0,1,float16,fp8,0,2.517845312754313
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,2048,24,24,64,0,1,fp8,fp8,0,2.8905814488728843
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,2048,24,1,64,0,1,float16,float16,0,1.740458647410075
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,2048,24,8,64,0,1,fp8,fp8,0,5.063680013020833
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,2048,24,1,64,0,1,float16,fp8,0,1.6957440376281738
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,2048,24,1,64,0,1,fp8,fp8,0,2.1362346013387046
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,2048,24,2,64,0,1,float16,float16,0,1.7448959350585938
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,2048,24,2,64,0,1,float16,fp8,0,1.7549653053283691
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,2048,24,2,64,0,1,fp8,fp8,0,2.205354690551758
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,2048,24,4,64,0,1,float16,float16,0,1.8286933898925781
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,2048,24,4,64,0,1,float16,fp8,0,1.8319360415140789
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,2048,24,4,64,0,1,fp8,fp8,0,2.2580906550089517
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,2048,24,8,64,0,1,float16,float16,0,2.004821300506592
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,2048,24,8,64,0,1,float16,fp8,0,1.9351894060770671
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,2048,24,24,64,0,1,float16,float16,0,1.238869349161784
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,2048,24,8,64,0,1,fp8,fp8,0,2.380629380544027
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,2048,24,1,64,0,1,float16,float16,0,0.7831892967224121
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,2048,24,24,64,0,1,float16,fp8,0,1.1793066660563152
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,2048,24,24,64,0,1,fp8,fp8,0,1.4462292989095051
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,2048,24,1,64,0,1,float16,fp8,0,0.7551999886830648
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,2048,24,1,64,0,1,fp8,fp8,0,1.0967040061950684
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,2048,24,2,64,0,1,float16,float16,0,0.7681706746419271
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,2048,24,2,64,0,1,float16,fp8,0,0.779263973236084
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,2048,24,2,64,0,1,fp8,fp8,0,1.076053301493327
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,2048,24,4,64,0,1,float16,fp8,0,0.7999146779378256
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,2048,24,4,64,0,1,fp8,fp8,0,1.1240106423695881
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,2048,24,4,64,0,1,float16,float16,0,0.8081066608428955
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,2048,24,8,64,0,1,float16,float16,0,0.8861013253529867
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,2048,24,8,64,0,1,float16,fp8,0,0.8586239814758301
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,2048,24,8,64,0,1,fp8,fp8,0,1.1936426957448323
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,2048,24,24,64,0,1,float16,float16,0,0.5142186482747396
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,2048,24,24,64,0,1,float16,fp8,0,0.44339199860890705
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,2048,24,1,64,0,1,float16,float16,0,0.392192006111145
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,2048,24,1,64,0,1,float16,fp8,0,0.3952639897664388
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,2048,24,24,64,0,1,fp8,fp8,0,0.7396693229675293
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,2048,24,1,64,0,1,fp8,fp8,0,0.547327995300293
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,2048,24,2,64,0,1,float16,float16,0,0.3979946772257487
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,2048,24,2,64,0,1,float16,fp8,0,0.39031465848286945
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,2048,24,2,64,0,1,fp8,fp8,0,0.5461333195368449
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,2048,24,4,64,0,1,float16,float16,0,0.3940693140029907
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,2048,24,4,64,0,1,float16,fp8,0,0.39082666238149005
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,2048,24,8,64,0,1,float16,fp8,0,0.3942399819691976
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,2048,24,4,64,0,1,fp8,fp8,0,0.5515946547190348
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,2048,24,8,64,0,1,float16,float16,0,0.40482131640116376
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,2048,24,8,64,0,1,fp8,fp8,0,0.5623466571172079
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,2048,24,24,64,0,1,float16,fp8,0,0.2310826579729716
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,2048,24,24,64,0,1,float16,float16,0,0.2285226583480835
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,2048,24,1,64,0,1,float16,float16,0,0.20718934138615927
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,2048,24,24,64,0,1,fp8,fp8,0,0.3131733338038127
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,2048,24,1,64,0,1,float16,fp8,0,0.20736000935236612
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,2048,24,1,64,0,1,fp8,fp8,0,0.29781333605448407
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,2048,24,2,64,0,1,float16,float16,0,0.21026132504145303
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,2048,24,2,64,0,1,float16,fp8,0,0.21026132504145303
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,2048,24,2,64,0,1,fp8,fp8,0,0.3022506634394328
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,2048,24,4,64,0,1,float16,fp8,0,0.20770132541656494
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,2048,24,4,64,0,1,float16,float16,0,0.2070186734199524
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,2048,24,4,64,0,1,fp8,fp8,0,0.30105600754419964
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,2048,24,8,64,0,1,float16,float16,0,0.2065066695213318
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,2048,24,8,64,0,1,float16,fp8,0,0.20872533321380615
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,2048,24,8,64,0,1,fp8,fp8,0,0.30498133103052777
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,2048,24,24,64,0,1,float16,float16,0,0.13431466619173685
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,2048,24,24,64,0,1,float16,fp8,0,0.13260799646377563
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,2048,24,24,64,0,1,fp8,fp8,0,0.1781760056813558
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,2048,24,1,64,0,1,float16,float16,0,0.1360213359196981
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,2048,24,1,64,0,1,float16,fp8,0,0.13704533378283182
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,2048,24,1,64,0,1,fp8,fp8,0,0.1802240014076233
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,2048,24,2,64,0,1,float16,float16,0,0.1360213359196981
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,2048,24,2,64,0,1,float16,fp8,0,0.13516799608866373
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,2048,24,2,64,0,1,fp8,fp8,0,0.17698132991790771
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,2048,24,4,64,0,1,float16,float16,0,0.1353386640548706
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,2048,24,4,64,0,1,float16,fp8,0,0.13414399822553
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,2048,24,4,64,0,1,fp8,fp8,0,0.1807360053062439
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,2048,24,8,64,0,1,float16,float16,0,0.13329066832860312
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,2048,24,8,64,0,1,float16,fp8,0,0.13482667009035745
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,2048,24,8,64,0,1,fp8,fp8,0,0.1786880095799764
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,1536,24,1,64,0,1,float16,fp8,0,4.636330604553223
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,1536,24,1,64,0,1,float16,float16,0,4.792490641276042
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,1536,24,2,64,0,1,float16,float16,0,4.9821014404296875
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,1536,24,1,64,0,1,fp8,fp8,0,5.5673173268636065
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,1536,24,2,64,0,1,float16,fp8,0,4.932266553243001
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,1536,24,2,64,0,1,fp8,fp8,0,5.952512105305989
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,1536,24,4,64,0,1,float16,float16,0,5.1908267339070635
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,1536,24,4,64,0,1,float16,fp8,0,5.223594665527344
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,1536,24,4,64,0,1,fp8,fp8,0,6.28770128885905
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,1536,24,8,64,0,1,float16,float16,0,5.822634379069011
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,1536,24,1,64,0,1,float16,float16,0,2.2476800282796225
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,1536,24,24,64,0,1,float16,float16,0,3.560960133870443
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,1536,24,24,64,0,1,float16,fp8,0,3.359402656555176
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,1536,24,8,64,0,1,float16,fp8,0,5.561855951944987
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,1536,24,24,64,0,1,fp8,fp8,0,3.837951978047689
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,1536,24,8,64,0,1,fp8,fp8,0,6.810282389322917
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,1536,24,1,64,0,1,float16,fp8,0,2.2072319984436035
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,1536,24,1,64,0,1,fp8,fp8,0,2.610175927480062
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,1536,24,2,64,0,1,float16,float16,0,2.3309653600056968
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,1536,24,2,64,0,1,float16,fp8,0,2.269696076711019
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,1536,24,2,64,0,1,fp8,fp8,0,2.7180372873942056
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,1536,24,4,64,0,1,float16,float16,0,2.4232959747314453
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,1536,24,4,64,0,1,float16,fp8,0,2.344106674194336
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,1536,24,4,64,0,1,fp8,fp8,0,2.808490753173828
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,1536,24,8,64,0,1,float16,float16,0,2.643967946370443
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,1536,24,8,64,0,1,float16,fp8,0,2.624512036641439
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,1536,24,24,64,0,1,float16,float16,0,1.6880639394124348
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,1536,24,1,64,0,1,float16,float16,0,0.9975466728210449
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,1536,24,24,64,0,1,float16,fp8,0,1.6017066637674968
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,1536,24,8,64,0,1,fp8,fp8,0,3.0373547871907554
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,1536,24,24,64,0,1,fp8,fp8,0,1.846783955891927
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,1536,24,1,64,0,1,float16,fp8,0,1.016149361928304
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,1536,24,1,64,0,1,fp8,fp8,0,1.3028693199157715
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,1536,24,2,64,0,1,float16,float16,0,1.0574506918589275
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,1536,24,2,64,0,1,float16,fp8,0,1.021781365076701
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,1536,24,4,64,0,1,float16,float16,0,1.0914133389790852
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,1536,24,2,64,0,1,fp8,fp8,0,1.3276159763336182
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,1536,24,4,64,0,1,float16,fp8,0,1.0821973482767742
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,1536,24,8,64,0,1,float16,float16,0,1.2552533149719238
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,1536,24,4,64,0,1,fp8,fp8,0,1.3805227279663086
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,1536,24,8,64,0,1,float16,fp8,0,1.199445327123006
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,1536,24,8,64,0,1,fp8,fp8,0,1.4912853240966797
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,1536,24,24,64,0,1,float16,float16,0,0.7662933667500814
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,1536,24,24,64,0,1,float16,fp8,0,0.7098026275634766
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,1536,24,24,64,0,1,fp8,fp8,0,0.934229294459025
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,1536,24,1,64,0,1,float16,float16,0,0.47086934248606366
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,1536,24,1,64,0,1,float16,fp8,0,0.4751360019048055
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,1536,24,1,64,0,1,fp8,fp8,0,0.6309546629587809
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,1536,24,2,64,0,1,float16,float16,0,0.4623359839121501
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,1536,24,2,64,0,1,float16,fp8,0,0.47325865427652997
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,1536,24,2,64,0,1,fp8,fp8,0,0.629589319229126
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,1536,24,4,64,0,1,float16,float16,0,0.46301865577697754
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,1536,24,4,64,0,1,float16,fp8,0,0.45124268531799316
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,1536,24,4,64,0,1,fp8,fp8,0,0.6609919865926107
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,1536,24,8,64,0,1,float16,float16,0,0.49612800280253094
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,1536,24,8,64,0,1,float16,fp8,0,0.47172268231709796
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,1536,24,8,64,0,1,fp8,fp8,0,0.7307946681976318
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,1536,24,24,64,0,1,float16,float16,0,0.26658133665720624
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,1536,24,24,64,0,1,float16,fp8,0,0.25497599442799884
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,1536,24,1,64,0,1,float16,float16,0,0.22664533058802286
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,1536,24,24,64,0,1,fp8,fp8,0,0.4500480095545451
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,1536,24,1,64,0,1,float16,fp8,0,0.23091200987497965
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,1536,24,1,64,0,1,fp8,fp8,0,0.33075199524561566
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,1536,24,2,64,0,1,float16,float16,0,0.23398399353027344
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,1536,24,2,64,0,1,float16,fp8,0,0.23517866929372153
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,1536,24,2,64,0,1,fp8,fp8,0,0.3327999909718831
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,1536,24,4,64,0,1,float16,float16,0,0.2367146611213684
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,1536,24,4,64,0,1,float16,fp8,0,0.2367146611213684
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,1536,24,8,64,0,1,float16,float16,0,0.24439465999603271
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,1536,24,4,64,0,1,fp8,fp8,0,0.3336533308029175
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,1536,24,8,64,0,1,float16,fp8,0,0.24593067169189453
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,1536,24,8,64,0,1,fp8,fp8,0,0.33553067843119305
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,1536,24,24,64,0,1,float16,float16,0,0.14045866330464682
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,1536,24,24,64,0,1,float16,fp8,0,0.13875200351079306
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,1536,24,24,64,0,1,fp8,fp8,0,0.19319466749827066
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,1536,24,1,64,0,1,float16,fp8,0,0.1365333298842112
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,1536,24,1,64,0,1,float16,float16,0,0.13499733805656433
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,1536,24,2,64,0,1,float16,fp8,0,0.1353386640548706
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,1536,24,2,64,0,1,float16,float16,0,0.13294933239618936
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,1536,24,1,64,0,1,fp8,fp8,0,0.19114667177200317
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,1536,24,2,64,0,1,fp8,fp8,0,0.19268266359965006
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,1536,24,4,64,0,1,float16,float16,0,0.13482667009035745
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,1536,24,4,64,0,1,float16,fp8,0,0.1327786644299825
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,1536,24,8,64,0,1,float16,float16,0,0.13448533415794373
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,1536,24,8,64,0,1,float16,fp8,0,0.13329066832860312
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,1536,24,4,64,0,1,fp8,fp8,0,0.18961066007614136
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,1536,24,8,64,0,1,fp8,fp8,0,0.18875734011332193
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,1536,24,24,64,0,1,float16,float16,0,0.09437867005666097
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,1536,24,24,64,0,1,float16,fp8,0,0.09574400385220845
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,1536,24,24,64,0,1,fp8,fp8,0,0.11502933502197266
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,1536,24,1,64,0,1,float16,float16,0,0.0988159974416097
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,1536,24,1,64,0,1,float16,fp8,0,0.09574400385220845
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,1536,24,1,64,0,1,fp8,fp8,0,0.1155413289864858
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,1536,24,2,64,0,1,float16,float16,0,0.09523199995358785
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,1536,24,2,64,0,1,float16,fp8,0,0.09710933764775594
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,1536,24,2,64,0,1,fp8,fp8,0,0.11383466919263203
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,1536,24,4,64,0,1,float16,float16,0,0.09847467144330342
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,1536,24,4,64,0,1,float16,fp8,0,0.09506133198738098
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,1536,24,4,64,0,1,fp8,fp8,0,0.11281067132949829
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,1536,24,8,64,0,1,float16,float16,0,0.0942080020904541
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,1536,24,8,64,0,1,fp8,fp8,0,0.11281067132949829
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,1536,24,8,64,0,1,float16,fp8,0,0.0936959981918335
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,1024,24,1,64,0,1,float16,float16,0,5.093546549479167
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,1024,24,1,64,0,1,float16,fp8,0,4.951381365458171
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,1024,24,1,64,0,1,fp8,fp8,0,5.523626963297526
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,1024,24,2,64,0,1,float16,float16,0,5.3650773366292315
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,1024,24,2,64,0,1,float16,fp8,0,5.159082730611165
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,1024,24,2,64,0,1,fp8,fp8,0,5.808981577555339
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,1024,24,4,64,0,1,float16,float16,0,5.59991455078125
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,1024,24,4,64,0,1,float16,fp8,0,5.413888295491536
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,1024,24,4,64,0,1,fp8,fp8,0,6.007466634114583
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,1024,24,8,64,0,1,float16,float16,0,6.288895924886067
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,1024,24,8,64,0,1,float16,fp8,0,6.0499623616536455
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,1024,24,1,64,0,1,float16,float16,0,2.378239949544271
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,1024,24,8,64,0,1,fp8,fp8,0,6.711637496948242
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,1024,24,24,64,0,1,float16,fp8,0,3.9534934361775718
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,1024,24,24,64,0,1,float16,float16,0,4.173823992411296
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,1024,24,24,64,0,1,fp8,fp8,0,4.201642672220866
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,1024,24,1,64,0,1,float16,fp8,0,2.384554704030355
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,1024,24,1,64,0,1,fp8,fp8,0,2.64738130569458
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,1024,24,2,64,0,1,float16,float16,0,2.502314726511637
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,1024,24,2,64,0,1,float16,fp8,0,2.4661332766215005
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,1024,24,4,64,0,1,float16,float16,0,2.630143960316976
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,1024,24,2,64,0,1,fp8,fp8,0,2.8344319661458335
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,1024,24,4,64,0,1,float16,fp8,0,2.5797972679138184
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,1024,24,4,64,0,1,fp8,fp8,0,2.9825706481933594
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,1024,24,8,64,0,1,float16,float16,0,3.0207999547322593
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,1024,24,8,64,0,1,float16,fp8,0,2.8909225463867188
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,1024,24,8,64,0,1,fp8,fp8,0,3.2128000259399414
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,1024,24,24,64,0,1,float16,float16,0,2.0237654050191245
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,1024,24,1,64,0,1,float16,float16,0,1.1047253608703613
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,1024,24,1,64,0,1,float16,fp8,0,1.1298133532206218
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,1024,24,24,64,0,1,float16,fp8,0,1.916927973429362
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,1024,24,24,64,0,1,fp8,fp8,0,2.0358826319376626
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,1024,24,1,64,0,1,fp8,fp8,0,1.319423993428548
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,1024,24,2,64,0,1,float16,float16,0,1.1537066300710042
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,1024,24,2,64,0,1,float16,fp8,0,1.1484159628550212
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,1024,24,2,64,0,1,fp8,fp8,0,1.3547520637512207
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,1024,24,4,64,0,1,float16,float16,0,1.2238506476084392
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,1024,24,4,64,0,1,float16,fp8,0,1.2148053646087646
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,1024,24,4,64,0,1,fp8,fp8,0,1.4324053128560383
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,1024,24,8,64,0,1,float16,float16,0,1.412607987721761
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,1024,24,8,64,0,1,float16,fp8,0,1.3511679967244465
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,1024,24,8,64,0,1,fp8,fp8,0,1.5626239776611328
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,1024,24,24,64,0,1,float16,float16,0,0.9470293521881104
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,1024,24,24,64,0,1,float16,fp8,0,0.8878080050150553
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,1024,24,1,64,0,1,float16,float16,0,0.47121067841847736
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,1024,24,1,64,0,1,float16,fp8,0,0.46165335178375244
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,1024,24,24,64,0,1,fp8,fp8,0,1.014954646428426
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,1024,24,1,64,0,1,fp8,fp8,0,0.64955735206604
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,1024,24,2,64,0,1,float16,float16,0,0.4872533480326335
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,1024,24,2,64,0,1,float16,fp8,0,0.4739413261413574
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,1024,24,2,64,0,1,fp8,fp8,0,0.6531413396199545
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,1024,24,4,64,0,1,float16,float16,0,0.5089279810587565
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,1024,24,4,64,0,1,float16,fp8,0,0.5022720098495483
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,1024,24,4,64,0,1,fp8,fp8,0,0.7039999961853027
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,1024,24,8,64,0,1,float16,float16,0,0.6022826830546061
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,1024,24,8,64,0,1,float16,fp8,0,0.584874669710795
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,1024,24,8,64,0,1,fp8,fp8,0,0.7874560356140137
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,1024,24,24,64,0,1,float16,float16,0,0.3520853519439697
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,1024,24,24,64,0,1,float16,fp8,0,0.29627732435862225
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,1024,24,24,64,0,1,fp8,fp8,0,0.5109759966532389
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,1024,24,1,64,0,1,float16,float16,0,0.2387626568476359
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,1024,24,1,64,0,1,float16,fp8,0,0.23227733373641968
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,1024,24,1,64,0,1,fp8,fp8,0,0.3145386576652527
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,1024,24,2,64,0,1,float16,float16,0,0.24285866816838583
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,1024,24,2,64,0,1,float16,fp8,0,0.2367146611213684
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,1024,24,2,64,0,1,fp8,fp8,0,0.3184640010197957
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,1024,24,4,64,0,1,float16,float16,0,0.2367146611213684
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,1024,24,4,64,0,1,float16,fp8,0,0.23347200949986777
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,1024,24,4,64,0,1,fp8,fp8,0,0.32494932413101196
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,1024,24,8,64,0,1,float16,float16,0,0.23893332481384277
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,1024,24,8,64,0,1,float16,fp8,0,0.2409813404083252
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,1024,24,8,64,0,1,fp8,fp8,0,0.33126399914423627
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,1024,24,24,64,0,1,float16,float16,0,0.13738666971524557
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,1024,24,24,64,0,1,float16,fp8,0,0.12919466694196066
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,1024,24,24,64,0,1,fp8,fp8,0,0.18090667327245077
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,1024,24,1,64,0,1,float16,float16,0,0.1293653349081675
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,1024,24,1,64,0,1,float16,fp8,0,0.13056000073750815
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,1024,24,1,64,0,1,fp8,fp8,0,0.17151999473571777
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,1024,24,2,64,0,1,float16,float16,0,0.1264639993508657
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,1024,24,2,64,0,1,float16,fp8,0,0.12868266304334006
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,1024,24,2,64,0,1,fp8,fp8,0,0.17339734236399332
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,1024,24,4,64,0,1,float16,float16,0,0.12731732924779257
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,1024,24,4,64,0,1,float16,fp8,0,0.12834133704503378
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,1024,24,4,64,0,1,fp8,fp8,0,0.1713493267695109
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,1024,24,8,64,0,1,float16,float16,0,0.12782933314641318
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,1024,24,8,64,0,1,float16,fp8,0,0.1293653349081675
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,1024,24,8,64,0,1,fp8,fp8,0,0.17339734236399332
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,1024,24,24,64,0,1,float16,float16,0,0.07935999830563863
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,1024,24,24,64,0,1,float16,fp8,0,0.08055466910203297
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,1024,24,24,64,0,1,fp8,fp8,0,0.10461866855621338
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,1024,24,1,64,0,1,float16,float16,0,0.08140799899895985
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,1024,24,1,64,0,1,float16,fp8,0,0.07935999830563863
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,1024,24,1,64,0,1,fp8,fp8,0,0.10120532910029094
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,1024,24,2,64,0,1,float16,float16,0,0.0773119976123174
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,1024,24,2,64,0,1,float16,fp8,0,0.0773119976123174
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,1024,24,2,64,0,1,fp8,fp8,0,0.10257066289583842
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,1024,24,4,64,0,1,float16,float16,0,0.07765333354473114
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,1024,24,4,64,0,1,float16,fp8,0,0.07714133461316426
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,1024,24,4,64,0,1,fp8,fp8,0,0.10257066289583842
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,1024,24,8,64,0,1,float16,float16,0,0.07748266557852428
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,1024,24,8,64,0,1,float16,fp8,0,0.07850666840871175
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,1024,24,8,64,0,1,fp8,fp8,0,0.10052266716957092
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,1024,24,24,64,0,1,float16,float16,0,0.0481279989083608
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,1024,24,24,64,0,1,float16,fp8,0,0.04898133377234141
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,1024,24,24,64,0,1,fp8,fp8,0,0.06365866462389629
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,1024,24,1,64,0,1,float16,float16,0,0.048810665806134544
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,1024,24,1,64,0,1,float16,fp8,0,0.04778666794300079
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,1024,24,1,64,0,1,fp8,fp8,0,0.06348800162474315
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,1024,24,2,64,0,1,float16,float16,0,0.04659200211366018
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,1024,24,2,64,0,1,float16,fp8,0,0.04744533201058706
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,1024,24,2,64,0,1,fp8,fp8,0,0.06263466676076253
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,1024,24,4,64,0,1,float16,float16,0,0.04744533201058706
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,1024,24,4,64,0,1,float16,fp8,0,0.04710400104522705
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,1024,24,4,64,0,1,fp8,fp8,0,0.0628053347269694
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,1024,24,8,64,0,1,float16,float16,0,0.04642133414745331
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,1024,24,8,64,0,1,float16,fp8,0,0.04693333307902018
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,1024,24,8,64,0,1,fp8,fp8,0,0.0628053347269694
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,512,24,1,64,0,1,float16,float16,0,3.667797406514486
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,512,24,1,64,0,1,float16,fp8,0,3.698176066080729
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,512,24,1,64,0,1,fp8,fp8,0,3.775829315185547
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,512,24,2,64,0,1,float16,float16,0,4.05077330271403
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,512,24,2,64,0,1,float16,fp8,0,3.970730781555176
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,512,24,2,64,0,1,fp8,fp8,0,4.037461280822754
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,512,24,4,64,0,1,float16,float16,0,4.330837249755859
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,512,24,4,64,0,1,float16,fp8,0,4.257792154947917
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,512,24,4,64,0,1,fp8,fp8,0,4.319914817810059
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,512,24,8,64,0,1,float16,float16,0,5.232981363932292
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,512,24,8,64,0,1,float16,fp8,0,5.010602633158366
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,512,24,24,64,0,1,float16,float16,0,3.8301013310750327
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,512,24,8,64,0,1,fp8,fp8,0,5.087914784749349
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,512,24,24,64,0,1,float16,fp8,0,3.5991894404093423
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,512,24,1,64,0,1,float16,float16,0,1.8000213305155437
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,512,24,24,64,0,1,fp8,fp8,0,3.363840103149414
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,512,24,1,64,0,1,float16,fp8,0,1.7228800455729167
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,512,24,1,64,0,1,fp8,fp8,0,1.8143572807312012
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,512,24,2,64,0,1,float16,fp8,0,1.853098710378011
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,512,24,2,64,0,1,float16,float16,0,1.9066880544026692
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,512,24,2,64,0,1,fp8,fp8,0,1.9877546628316243
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,512,24,4,64,0,1,float16,float16,0,2.0662612915039062
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,512,24,4,64,0,1,float16,fp8,0,2.013696034749349
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,512,24,4,64,0,1,fp8,fp8,0,2.1142187118530273
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,512,24,8,64,0,1,float16,float16,0,2.502997398376465
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,512,24,8,64,0,1,float16,fp8,0,2.4335360527038574
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,512,24,24,64,0,1,float16,fp8,0,1.7515519460042317
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,512,24,24,64,0,1,float16,float16,0,1.8570240338643391
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,512,24,8,64,0,1,fp8,fp8,0,2.4064000447591147
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,512,24,24,64,0,1,fp8,fp8,0,1.6278187433878581
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,512,24,1,64,0,1,float16,float16,0,0.8050346374511719
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,512,24,1,64,0,1,float16,fp8,0,0.8144213358561198
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,512,24,1,64,0,1,fp8,fp8,0,0.9101653099060059
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,512,24,2,64,0,1,float16,float16,0,0.8601600329081217
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,512,24,2,64,0,1,float16,fp8,0,0.85316268603007
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,512,24,2,64,0,1,fp8,fp8,0,0.9501012961069742
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,512,24,4,64,0,1,float16,float16,0,0.9770666758219401
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,512,24,4,64,0,1,float16,fp8,0,0.9456640084584554
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,512,24,4,64,0,1,fp8,fp8,0,1.0269013245900471
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,512,24,8,64,0,1,float16,float16,0,1.1542186737060547
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,512,24,8,64,0,1,float16,fp8,0,1.1050666968027751
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,512,24,24,64,0,1,float16,float16,0,0.8487253189086914
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,512,24,8,64,0,1,fp8,fp8,0,1.1526827017466228
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,512,24,24,64,0,1,float16,fp8,0,0.785919984181722
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,512,24,1,64,0,1,float16,float16,0,0.3123199939727783
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,512,24,24,64,0,1,fp8,fp8,0,0.7988906701405843
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,512,24,1,64,0,1,float16,fp8,0,0.31624533732732135
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,512,24,1,64,0,1,fp8,fp8,0,0.42820266882578534
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,512,24,2,64,0,1,float16,float16,0,0.32767999172210693
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,512,24,2,64,0,1,float16,fp8,0,0.3304106593132019
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,512,24,2,64,0,1,fp8,fp8,0,0.4399786790211995
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,512,24,4,64,0,1,float16,fp8,0,0.35072000821431476
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,512,24,4,64,0,1,float16,float16,0,0.36522666613260907
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,512,24,4,64,0,1,fp8,fp8,0,0.490666667620341
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,512,24,8,64,0,1,float16,float16,0,0.4734293222427368
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,512,24,8,64,0,1,float16,fp8,0,0.4432213306427002
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,512,24,8,64,0,1,fp8,fp8,0,0.5649066766103109
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,512,24,24,64,0,1,float16,float16,0,0.2841599980990092
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,512,24,24,64,0,1,fp8,fp8,0,0.40140799681345624
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,512,24,24,64,0,1,float16,fp8,0,0.2326186696688334
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,512,24,1,64,0,1,float16,float16,0,0.15291733543078104
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,512,24,1,64,0,1,float16,fp8,0,0.15359999736150107
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,512,24,2,64,0,1,float16,float16,0,0.15598932902018228
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,512,24,1,64,0,1,fp8,fp8,0,0.2053119937578837
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,512,24,2,64,0,1,float16,fp8,0,0.15411200126012167
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,512,24,2,64,0,1,fp8,fp8,0,0.20548266172409058
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,512,24,4,64,0,1,float16,float16,0,0.15615999698638916
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,512,24,4,64,0,1,float16,fp8,0,0.1551359991232554
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,512,24,8,64,0,1,float16,float16,0,0.15957333644231161
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,512,24,4,64,0,1,fp8,fp8,0,0.2053119937578837
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,512,24,8,64,0,1,float16,fp8,0,0.15769599874814352
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,512,24,8,64,0,1,fp8,fp8,0,0.21504000822703043
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,512,24,24,64,0,1,float16,float16,0,0.09147733449935913
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,512,24,24,64,0,1,float16,fp8,0,0.09267200032869975
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,512,24,24,64,0,1,fp8,fp8,0,0.1186133325099945
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,512,24,1,64,0,1,float16,float16,0,0.08823466300964355
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,512,24,1,64,0,1,float16,fp8,0,0.0885759989420573
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,512,24,1,64,0,1,fp8,fp8,0,0.11178666353225708
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,512,24,2,64,0,1,float16,float16,0,0.08891733487447102
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,512,24,2,64,0,1,float16,fp8,0,0.0890880028406779
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,512,24,2,64,0,1,fp8,fp8,0,0.11281067132949829
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,512,24,4,64,0,1,float16,float16,0,0.0885759989420573
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,512,24,4,64,0,1,float16,fp8,0,0.08772266904513042
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,512,24,8,64,0,1,float16,float16,0,0.08840533097585042
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,512,24,4,64,0,1,fp8,fp8,0,0.11229866743087769
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,512,24,8,64,0,1,float16,fp8,0,0.08891733487447102
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,512,24,8,64,0,1,fp8,fp8,0,0.11195733149846394
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,512,24,24,64,0,1,float16,float16,0,0.053930665055910744
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,512,24,24,64,0,1,float16,fp8,0,0.05341866612434387
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,512,24,24,64,0,1,fp8,fp8,0,0.06946133573849995
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,512,24,1,64,0,1,float16,float16,0,0.050517335534095764
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,512,24,1,64,0,1,float16,fp8,0,0.05205333232879639
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,512,24,1,64,0,1,fp8,fp8,0,0.067071999112765
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,512,24,2,64,0,1,float16,float16,0,0.05171200136343638
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,512,24,2,64,0,1,float16,fp8,0,0.05017599960168203
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,512,24,2,64,0,1,fp8,fp8,0,0.06741333504517873
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,512,24,4,64,0,1,float16,float16,0,0.05171200136343638
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,512,24,4,64,0,1,float16,fp8,0,0.05017599960168203
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,512,24,4,64,0,1,fp8,fp8,0,0.06860800087451935
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,512,24,8,64,0,1,float16,float16,0,0.05222400029500326
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,512,24,8,64,0,1,float16,fp8,0,0.05085866649945577
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,512,24,24,64,0,1,float16,float16,0,0.03293866664171219
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,512,24,8,64,0,1,fp8,fp8,0,0.06877866884072621
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,512,24,24,64,0,1,float16,fp8,0,0.03328000009059906
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,512,24,24,64,0,1,fp8,fp8,0,0.040789333482583366
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,512,24,1,64,0,1,float16,float16,0,0.032085334261258446
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,512,24,1,64,0,1,float16,fp8,0,0.032255999743938446
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,512,24,1,64,0,1,fp8,fp8,0,0.040106666584809623
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,512,24,2,64,0,1,float16,float16,0,0.0315733328461647
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,512,24,2,64,0,1,float16,fp8,0,0.031914666295051575
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,512,24,2,64,0,1,fp8,fp8,0,0.039594667653242745
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,512,24,4,64,0,1,float16,float16,0,0.031914666295051575
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,512,24,4,64,0,1,float16,fp8,0,0.03242666771014532
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,512,24,4,64,0,1,fp8,fp8,0,0.039936001102129616
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,512,24,8,64,0,1,float16,float16,0,0.031914666295051575
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,512,24,8,64,0,1,float16,fp8,0,0.03242666771014532
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,512,24,24,64,0,1,float16,float16,0,0.021674667795499165
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,512,24,8,64,0,1,fp8,fp8,0,0.039594667653242745
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,512,24,24,64,0,1,fp8,fp8,0,0.02918400118748347
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,512,24,24,64,0,1,float16,fp8,0,0.021503999829292297
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,512,24,1,64,0,1,float16,float16,0,0.02218666672706604
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,512,24,1,64,0,1,float16,fp8,0,0.02218666672706604
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,512,24,1,64,0,1,fp8,fp8,0,0.0288426677385966
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,512,24,2,64,0,1,float16,fp8,0,0.020992000897725422
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,512,24,2,64,0,1,fp8,fp8,0,0.0290133332212766
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,512,24,2,64,0,1,float16,float16,0,0.021162666380405426
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,512,24,4,64,0,1,float16,float16,0,0.021162666380405426
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,512,24,4,64,0,1,float16,fp8,0,0.020992000897725422
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,512,24,4,64,0,1,fp8,fp8,0,0.0288426677385966
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,512,24,8,64,0,1,float16,float16,0,0.021333334346612293
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,512,24,8,64,0,1,float16,fp8,0,0.020992000897725422
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,512,24,8,64,0,1,fp8,fp8,0,0.0290133332212766
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,256,24,1,64,0,1,fp8,fp8,0,1.4187520345052083
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,256,24,1,64,0,1,float16,fp8,0,1.5397547086079915
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,256,24,1,64,0,1,float16,float16,0,1.5612586339314778
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,256,24,2,64,0,1,float16,float16,0,1.6989866892496746
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,256,24,2,64,0,1,float16,fp8,0,1.6807252566019695
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,256,24,2,64,0,1,fp8,fp8,0,1.5820800463358562
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,256,24,4,64,0,1,float16,float16,0,1.8901333808898926
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,256,24,4,64,0,1,float16,fp8,0,1.8333013852437336
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,256,24,4,64,0,1,fp8,fp8,0,1.70632537206014
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,256,24,8,64,0,1,float16,float16,0,2.33079465230306
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,256,24,8,64,0,1,float16,fp8,0,2.2220800717671714
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,256,24,8,64,0,1,fp8,fp8,0,1.9997013409932454
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,256,24,24,64,0,1,float16,float16,0,1.8580479621887207
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,256,24,24,64,0,1,float16,fp8,0,1.7269760767618816
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,256,24,24,64,0,1,fp8,fp8,0,1.4474239349365234
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,256,24,1,64,0,1,float16,float16,0,0.6988800366719564
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,256,24,1,64,0,1,float16,fp8,0,0.7012693087259928
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,256,24,1,64,0,1,fp8,fp8,0,0.6949546337127686
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,256,24,2,64,0,1,float16,float16,0,0.7461547056833903
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,256,24,2,64,0,1,float16,fp8,0,0.7389866511027018
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,256,24,2,64,0,1,fp8,fp8,0,0.740010658899943
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,256,24,4,64,0,1,float16,float16,0,0.8400213718414307
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,256,24,4,64,0,1,float16,fp8,0,0.8127146561940511
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,256,24,4,64,0,1,fp8,fp8,0,0.8108373483022054
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,256,24,8,64,0,1,float16,float16,0,1.0796373685201008
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,256,24,8,64,0,1,float16,fp8,0,1.0262186527252197
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,256,24,8,64,0,1,fp8,fp8,0,0.943615992863973
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,256,24,24,64,0,1,float16,float16,0,0.8412160078684489
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,256,24,24,64,0,1,float16,fp8,0,0.794111967086792
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,256,24,24,64,0,1,fp8,fp8,0,0.7167999744415283
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,256,24,1,64,0,1,float16,float16,0,0.23705599705378214
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,256,24,1,64,0,1,float16,fp8,0,0.23244800170262656
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,256,24,1,64,0,1,fp8,fp8,0,0.3222186764081319
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,256,24,2,64,0,1,float16,float16,0,0.25548799832661945
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,256,24,2,64,0,1,float16,fp8,0,0.2512213389078776
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,256,24,2,64,0,1,fp8,fp8,0,0.3391146659851074
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,256,24,4,64,0,1,float16,float16,0,0.2954240043958028
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,256,24,4,64,0,1,float16,fp8,0,0.2903040051460266
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,256,24,4,64,0,1,fp8,fp8,0,0.385535995165507
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,256,24,8,64,0,1,float16,float16,0,0.41659732659657794
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,256,24,8,64,0,1,float16,fp8,0,0.3831466833750407
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,256,24,24,64,0,1,float16,float16,0,0.26385066906611127
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,256,24,8,64,0,1,fp8,fp8,0,0.4623359839121501
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,256,24,24,64,0,1,float16,fp8,0,0.18943999210993448
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,256,24,24,64,0,1,fp8,fp8,0,0.34508800506591797
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,256,24,1,64,0,1,float16,float16,0,0.11741866668065389
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,256,24,1,64,0,1,float16,fp8,0,0.11212799946467082
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,256,24,1,64,0,1,fp8,fp8,0,0.1462613344192505
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,256,24,2,64,0,1,float16,float16,0,0.11520000298817952
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,256,24,2,64,0,1,float16,fp8,0,0.11264000336329143
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,256,24,2,64,0,1,fp8,fp8,0,0.1479680041472117
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,256,24,4,64,0,1,float16,float16,0,0.11673600474993388
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,256,24,4,64,0,1,float16,fp8,0,0.11246933539708455
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,256,24,4,64,0,1,fp8,fp8,0,0.14865066607793173
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,256,24,8,64,0,1,float16,float16,0,0.11212799946467082
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,256,24,8,64,0,1,float16,fp8,0,0.11724799871444702
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,256,24,8,64,0,1,fp8,fp8,0,0.1525759994983673
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,256,24,24,64,0,1,float16,float16,0,0.07048533360163371
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,256,24,24,64,0,1,float16,fp8,0,0.0679253339767456
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,256,24,24,64,0,1,fp8,fp8,0,0.08721066514650981
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,256,24,1,64,0,1,float16,float16,0,0.06178133189678192
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,256,24,1,64,0,1,float16,fp8,0,0.062122667829195656
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,256,24,1,64,0,1,fp8,fp8,0,0.08038400113582611
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,256,24,2,64,0,1,float16,float16,0,0.0631466656923294
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,256,24,2,64,0,1,float16,fp8,0,0.06451199948787689
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,256,24,2,64,0,1,fp8,fp8,0,0.08021333316961925
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,256,24,4,64,0,1,float16,float16,0,0.06468266745408376
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,256,24,4,64,0,1,float16,fp8,0,0.0628053347269694
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,256,24,4,64,0,1,fp8,fp8,0,0.08021333316961925
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,256,24,8,64,0,1,float16,float16,0,0.06400000055631001
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,256,24,8,64,0,1,float16,fp8,0,0.06229333579540253
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,256,24,8,64,0,1,fp8,fp8,0,0.08055466910203297
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,256,24,24,64,0,1,float16,float16,0,0.040618665516376495
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,256,24,24,64,0,1,float16,fp8,0,0.039594667653242745
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,256,24,24,64,0,1,fp8,fp8,0,0.049322664737701416
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,256,24,1,64,0,1,float16,float16,0,0.03754666695992152
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,256,24,1,64,0,1,float16,fp8,0,0.03737599899371465
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,256,24,1,64,0,1,fp8,fp8,0,0.04778666794300079
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,256,24,2,64,0,1,float16,float16,0,0.03754666695992152
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,256,24,2,64,0,1,float16,fp8,0,0.03737599899371465
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,256,24,2,64,0,1,fp8,fp8,0,0.04744533201058706
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,256,24,4,64,0,1,float16,float16,0,0.03737599899371465
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,256,24,4,64,0,1,float16,fp8,0,0.038058665891488395
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,256,24,4,64,0,1,fp8,fp8,0,0.048298666874567665
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,256,24,8,64,0,1,float16,float16,0,0.038058665891488395
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,256,24,8,64,0,1,float16,fp8,0,0.03788800040880839
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,256,24,8,64,0,1,fp8,fp8,0,0.04778666794300079
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,256,24,24,64,0,1,float16,float16,0,0.025941332181294758
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,256,24,24,64,0,1,float16,fp8,0,0.02628266563018163
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,256,24,1,64,0,1,float16,float16,0,0.02491733431816101
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,256,24,24,64,0,1,fp8,fp8,0,0.03089066594839096
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,256,24,1,64,0,1,float16,fp8,0,0.025429333249727886
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,256,24,1,64,0,1,fp8,fp8,0,0.03054933249950409
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,256,24,2,64,0,1,float16,float16,0,0.025087999800841015
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,256,24,2,64,0,1,float16,fp8,0,0.025087999800841015
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,256,24,2,64,0,1,fp8,fp8,0,0.03054933249950409
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,256,24,4,64,0,1,float16,float16,0,0.025087999800841015
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,256,24,4,64,0,1,float16,fp8,0,0.025087999800841015
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,256,24,4,64,0,1,fp8,fp8,0,0.030207999050617218
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,256,24,8,64,0,1,float16,float16,0,0.025258667767047882
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,256,24,8,64,0,1,float16,fp8,0,0.025429333249727886
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,256,24,8,64,0,1,fp8,fp8,0,0.03089066594839096
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,256,24,24,64,0,1,float16,float16,0,0.015872000406185787
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,256,24,24,64,0,1,float16,fp8,0,0.015872000406185787
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,256,24,24,64,0,1,fp8,fp8,0,0.020138667275508244
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,256,24,1,64,0,1,float16,float16,0,0.01570133368174235
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,256,24,1,64,0,1,float16,fp8,0,0.015872000406185787
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,256,24,1,64,0,1,fp8,fp8,0,0.019968000551064808
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,256,24,2,64,0,1,float16,float16,0,0.015360000232855478
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,256,24,2,64,0,1,float16,fp8,0,0.01570133368174235
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,256,24,2,64,0,1,fp8,fp8,0,0.019797333826621372
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,256,24,4,64,0,1,float16,float16,0,0.015530666957298914
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,256,24,4,64,0,1,float16,fp8,0,0.01570133368174235
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,256,24,4,64,0,1,fp8,fp8,0,0.019968000551064808
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,256,24,8,64,0,1,float16,float16,0,0.01570133368174235
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,256,24,8,64,0,1,float16,fp8,0,0.01570133368174235
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,256,24,8,64,0,1,fp8,fp8,0,0.020309332758188248
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,256,24,24,64,0,1,float16,float16,0,0.012970666090647379
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,256,24,24,64,0,1,float16,fp8,0,0.013141332815090815
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,256,24,24,64,0,1,fp8,fp8,0,0.017407999684413273
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,256,24,1,64,0,1,float16,float16,0,0.013141332815090815
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,256,24,1,64,0,1,float16,fp8,0,0.01331199953953425
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,256,24,1,64,0,1,fp8,fp8,0,0.017237332959969837
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,256,24,2,64,0,1,float16,float16,0,0.012800000607967377
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,256,24,2,64,0,1,float16,fp8,0,0.012970666090647379
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,256,24,2,64,0,1,fp8,fp8,0,0.017407999684413273
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,256,24,4,64,0,1,float16,float16,0,0.012800000607967377
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,256,24,4,64,0,1,float16,fp8,0,0.013141332815090815
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,256,24,4,64,0,1,fp8,fp8,0,0.01757866640885671
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,256,24,8,64,0,1,float16,float16,0,0.012970666090647379
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,256,24,8,64,0,1,float16,fp8,0,0.013141332815090815
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,256,24,8,64,0,1,fp8,fp8,0,0.017407999684413273
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,128,24,1,64,0,1,fp8,fp8,0,0.6097919940948486
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,128,24,1,64,0,1,float16,fp8,0,0.6686720053354899
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,128,24,1,64,0,1,float16,float16,0,0.6717440287272135
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,128,24,2,64,0,1,float16,float16,0,0.7531519730885824
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,128,24,2,64,0,1,float16,fp8,0,0.7369386355082194
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,128,24,2,64,0,1,fp8,fp8,0,0.6444373528162638
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,128,24,4,64,0,1,float16,float16,0,0.8630613485972086
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,128,24,4,64,0,1,float16,fp8,0,0.8357546329498291
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,128,24,4,64,0,1,fp8,fp8,0,0.7173120180765787
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,128,24,8,64,0,1,float16,float16,0,1.0963626702626545
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,128,24,8,64,0,1,fp8,fp8,0,0.8719360033671061
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,128,24,8,64,0,1,float16,fp8,0,1.0270720322926838
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,128,24,24,64,0,1,float16,float16,0,0.8627200126647949
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,128,24,24,64,0,1,float16,fp8,0,0.8002560138702393
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,128,24,24,64,0,1,fp8,fp8,0,0.7063893477121989
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,128,24,1,64,0,1,float16,float16,0,0.19336533546447754
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,128,24,1,64,0,1,float16,fp8,0,0.19029333194096884
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,128,24,1,64,0,1,fp8,fp8,0,0.27409066756566364
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,128,24,2,64,0,1,float16,float16,0,0.21760000785191855
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,128,24,2,64,0,1,float16,fp8,0,0.20906666914621988
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,128,24,2,64,0,1,fp8,fp8,0,0.28859732548395794
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,128,24,4,64,0,1,float16,float16,0,0.27511467536290485
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,128,24,4,64,0,1,float16,fp8,0,0.2510506709416707
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,128,24,4,64,0,1,fp8,fp8,0,0.33877333005269367
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,128,24,8,64,0,1,float16,float16,0,0.438101331392924
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,128,24,8,64,0,1,fp8,fp8,0,0.4148906469345093
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,128,24,8,64,0,1,float16,fp8,0,0.3997013171513875
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,128,24,24,64,0,1,float16,float16,0,0.26265599330266315
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,128,24,24,64,0,1,float16,fp8,0,0.1909760038057963
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,128,24,24,64,0,1,fp8,fp8,0,0.3304106593132019
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,128,24,1,64,0,1,float16,float16,0,0.08721066514650981
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,128,24,1,64,0,1,float16,fp8,0,0.08755200107892354
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,128,24,2,64,0,1,float16,float16,0,0.0890880028406779
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,128,24,1,64,0,1,fp8,fp8,0,0.11281067132949829
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,128,24,2,64,0,1,float16,fp8,0,0.08994133273760478
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,128,24,2,64,0,1,fp8,fp8,0,0.116565336783727
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,128,24,4,64,0,1,float16,float16,0,0.09096533060073853
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,128,24,4,64,0,1,float16,fp8,0,0.08925867080688477
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,128,24,4,64,0,1,fp8,fp8,0,0.11400533715883891
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,128,24,8,64,0,1,float16,fp8,0,0.09011200070381165
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,128,24,8,64,0,1,float16,float16,0,0.09113599856694539
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,128,24,8,64,0,1,fp8,fp8,0,0.12390399972597758
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,128,24,24,64,0,1,float16,float16,0,0.055125330885251365
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,128,24,24,64,0,1,float16,fp8,0,0.05444266895453135
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,128,24,24,64,0,1,fp8,fp8,0,0.07133866846561432
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,128,24,1,64,0,1,float16,float16,0,0.04863999783992767
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,128,24,1,64,0,1,float16,fp8,0,0.04795733094215393
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,128,24,1,64,0,1,fp8,fp8,0,0.06229333579540253
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,128,24,2,64,0,1,float16,fp8,0,0.048810665806134544
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,128,24,2,64,0,1,float16,float16,0,0.048298666874567665
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,128,24,2,64,0,1,fp8,fp8,0,0.06348800162474315
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,128,24,4,64,0,1,float16,float16,0,0.04915200173854828
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,128,24,4,64,0,1,float16,fp8,0,0.04915200173854828
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,128,24,4,64,0,1,fp8,fp8,0,0.06348800162474315
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,128,24,8,64,0,1,float16,float16,0,0.04863999783992767
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,128,24,8,64,0,1,float16,fp8,0,0.04863999783992767
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,128,24,8,64,0,1,fp8,fp8,0,0.0628053347269694
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,128,24,24,64,0,1,float16,fp8,0,0.03276800115903219
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,128,24,24,64,0,1,float16,float16,0,0.03293866664171219
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,128,24,24,64,0,1,fp8,fp8,0,0.04215466479460398
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,128,24,1,64,0,1,float16,float16,0,0.030720000465710957
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,128,24,1,64,0,1,float16,fp8,0,0.030207999050617218
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,128,24,1,64,0,1,fp8,fp8,0,0.03942399968703588
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,128,24,2,64,0,1,float16,float16,0,0.030378667016824085
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,128,24,2,64,0,1,float16,fp8,0,0.030207999050617218
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,128,24,2,64,0,1,fp8,fp8,0,0.04027733455101649
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,128,24,4,64,0,1,float16,float16,0,0.030207999050617218
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,128,24,4,64,0,1,float16,fp8,0,0.030378667016824085
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,128,24,4,64,0,1,fp8,fp8,0,0.04027733455101649
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,128,24,8,64,0,1,float16,float16,0,0.03089066594839096
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,128,24,8,64,0,1,float16,fp8,0,0.03089066594839096
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,128,24,8,64,0,1,fp8,fp8,0,0.040448000033696495
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,128,24,24,64,0,1,float16,float16,0,0.02184533327817917
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,128,24,24,64,0,1,float16,fp8,0,0.022015998760859173
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,128,24,1,64,0,1,float16,float16,0,0.020479999482631683
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,128,24,24,64,0,1,fp8,fp8,0,0.024746666351954143
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,128,24,1,64,0,1,float16,fp8,0,0.020992000897725422
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,128,24,1,64,0,1,fp8,fp8,0,0.0240639994541804
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,128,24,2,64,0,1,float16,fp8,0,0.021162666380405426
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,128,24,2,64,0,1,float16,float16,0,0.020992000897725422
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,128,24,2,64,0,1,fp8,fp8,0,0.023893333971500397
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,128,24,4,64,0,1,float16,float16,0,0.020821332931518555
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,128,24,4,64,0,1,float16,fp8,0,0.021333334346612293
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,128,24,4,64,0,1,fp8,fp8,0,0.02372266600529353
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,128,24,8,64,0,1,float16,float16,0,0.021162666380405426
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,128,24,8,64,0,1,float16,fp8,0,0.021162666380405426
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,128,24,8,64,0,1,fp8,fp8,0,0.023893333971500397
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,128,24,24,64,0,1,float16,float16,0,0.013653332988421122
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,128,24,24,64,0,1,float16,fp8,0,0.013653332988421122
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,128,24,24,64,0,1,fp8,fp8,0,0.016042667130629223
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,128,24,1,64,0,1,float16,float16,0,0.01331199953953425
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,128,24,1,64,0,1,float16,fp8,0,0.01331199953953425
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,128,24,1,64,0,1,fp8,fp8,0,0.016042667130629223
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,128,24,2,64,0,1,float16,float16,0,0.013141332815090815
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,128,24,2,64,0,1,float16,fp8,0,0.01331199953953425
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,128,24,2,64,0,1,fp8,fp8,0,0.01570133368174235
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,128,24,4,64,0,1,float16,float16,0,0.013141332815090815
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,128,24,4,64,0,1,float16,fp8,0,0.013141332815090815
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,128,24,4,64,0,1,fp8,fp8,0,0.016042667130629223
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,128,24,8,64,0,1,float16,float16,0,0.01331199953953425
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,128,24,8,64,0,1,float16,fp8,0,0.013482666263977686
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,128,24,8,64,0,1,fp8,fp8,0,0.015872000406185787
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,128,24,24,64,0,1,float16,float16,0,0.01109333336353302
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,128,24,24,64,0,1,fp8,fp8,0,0.013482666263977686
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,128,24,24,64,0,1,float16,fp8,0,0.010922666639089584
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,128,24,1,64,0,1,float16,float16,0,0.010751999914646149
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,128,24,1,64,0,1,float16,fp8,0,0.01109333336353302
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,128,24,1,64,0,1,fp8,fp8,0,0.013482666263977686
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,128,24,2,64,0,1,float16,float16,0,0.010751999914646149
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,128,24,2,64,0,1,float16,fp8,0,0.010751999914646149
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,128,24,2,64,0,1,fp8,fp8,0,0.013653332988421122
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,128,24,4,64,0,1,float16,float16,0,0.010581333190202713
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,128,24,4,64,0,1,fp8,fp8,0,0.013482666263977686
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,128,24,4,64,0,1,float16,fp8,0,0.010922666639089584
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,128,24,8,64,0,1,float16,float16,0,0.010751999914646149
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,128,24,8,64,0,1,float16,fp8,0,0.01109333336353302
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,128,24,8,64,0,1,fp8,fp8,0,0.013653332988421122
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,128,24,24,64,0,1,float16,float16,0,0.009898666913310686
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,128,24,24,64,0,1,float16,fp8,0,0.010069333637754122
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,128,24,24,64,0,1,fp8,fp8,0,0.012458667159080505
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,128,24,1,64,0,1,float16,float16,0,0.010069333637754122
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,128,24,1,64,0,1,float16,fp8,0,0.010069333637754122
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,128,24,1,64,0,1,fp8,fp8,0,0.012800000607967377
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,128,24,2,64,0,1,float16,float16,0,0.009898666913310686
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,128,24,2,64,0,1,float16,fp8,0,0.010239999741315842
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,128,24,2,64,0,1,fp8,fp8,0,0.012629333883523941
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,128,24,4,64,0,1,float16,float16,0,0.010069333637754122
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,128,24,4,64,0,1,float16,fp8,0,0.010239999741315842
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,128,24,4,64,0,1,fp8,fp8,0,0.012800000607967377
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,128,24,8,64,0,1,float16,float16,0,0.010069333637754122
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,128,24,8,64,0,1,float16,fp8,0,0.010410666465759277
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,128,24,8,64,0,1,fp8,fp8,0,0.012800000607967377
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,64,24,1,64,0,1,float16,float16,0,0.1925119956334432
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,64,24,1,64,0,1,float16,fp8,0,0.18807466824849448
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,64,24,1,64,0,1,fp8,fp8,0,0.3619840145111084
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,64,24,2,64,0,1,float16,float16,0,0.22101332743962607
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,64,24,2,64,0,1,float16,fp8,0,0.21128533283869425
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,64,24,2,64,0,1,fp8,fp8,0,0.38229334354400635
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,64,24,4,64,0,1,float16,float16,0,0.28194133440653485
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,64,24,4,64,0,1,float16,fp8,0,0.26146133740743
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,64,24,4,64,0,1,fp8,fp8,0,0.4317866563796997
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,64,24,8,64,0,1,float16,float16,0,0.4317866563796997
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,64,24,8,64,0,1,float16,fp8,0,0.3985066811243693
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,64,24,24,64,0,1,float16,float16,0,0.26470400889714557
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,64,24,8,64,0,1,fp8,fp8,0,0.5046613216400146
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,64,24,24,64,0,1,float16,fp8,0,0.1800533334414164
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,64,24,24,64,0,1,fp8,fp8,0,0.3653973340988159
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,64,24,1,64,0,1,float16,float16,0,0.07099733253320058
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,64,24,1,64,0,1,float16,fp8,0,0.07116800049940745
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,64,24,1,64,0,1,fp8,fp8,0,0.1534293293952942
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,64,24,2,64,0,1,float16,float16,0,0.07167999943097432
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,64,24,2,64,0,1,float16,fp8,0,0.07202133536338806
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,64,24,2,64,0,1,fp8,fp8,0,0.15479466319084167
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,64,24,4,64,0,1,float16,float16,0,0.0721919983625412
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,64,24,4,64,0,1,float16,fp8,0,0.07202133536338806
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,64,24,4,64,0,1,fp8,fp8,0,0.15530666708946228
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,64,24,8,64,0,1,float16,float16,0,0.07748266557852428
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,64,24,8,64,0,1,float16,fp8,0,0.07492266595363617
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,64,24,8,64,0,1,fp8,fp8,0,0.17015467087427774
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,64,24,24,64,0,1,float16,float16,0,0.04693333307902018
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,64,24,24,64,0,1,float16,fp8,0,0.04488533238569895
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,64,24,24,64,0,1,fp8,fp8,0,0.09864532947540283
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,64,24,1,64,0,1,float16,float16,0,0.04095999896526337
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,64,24,1,64,0,1,float16,fp8,0,0.04113066693147024
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,64,24,1,64,0,1,fp8,fp8,0,0.08601599931716919
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,64,24,2,64,0,1,float16,float16,0,0.04164266586303711
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,64,24,2,64,0,1,float16,fp8,0,0.04130133241415024
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,64,24,2,64,0,1,fp8,fp8,0,0.08550399541854858
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,64,24,4,64,0,1,float16,float16,0,0.04095999896526337
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,64,24,4,64,0,1,float16,fp8,0,0.04095999896526337
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,64,24,4,64,0,1,fp8,fp8,0,0.08584533135096233
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,64,24,8,64,0,1,float16,float16,0,0.04232533276081085
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,64,24,8,64,0,1,float16,fp8,0,0.04232533276081085
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,64,24,24,64,0,1,float16,float16,0,0.02918400118748347
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,64,24,8,64,0,1,fp8,fp8,0,0.08584533135096233
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,64,24,24,64,0,1,fp8,fp8,0,0.051541333397229515
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,64,24,24,64,0,1,float16,fp8,0,0.02867199977238973
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,64,24,1,64,0,1,float16,float16,0,0.02611200014750163
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,64,24,1,64,0,1,float16,fp8,0,0.025770666698614757
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,64,24,1,64,0,1,fp8,fp8,0,0.05017599960168203
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,64,24,2,64,0,1,float16,float16,0,0.02611200014750163
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,64,24,2,64,0,1,float16,fp8,0,0.02611200014750163
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,64,24,2,64,0,1,fp8,fp8,0,0.04966400067011515
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,64,24,4,64,0,1,float16,float16,0,0.025770666698614757
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,64,24,4,64,0,1,float16,fp8,0,0.02611200014750163
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,64,24,4,64,0,1,fp8,fp8,0,0.04983466863632202
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,64,24,8,64,0,1,float16,float16,0,0.026965332527955372
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,64,24,8,64,0,1,float16,fp8,0,0.02679466704527537
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,64,24,8,64,0,1,fp8,fp8,0,0.05085866649945577
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,64,24,24,64,0,1,float16,float16,0,0.019797333826621372
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,64,24,24,64,0,1,fp8,fp8,0,0.0339626669883728
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,64,24,24,64,0,1,float16,fp8,0,0.019626667102177937
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,64,24,1,64,0,1,float16,float16,0,0.018432000031073887
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,64,24,1,64,0,1,float16,fp8,0,0.01826133330663045
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,64,24,1,64,0,1,fp8,fp8,0,0.03259733319282532
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,64,24,2,64,0,1,float16,float16,0,0.01826133330663045
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,64,24,2,64,0,1,float16,fp8,0,0.018602666755517323
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,64,24,2,64,0,1,fp8,fp8,0,0.03328000009059906
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,64,24,4,64,0,1,float16,float16,0,0.018432000031073887
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,64,24,4,64,0,1,fp8,fp8,0,0.03362133353948593
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,64,24,4,64,0,1,float16,fp8,0,0.018602666755517323
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,64,24,8,64,0,1,float16,float16,0,0.018602666755517323
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,64,24,8,64,0,1,float16,fp8,0,0.01877333347996076
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,64,24,8,64,0,1,fp8,fp8,0,0.03310933212439219
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,64,24,24,64,0,1,float16,float16,0,0.012117333710193634
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,64,24,24,64,0,1,float16,fp8,0,0.012117333710193634
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,64,24,24,64,0,1,fp8,fp8,0,0.03925333420435587
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,64,24,1,64,0,1,float16,float16,0,0.011776000261306763
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,64,24,1,64,0,1,float16,fp8,0,0.011605333536863327
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,64,24,1,64,0,1,fp8,fp8,0,0.01877333347996076
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,64,24,2,64,0,1,float16,float16,0,0.011434666812419891
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,64,24,2,64,0,1,float16,fp8,0,0.011434666812419891
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,64,24,2,64,0,1,fp8,fp8,0,0.01877333347996076
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,64,24,4,64,0,1,float16,float16,0,0.011605333536863327
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,64,24,4,64,0,1,float16,fp8,0,0.011434666812419891
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,64,24,4,64,0,1,fp8,fp8,0,0.01877333347996076
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,64,24,8,64,0,1,float16,float16,0,0.011946666985750198
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,64,24,8,64,0,1,float16,fp8,0,0.011776000261306763
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,64,24,8,64,0,1,fp8,fp8,0,0.018944000204404194
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,64,24,24,64,0,1,float16,fp8,0,0.009898666913310686
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,64,24,24,64,0,1,float16,float16,0,0.009557333464423815
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,64,24,24,64,0,1,fp8,fp8,0,0.014335999886194864
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,64,24,1,64,0,1,float16,float16,0,0.009557333464423815
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,64,24,1,64,0,1,float16,fp8,0,0.009898666913310686
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,64,24,1,64,0,1,fp8,fp8,0,0.014165333161751429
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,64,24,2,64,0,1,float16,float16,0,0.009557333464423815
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,64,24,2,64,0,1,float16,fp8,0,0.009898666913310686
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,64,24,2,64,0,1,fp8,fp8,0,0.014165333161751429
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,64,24,4,64,0,1,float16,float16,0,0.009557333464423815
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,64,24,4,64,0,1,float16,fp8,0,0.00972800018886725
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,64,24,4,64,0,1,fp8,fp8,0,0.014165333161751429
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,64,24,8,64,0,1,float16,float16,0,0.009898666913310686
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,64,24,8,64,0,1,float16,fp8,0,0.009898666913310686
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,64,24,8,64,0,1,fp8,fp8,0,0.014165333161751429
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,64,24,24,64,0,1,float16,fp8,0,0.009216000015536943
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,64,24,24,64,0,1,float16,float16,0,0.008874666566650072
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,64,24,24,64,0,1,fp8,fp8,0,0.012458667159080505
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,64,24,1,64,0,1,float16,float16,0,0.009045333291093508
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,64,24,1,64,0,1,float16,fp8,0,0.009045333291093508
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,64,24,1,64,0,1,fp8,fp8,0,0.01228800043463707
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,64,24,2,64,0,1,float16,float16,0,0.008874666566650072
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,64,24,2,64,0,1,float16,fp8,0,0.009045333291093508
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,64,24,2,64,0,1,fp8,fp8,0,0.012458667159080505
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,64,24,4,64,0,1,float16,fp8,0,0.009045333291093508
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,64,24,4,64,0,1,float16,float16,0,0.008874666566650072
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,64,24,4,64,0,1,fp8,fp8,0,0.012629333883523941
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,64,24,8,64,0,1,float16,float16,0,0.009045333291093508
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,64,24,8,64,0,1,float16,fp8,0,0.009216000015536943
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,64,24,8,64,0,1,fp8,fp8,0,0.012629333883523941
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,64,24,24,64,0,1,float16,float16,0,0.0085333331177632
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,64,24,24,64,0,1,float16,fp8,0,0.008874666566650072
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,64,24,24,64,0,1,fp8,fp8,0,0.012117333710193634
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,64,24,1,64,0,1,float16,float16,0,0.008703999842206636
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,64,24,1,64,0,1,float16,fp8,0,0.009045333291093508
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,64,24,1,64,0,1,fp8,fp8,0,0.012117333710193634
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,64,24,2,64,0,1,float16,float16,0,0.0085333331177632
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,64,24,2,64,0,1,float16,fp8,0,0.008874666566650072
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,64,24,2,64,0,1,fp8,fp8,0,0.01228800043463707
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,64,24,4,64,0,1,float16,float16,0,0.00972800018886725
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,64,24,4,64,0,1,float16,fp8,0,0.008874666566650072
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,64,24,4,64,0,1,fp8,fp8,0,0.01228800043463707
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,64,24,8,64,0,1,float16,float16,0,0.008703999842206636
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,64,24,8,64,0,1,float16,fp8,0,0.008874666566650072
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,64,24,8,64,0,1,fp8,fp8,0,0.01228800043463707
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,32,24,1,64,0,1,float16,float16,0,0.07970133423805237
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,32,24,1,64,0,1,float16,fp8,0,0.0795306662718455
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,32,24,1,64,0,1,fp8,fp8,0,0.25361067056655884
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,32,24,2,64,0,1,float16,float16,0,0.0820906658967336
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,32,24,2,64,0,1,float16,fp8,0,0.08123733103275299
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,32,24,2,64,0,1,fp8,fp8,0,0.25412267446517944
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,32,24,4,64,0,1,float16,float16,0,0.08277333279450734
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,32,24,4,64,0,1,float16,fp8,0,0.08260266482830048
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,32,24,4,64,0,1,fp8,fp8,0,0.25361067056655884
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,32,24,8,64,0,1,float16,float16,0,0.08618666728337605
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,32,24,8,64,0,1,float16,fp8,0,0.08499200145403545
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,32,24,24,64,0,1,float16,float16,0,0.048469334840774536
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,32,24,8,64,0,1,fp8,fp8,0,0.2728959918022156
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,32,24,24,64,0,1,float16,fp8,0,0.04625066618124644
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,32,24,24,64,0,1,fp8,fp8,0,0.1495039959748586
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,32,24,1,64,0,1,float16,float16,0,0.04454400142033895
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,32,24,1,64,0,1,float16,fp8,0,0.04437333345413208
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,32,24,1,64,0,1,fp8,fp8,0,0.13431466619173685
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,32,24,2,64,0,1,float16,float16,0,0.04454400142033895
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,32,24,2,64,0,1,float16,fp8,0,0.04420266548792521
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,32,24,2,64,0,1,fp8,fp8,0,0.13431466619173685
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,32,24,4,64,0,1,float16,float16,0,0.04471466441949209
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,32,24,4,64,0,1,float16,fp8,0,0.04420266548792521
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,32,24,4,64,0,1,fp8,fp8,0,0.13431466619173685
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,32,24,8,64,0,1,float16,float16,0,0.045226668318112694
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,32,24,8,64,0,1,fp8,fp8,0,0.1358506679534912
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,32,24,8,64,0,1,float16,fp8,0,0.04488533238569895
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,32,24,24,64,0,1,float16,float16,0,0.029525332152843475
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,32,24,24,64,0,1,float16,fp8,0,0.0288426677385966
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,32,24,24,64,0,1,fp8,fp8,0,0.07816533247629802
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,32,24,1,64,0,1,float16,float16,0,0.027647999425729115
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,32,24,1,64,0,1,float16,fp8,0,0.027989332874615986
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,32,24,1,64,0,1,fp8,fp8,0,0.07645866771539052
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,32,24,2,64,0,1,float16,float16,0,0.027818667391935985
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,32,24,2,64,0,1,float16,fp8,0,0.027647999425729115
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,32,24,2,64,0,1,fp8,fp8,0,0.0766293356815974
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,32,24,4,64,0,1,float16,float16,0,0.028160000840822857
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,32,24,4,64,0,1,float16,fp8,0,0.027818667391935985
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,32,24,4,64,0,1,fp8,fp8,0,0.07628799974918365
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,32,24,8,64,0,1,float16,float16,0,0.0288426677385966
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,32,24,8,64,0,1,float16,fp8,0,0.028330666323502857
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,32,24,8,64,0,1,fp8,fp8,0,0.07748266557852428
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,32,24,24,64,0,1,float16,float16,0,0.0194560003777345
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,32,24,1,64,0,1,float16,float16,0,0.018432000031073887
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,32,24,24,64,0,1,float16,fp8,0,0.01911466692884763
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,32,24,24,64,0,1,fp8,fp8,0,0.04607999821503957
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,32,24,1,64,0,1,float16,fp8,0,0.01826133330663045
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,32,24,1,64,0,1,fp8,fp8,0,0.045226668318112694
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,32,24,2,64,0,1,float16,float16,0,0.018432000031073887
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,32,24,2,64,0,1,float16,fp8,0,0.018432000031073887
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,32,24,4,64,0,1,float16,float16,0,0.018432000031073887
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,32,24,2,64,0,1,fp8,fp8,0,0.04488533238569895
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,32,24,4,64,0,1,float16,fp8,0,0.018432000031073887
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,32,24,4,64,0,1,fp8,fp8,0,0.04488533238569895
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,32,24,8,64,0,1,float16,float16,0,0.018432000031073887
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,32,24,8,64,0,1,float16,fp8,0,0.018944000204404194
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,32,24,8,64,0,1,fp8,fp8,0,0.04539733131726583
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,32,24,24,64,0,1,float16,float16,0,0.012970666090647379
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,32,24,24,64,0,1,float16,fp8,0,0.012970666090647379
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,32,24,24,64,0,1,fp8,fp8,0,0.02867199977238973
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,32,24,1,64,0,1,float16,float16,0,0.012629333883523941
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,32,24,1,64,0,1,float16,fp8,0,0.012629333883523941
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,32,24,1,64,0,1,fp8,fp8,0,0.0288426677385966
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,32,24,2,64,0,1,float16,float16,0,0.012458667159080505
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,32,24,2,64,0,1,float16,fp8,0,0.012629333883523941
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,32,24,4,64,0,1,float16,float16,0,0.012629333883523941
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,32,24,4,64,0,1,float16,fp8,0,0.012458667159080505
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,32,24,4,64,0,1,fp8,fp8,0,0.0290133332212766
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,32,24,8,64,0,1,float16,fp8,0,0.012800000607967377
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,32,24,8,64,0,1,float16,float16,0,0.012800000607967377
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,32,24,2,64,0,1,fp8,fp8,0,0.0288426677385966
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,32,24,8,64,0,1,fp8,fp8,0,0.029525332152843475
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,32,24,24,64,0,1,float16,float16,0,0.009898666913310686
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,32,24,24,64,0,1,float16,fp8,0,0.010069333637754122
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,32,24,24,64,0,1,fp8,fp8,0,0.01757866640885671
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,32,24,1,64,0,1,float16,float16,0,0.009898666913310686
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,32,24,1,64,0,1,float16,fp8,0,0.009898666913310686
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,32,24,1,64,0,1,fp8,fp8,0,0.01757866640885671
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,32,24,2,64,0,1,float16,fp8,0,0.010069333637754122
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,32,24,2,64,0,1,float16,float16,0,0.00972800018886725
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,32,24,2,64,0,1,fp8,fp8,0,0.017749333133300144
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,32,24,4,64,0,1,float16,float16,0,0.009557333464423815
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,32,24,8,64,0,1,float16,float16,0,0.009557333464423815
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,32,24,4,64,0,1,float16,fp8,0,0.00972800018886725
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,32,24,4,64,0,1,fp8,fp8,0,0.01757866640885671
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,32,24,8,64,0,1,float16,fp8,0,0.010069333637754122
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,32,24,8,64,0,1,fp8,fp8,0,0.017749333133300144
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,32,24,24,64,0,1,float16,float16,0,0.00938666673998038
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,32,24,24,64,0,1,float16,fp8,0,0.008703999842206636
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,32,24,24,64,0,1,fp8,fp8,0,0.01331199953953425
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,32,24,1,64,0,1,float16,float16,0,0.008362666393319765
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,32,24,1,64,0,1,float16,fp8,0,0.008874666566650072
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,32,24,2,64,0,1,float16,float16,0,0.008362666393319765
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,32,24,1,64,0,1,fp8,fp8,0,0.013823999712864557
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,32,24,2,64,0,1,float16,fp8,0,0.00938666673998038
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,32,24,4,64,0,1,float16,float16,0,0.0085333331177632
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,32,24,2,64,0,1,fp8,fp8,0,0.013653332988421122
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,32,24,4,64,0,1,float16,fp8,0,0.009045333291093508
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,32,24,4,64,0,1,fp8,fp8,0,0.013482666263977686
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,32,24,8,64,0,1,float16,fp8,0,0.008874666566650072
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,32,24,8,64,0,1,float16,float16,0,0.008703999842206636
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,32,24,24,64,0,1,float16,float16,0,0.008703999842206636
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,32,24,8,64,0,1,fp8,fp8,0,0.013823999712864557
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,32,24,24,64,0,1,float16,fp8,0,0.008192000289758047
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,32,24,24,64,0,1,fp8,fp8,0,0.012117333710193634
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,32,24,1,64,0,1,float16,float16,0,0.0085333331177632
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,32,24,1,64,0,1,float16,fp8,0,0.008192000289758047
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,32,24,1,64,0,1,fp8,fp8,0,0.012117333710193634
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,32,24,2,64,0,1,float16,fp8,0,0.00938666673998038
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,32,24,2,64,0,1,float16,float16,0,0.009045333291093508
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,32,24,2,64,0,1,fp8,fp8,0,0.01228800043463707
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,32,24,4,64,0,1,float16,float16,0,0.008703999842206636
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,32,24,4,64,0,1,fp8,fp8,0,0.01228800043463707
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,32,24,4,64,0,1,float16,fp8,0,0.00938666673998038
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,32,24,8,64,0,1,float16,float16,0,0.008021333565314611
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,32,24,8,64,0,1,float16,fp8,0,0.008192000289758047
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,32,24,8,64,0,1,fp8,fp8,0,0.01228800043463707
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,32,24,24,64,0,1,float16,float16,0,0.008703999842206636
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,32,24,24,64,0,1,float16,fp8,0,0.008362666393319765
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,32,24,24,64,0,1,fp8,fp8,0,0.011776000261306763
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,32,24,1,64,0,1,float16,float16,0,0.009045333291093508
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,32,24,1,64,0,1,fp8,fp8,0,0.011946666985750198
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,32,24,1,64,0,1,float16,fp8,0,0.009216000015536943
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,32,24,2,64,0,1,float16,fp8,0,0.008874666566650072
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,32,24,2,64,0,1,float16,float16,0,0.008021333565314611
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,32,24,2,64,0,1,fp8,fp8,0,0.012117333710193634
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,32,24,4,64,0,1,float16,float16,0,0.0085333331177632
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,32,24,4,64,0,1,float16,fp8,0,0.008362666393319765
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,32,24,4,64,0,1,fp8,fp8,0,0.01228800043463707
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,32,24,8,64,0,1,float16,float16,0,0.008192000289758047
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,32,24,8,64,0,1,fp8,fp8,0,0.011946666985750198
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,32,24,8,64,0,1,float16,fp8,0,0.009216000015536943
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,16,24,1,64,0,1,float16,float16,0,0.062463998794555664
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,16,24,1,64,0,1,float16,fp8,0,0.062122667829195656
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,16,24,1,64,0,1,fp8,fp8,0,0.23449599742889404
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,16,24,2,64,0,1,float16,float16,0,0.062463998794555664
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,16,24,2,64,0,1,float16,fp8,0,0.0628053347269694
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,16,24,2,64,0,1,fp8,fp8,0,0.23398399353027344
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,16,24,4,64,0,1,float16,float16,0,0.0631466656923294
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,16,24,4,64,0,1,fp8,fp8,0,0.23415466149648032
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,16,24,8,64,0,1,float16,float16,0,0.06365866462389629
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,16,24,4,64,0,1,float16,fp8,0,0.0628053347269694
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,16,24,8,64,0,1,float16,fp8,0,0.0631466656923294
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,16,24,8,64,0,1,fp8,fp8,0,0.23637332518895468
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,16,24,24,64,0,1,float16,float16,0,0.0363520011305809
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,16,24,24,64,0,1,float16,fp8,0,0.03566933423280716
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,16,24,24,64,0,1,fp8,fp8,0,0.12680533528327942
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,16,24,1,64,0,1,float16,float16,0,0.0363520011305809
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,16,24,1,64,0,1,float16,fp8,0,0.03601066768169403
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,16,24,2,64,0,1,float16,float16,0,0.0363520011305809
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,16,24,1,64,0,1,fp8,fp8,0,0.12595199545224509
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,16,24,2,64,0,1,float16,fp8,0,0.03601066768169403
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,16,24,2,64,0,1,fp8,fp8,0,0.12578133742014566
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,16,24,4,64,0,1,float16,float16,0,0.03618133316437403
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,16,24,4,64,0,1,float16,fp8,0,0.0363520011305809
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,16,24,4,64,0,1,fp8,fp8,0,0.1262933313846588
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,16,24,8,64,0,1,float16,float16,0,0.03703466554482778
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,16,24,8,64,0,1,float16,fp8,0,0.036864000062147774
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,16,24,8,64,0,1,fp8,fp8,0,0.12680533528327942
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,16,24,24,64,0,1,float16,float16,0,0.022698665658632915
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,16,24,24,64,0,1,float16,fp8,0,0.022869333624839783
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,16,24,24,64,0,1,fp8,fp8,0,0.07116800049940745
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,16,24,1,64,0,1,float16,float16,0,0.022869333624839783
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,16,24,1,64,0,1,float16,fp8,0,0.022869333624839783
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,16,24,1,64,0,1,fp8,fp8,0,0.07099733253320058
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,16,24,2,64,0,1,float16,float16,0,0.022869333624839783
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,16,24,2,64,0,1,float16,fp8,0,0.022869333624839783
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,16,24,2,64,0,1,fp8,fp8,0,0.07167999943097432
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,16,24,4,64,0,1,float16,float16,0,0.022869333624839783
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,16,24,4,64,0,1,float16,fp8,0,0.022869333624839783
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,16,24,4,64,0,1,fp8,fp8,0,0.07150933146476746
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,16,24,8,64,0,1,float16,float16,0,0.023210667073726654
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,16,24,8,64,0,1,float16,fp8,0,0.023039999107519787
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,16,24,8,64,0,1,fp8,fp8,0,0.07202133536338806
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,16,24,24,64,0,1,float16,fp8,0,0.013994666437307993
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,16,24,24,64,0,1,float16,float16,0,0.013994666437307993
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,16,24,24,64,0,1,fp8,fp8,0,0.04095999896526337
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,16,24,1,64,0,1,float16,float16,0,0.013994666437307993
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,16,24,1,64,0,1,float16,fp8,0,0.014165333161751429
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,16,24,1,64,0,1,fp8,fp8,0,0.04113066693147024
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,16,24,2,64,0,1,float16,float16,0,0.013994666437307993
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,16,24,2,64,0,1,float16,fp8,0,0.014165333161751429
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,16,24,2,64,0,1,fp8,fp8,0,0.04095999896526337
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,16,24,4,64,0,1,float16,fp8,0,0.014165333161751429
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,16,24,4,64,0,1,float16,float16,0,0.013994666437307993
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,16,24,4,64,0,1,fp8,fp8,0,0.04130133241415024
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,16,24,8,64,0,1,float16,float16,0,0.014165333161751429
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,16,24,8,64,0,1,float16,fp8,0,0.014165333161751429
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,16,24,8,64,0,1,fp8,fp8,0,0.04164266586303711
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,16,24,24,64,0,1,float16,float16,0,0.010922666639089584
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,16,24,24,64,0,1,float16,fp8,0,0.010922666639089584
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,16,24,24,64,0,1,fp8,fp8,0,0.027306665976842243
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,16,24,1,64,0,1,float16,fp8,0,0.01109333336353302
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,16,24,1,64,0,1,float16,float16,0,0.010922666639089584
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,16,24,1,64,0,1,fp8,fp8,0,0.027477333943049114
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,16,24,2,64,0,1,float16,float16,0,0.010751999914646149
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,16,24,2,64,0,1,float16,fp8,0,0.01109333336353302
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,16,24,2,64,0,1,fp8,fp8,0,0.027989332874615986
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,16,24,4,64,0,1,float16,float16,0,0.010922666639089584
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,16,24,4,64,0,1,float16,fp8,0,0.01109333336353302
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,16,24,4,64,0,1,fp8,fp8,0,0.027989332874615986
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,16,24,8,64,0,1,float16,float16,0,0.01109333336353302
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,16,24,8,64,0,1,float16,fp8,0,0.01109333336353302
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,16,24,8,64,0,1,fp8,fp8,0,0.027818667391935985
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,16,24,24,64,0,1,float16,float16,0,0.008703999842206636
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,16,24,24,64,0,1,float16,fp8,0,0.008703999842206636
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,16,24,24,64,0,1,fp8,fp8,0,0.016384000579516094
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,16,24,1,64,0,1,float16,float16,0,0.008703999842206636
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,16,24,1,64,0,1,float16,fp8,0,0.008703999842206636
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,16,24,1,64,0,1,fp8,fp8,0,0.0170666662355264
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,16,24,2,64,0,1,float16,float16,0,0.0085333331177632
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,16,24,2,64,0,1,float16,fp8,0,0.008703999842206636
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,16,24,2,64,0,1,fp8,fp8,0,0.01672533278663953
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,16,24,4,64,0,1,float16,float16,0,0.0085333331177632
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,16,24,4,64,0,1,float16,fp8,0,0.008874666566650072
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,16,24,4,64,0,1,fp8,fp8,0,0.01672533278663953
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,16,24,8,64,0,1,float16,float16,0,0.0085333331177632
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,16,24,8,64,0,1,fp8,fp8,0,0.01672533278663953
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,16,24,8,64,0,1,float16,fp8,0,0.008874666566650072
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,16,24,24,64,0,1,float16,float16,0,0.008021333565314611
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,16,24,24,64,0,1,float16,fp8,0,0.0085333331177632
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,16,24,24,64,0,1,fp8,fp8,0,0.012970666090647379
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,16,24,1,64,0,1,float16,float16,0,0.008874666566650072
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,16,24,1,64,0,1,float16,fp8,0,0.008703999842206636
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,16,24,1,64,0,1,fp8,fp8,0,0.013141332815090815
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,16,24,2,64,0,1,float16,float16,0,0.0085333331177632
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,16,24,2,64,0,1,float16,fp8,0,0.00938666673998038
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,16,24,2,64,0,1,fp8,fp8,0,0.013141332815090815
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,16,24,4,64,0,1,float16,float16,0,0.008874666566650072
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,16,24,4,64,0,1,float16,fp8,0,0.008362666393319765
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,16,24,4,64,0,1,fp8,fp8,0,0.013141332815090815
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,16,24,8,64,0,1,float16,float16,0,0.009045333291093508
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,16,24,8,64,0,1,float16,fp8,0,0.008192000289758047
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,16,24,8,64,0,1,fp8,fp8,0,0.013141332815090815
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,16,24,24,64,0,1,float16,float16,0,0.0085333331177632
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,16,24,24,64,0,1,float16,fp8,0,0.007850666840871176
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,16,24,24,64,0,1,fp8,fp8,0,0.011776000261306763
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,16,24,1,64,0,1,float16,float16,0,0.007850666840871176
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,16,24,1,64,0,1,float16,fp8,0,0.008362666393319765
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,16,24,1,64,0,1,fp8,fp8,0,0.01228800043463707
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,16,24,2,64,0,1,float16,float16,0,0.008021333565314611
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,16,24,2,64,0,1,float16,fp8,0,0.0085333331177632
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,16,24,2,64,0,1,fp8,fp8,0,0.012117333710193634
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,16,24,4,64,0,1,float16,float16,0,0.007850666840871176
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,16,24,4,64,0,1,float16,fp8,0,0.0085333331177632
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,16,24,4,64,0,1,fp8,fp8,0,0.012117333710193634
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,16,24,8,64,0,1,float16,float16,0,0.008842666943868002
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,16,24,8,64,0,1,float16,fp8,0,0.009216000015536943
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,16,24,8,64,0,1,fp8,fp8,0,0.011946666985750198
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,16,24,24,64,0,1,float16,float16,0,0.008362666393319765
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,16,24,24,64,0,1,fp8,fp8,0,0.011605333536863327
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,16,24,24,64,0,1,float16,fp8,0,0.008874666566650072
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,16,24,1,64,0,1,float16,float16,0,0.008474666625261307
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,16,24,1,64,0,1,float16,fp8,0,0.007850666840871176
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,16,24,2,64,0,1,float16,float16,0,0.008703999842206636
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,16,24,1,64,0,1,fp8,fp8,0,0.011776000261306763
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,16,24,2,64,0,1,float16,fp8,0,0.008021333565314611
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,16,24,2,64,0,1,fp8,fp8,0,0.011776000261306763
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,16,24,4,64,0,1,float16,float16,0,0.008021333565314611
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,16,24,4,64,0,1,float16,fp8,0,0.009045333291093508
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,16,24,4,64,0,1,fp8,fp8,0,0.011776000261306763
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,16,24,8,64,0,1,float16,float16,0,0.007850666840871176
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,16,24,8,64,0,1,float16,fp8,0,0.009045333291093508
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,16,24,8,64,0,1,fp8,fp8,0,0.011776000261306763
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,16384,16,1,64,0,1,float16,float16,0,30.03118896484375
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,16384,16,1,64,0,1,float16,fp8,0,30.477994283040363
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,16384,16,4,64,0,1,float16,float16,0,29.7530034383138
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,16384,16,2,64,0,1,float16,fp8,0,30.08665720621745
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,16384,16,4,64,0,1,float16,fp8,0,29.88783009847005
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,16384,16,2,64,0,1,float16,float16,0,31.20947265625
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,16384,16,1,64,0,1,fp8,fp8,0,39.128064473470054
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,16384,16,2,64,0,1,fp8,fp8,0,39.64262390136719
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,16384,16,16,64,0,1,float16,float16,0,15.473834991455078
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,16384,16,16,64,0,1,float16,fp8,0,15.911424001057943
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,16384,16,1,64,0,1,float16,float16,0,15.222442626953125
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,16384,16,8,64,0,1,float16,float16,0,30.334805806477863
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,16384,16,16,64,0,1,fp8,fp8,0,20.5306879679362
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,16384,16,4,64,0,1,fp8,fp8,0,40.179883321126304
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,16384,16,8,64,0,1,float16,fp8,0,30.059860229492188
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,16384,16,8,64,0,1,fp8,fp8,0,40.47462463378906
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,16384,16,1,64,0,1,float16,fp8,0,15.410858154296875
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,16384,16,1,64,0,1,fp8,fp8,0,19.55413309733073
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,16384,16,2,64,0,1,float16,float16,0,15.046485900878906
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,16384,16,2,64,0,1,float16,fp8,0,15.328596750895182
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,16384,16,4,64,0,1,float16,float16,0,15.300949096679688
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,16384,16,4,64,0,1,float16,fp8,0,15.0741335550944
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,16384,16,2,64,0,1,fp8,fp8,0,19.768319447835285
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,16384,16,4,64,0,1,fp8,fp8,0,19.62291208902995
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,16384,16,8,64,0,1,float16,float16,0,15.177045186360678
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,16384,16,16,64,0,1,float16,float16,0,8.280064264933268
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,16384,16,16,64,0,1,float16,fp8,0,7.597909291585286
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,16384,16,16,64,0,1,fp8,fp8,0,10.219861348470053
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,16384,16,8,64,0,1,float16,fp8,0,15.25384521484375
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,16384,16,1,64,0,1,float16,float16,0,8.037546793619791
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,16384,16,8,64,0,1,fp8,fp8,0,20.081493377685547
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,16384,16,1,64,0,1,float16,fp8,0,7.899818420410156
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,16384,16,2,64,0,1,float16,float16,0,7.8556162516276045
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,16384,16,1,64,0,1,fp8,fp8,0,9.845247904459635
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,16384,16,2,64,0,1,float16,fp8,0,7.598762512207031
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,16384,16,4,64,0,1,float16,fp8,0,7.56223996480306
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,16384,16,4,64,0,1,float16,float16,0,7.994880040486653
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,16384,16,2,64,0,1,fp8,fp8,0,9.86077880859375
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,16384,16,4,64,0,1,fp8,fp8,0,9.978538513183594
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,16384,16,16,64,0,1,float16,float16,0,3.6915200551350913
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,16384,16,8,64,0,1,float16,float16,0,7.8593705495198565
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,16384,16,8,64,0,1,float16,fp8,0,7.334741592407227
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,16384,16,8,64,0,1,fp8,fp8,0,10.018474578857422
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,16384,16,16,64,0,1,float16,fp8,0,3.7973333994547525
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,16384,16,1,64,0,1,float16,float16,0,3.5285332997639975
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,16384,16,16,64,0,1,fp8,fp8,0,5.254826545715332
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,16384,16,1,64,0,1,float16,fp8,0,3.523413340250651
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,16384,16,2,64,0,1,float16,float16,0,3.4744319915771484
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,16384,16,1,64,0,1,fp8,fp8,0,4.980736096700032
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,16384,16,2,64,0,1,float16,fp8,0,3.570858637491862
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,16384,16,4,64,0,1,float16,float16,0,3.338752110799154
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,16384,16,4,64,0,1,float16,fp8,0,3.6118186314900718
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,16384,16,2,64,0,1,fp8,fp8,0,4.959914525349935
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,16384,16,4,64,0,1,fp8,fp8,0,5.023743947347005
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,16384,16,8,64,0,1,float16,float16,0,3.4384212493896484
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,16384,16,8,64,0,1,float16,fp8,0,3.71507199605306
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,16384,16,8,64,0,1,fp8,fp8,0,5.040639877319336
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,12288,16,1,64,0,1,float16,float16,0,17.78278350830078
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,12288,16,1,64,0,1,float16,fp8,0,18.09117889404297
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,12288,16,2,64,0,1,float16,float16,0,17.694037119547527
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,12288,16,1,64,0,1,fp8,fp8,0,22.060203552246094
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,12288,16,2,64,0,1,float16,fp8,0,17.921194712320965
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,12288,16,2,64,0,1,fp8,fp8,0,22.406143188476562
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,12288,16,4,64,0,1,float16,float16,0,17.316693623860676
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,12288,16,4,64,0,1,float16,fp8,0,17.598805745442707
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,12288,16,16,64,0,1,float16,float16,0,9.853951772054037
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,12288,16,4,64,0,1,fp8,fp8,0,22.806869506835938
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,12288,16,8,64,0,1,float16,fp8,0,17.56023406982422
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,12288,16,8,64,0,1,float16,float16,0,18.023082733154297
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,12288,16,16,64,0,1,float16,fp8,0,9.032533645629883
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,12288,16,1,64,0,1,float16,float16,0,8.822101593017578
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,12288,16,16,64,0,1,fp8,fp8,0,11.734016418457031
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,12288,16,8,64,0,1,fp8,fp8,0,23.315114339192707
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,12288,16,1,64,0,1,float16,fp8,0,8.83848508199056
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,12288,16,2,64,0,1,float16,float16,0,9.208661397298178
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,12288,16,2,64,0,1,float16,fp8,0,9.066837310791016
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,12288,16,1,64,0,1,fp8,fp8,0,11.076095581054688
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,12288,16,4,64,0,1,float16,float16,0,8.878421147664389
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,12288,16,2,64,0,1,fp8,fp8,0,11.23959477742513
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,12288,16,4,64,0,1,float16,fp8,0,8.946346918741861
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,12288,16,4,64,0,1,fp8,fp8,0,11.307008107503256
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,12288,16,16,64,0,1,float16,float16,0,4.439210573832194
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,12288,16,8,64,0,1,float16,float16,0,9.059157053629557
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,12288,16,16,64,0,1,float16,fp8,0,4.440064112345378
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,12288,16,8,64,0,1,float16,fp8,0,8.764415740966797
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,12288,16,16,64,0,1,fp8,fp8,0,5.918890635172526
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,12288,16,1,64,0,1,float16,float16,0,4.137983957926433
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,12288,16,8,64,0,1,fp8,fp8,0,11.551743825276693
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,12288,16,1,64,0,1,float16,fp8,0,4.215807914733887
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,12288,16,2,64,0,1,float16,fp8,0,4.141909281412761
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,12288,16,2,64,0,1,float16,float16,0,4.335957209269206
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,12288,16,1,64,0,1,fp8,fp8,0,5.575679779052734
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,12288,16,4,64,0,1,float16,float16,0,3.9007574717203775
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,12288,16,4,64,0,1,float16,fp8,0,4.152661323547363
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,12288,16,2,64,0,1,fp8,fp8,0,5.571072260538737
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,12288,16,4,64,0,1,fp8,fp8,0,5.673472086588542
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,12288,16,8,64,0,1,float16,float16,0,4.1620480219523115
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,12288,16,16,64,0,1,float16,float16,0,1.9967999458312988
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,12288,16,8,64,0,1,float16,fp8,0,3.8109865188598633
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,12288,16,16,64,0,1,float16,fp8,0,1.920512040456136
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,12288,16,1,64,0,1,float16,float16,0,2.0271786053975425
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,12288,16,16,64,0,1,fp8,fp8,0,2.9259093602498374
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,12288,16,8,64,0,1,fp8,fp8,0,5.789183934529622
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,12288,16,1,64,0,1,float16,fp8,0,2.0307626724243164
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,12288,16,1,64,0,1,fp8,fp8,0,2.7762346267700195
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,12288,16,2,64,0,1,float16,float16,0,1.9817813237508137
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,12288,16,2,64,0,1,float16,fp8,0,2.1087573369344077
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,12288,16,4,64,0,1,float16,float16,0,2.104490598042806
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,12288,16,2,64,0,1,fp8,fp8,0,2.7740160624186196
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,12288,16,4,64,0,1,float16,fp8,0,2.1084159215291343
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,12288,16,4,64,0,1,fp8,fp8,0,2.827946662902832
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,12288,16,8,64,0,1,float16,float16,0,1.926143964131673
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,12288,16,8,64,0,1,float16,fp8,0,2.0118187268575034
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,12288,16,8,64,0,1,fp8,fp8,0,2.832725207010905
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,10240,16,1,64,0,1,float16,float16,0,12.72644297281901
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,10240,16,1,64,0,1,float16,fp8,0,12.363946278889975
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,10240,16,2,64,0,1,float16,float16,0,12.603392283121744
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,10240,16,2,64,0,1,float16,fp8,0,13.126485188802084
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,10240,16,1,64,0,1,fp8,fp8,0,15.51684315999349
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,10240,16,4,64,0,1,float16,float16,0,12.68292236328125
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,10240,16,2,64,0,1,fp8,fp8,0,15.83633041381836
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,10240,16,4,64,0,1,float16,fp8,0,12.723711649576822
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,10240,16,16,64,0,1,float16,float16,0,6.686890920003255
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,10240,16,16,64,0,1,float16,fp8,0,6.1528746287028
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,10240,16,8,64,0,1,float16,float16,0,12.708352406819662
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,10240,16,4,64,0,1,fp8,fp8,0,16.034815470377605
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,10240,16,8,64,0,1,float16,fp8,0,12.776789347330729
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,10240,16,16,64,0,1,fp8,fp8,0,8.466943740844727
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,10240,16,1,64,0,1,float16,float16,0,5.851135889689128
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,10240,16,8,64,0,1,fp8,fp8,0,16.602624257405598
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,10240,16,1,64,0,1,float16,fp8,0,6.192298889160156
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,10240,16,2,64,0,1,float16,float16,0,6.001152038574219
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,10240,16,2,64,0,1,float16,fp8,0,5.86956787109375
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,10240,16,1,64,0,1,fp8,fp8,0,7.818239847819011
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,10240,16,4,64,0,1,float16,float16,0,5.968896230061849
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,10240,16,2,64,0,1,fp8,fp8,0,7.8068052927653
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,10240,16,4,64,0,1,float16,fp8,0,6.281386693318685
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,10240,16,4,64,0,1,fp8,fp8,0,7.901013056437175
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,10240,16,16,64,0,1,float16,float16,0,2.9858134587605796
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,10240,16,8,64,0,1,float16,float16,0,5.865983963012695
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,10240,16,16,64,0,1,float16,fp8,0,2.9764267603556314
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,10240,16,8,64,0,1,float16,fp8,0,6.184960047403972
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,10240,16,1,64,0,1,float16,float16,0,2.7322025299072266
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,10240,16,8,64,0,1,fp8,fp8,0,8.096938451131185
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,10240,16,16,64,0,1,fp8,fp8,0,4.128767967224121
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,10240,16,1,64,0,1,float16,fp8,0,2.736639976501465
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,10240,16,1,64,0,1,fp8,fp8,0,3.894442558288574
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,10240,16,2,64,0,1,float16,float16,0,2.6436266899108887
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,10240,16,2,64,0,1,float16,fp8,0,2.6874879201253257
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,10240,16,2,64,0,1,fp8,fp8,0,3.8309545516967773
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,10240,16,4,64,0,1,float16,float16,0,2.6866346995035806
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,10240,16,4,64,0,1,float16,fp8,0,2.6915839513142905
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,10240,16,4,64,0,1,fp8,fp8,0,3.901269276936849
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,10240,16,8,64,0,1,float16,float16,0,2.813269297281901
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,10240,16,8,64,0,1,float16,fp8,0,2.688511848449707
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,10240,16,16,64,0,1,float16,float16,0,1.3986132939656575
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,10240,16,8,64,0,1,fp8,fp8,0,4.009984016418457
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,10240,16,16,64,0,1,float16,fp8,0,1.3733545939127605
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,10240,16,1,64,0,1,float16,float16,0,1.484458605448405
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,10240,16,16,64,0,1,fp8,fp8,0,2.0701866149902344
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,10240,16,1,64,0,1,float16,fp8,0,1.4054400126139324
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,10240,16,1,64,0,1,fp8,fp8,0,1.9828054110209148
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,10240,16,2,64,0,1,float16,float16,0,1.48633607228597
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,10240,16,2,64,0,1,float16,fp8,0,1.4184106190999348
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,10240,16,4,64,0,1,float16,float16,0,1.4958933194478352
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,10240,16,4,64,0,1,float16,fp8,0,1.511082649230957
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,10240,16,2,64,0,1,fp8,fp8,0,1.9795626004536946
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,10240,16,4,64,0,1,fp8,fp8,0,1.9770026206970215
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,10240,16,8,64,0,1,float16,float16,0,1.3902506828308105
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,10240,16,8,64,0,1,float16,fp8,0,1.3742079734802246
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,10240,16,8,64,0,1,fp8,fp8,0,1.9937280019124348
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,8192,16,1,64,0,1,float16,float16,0,16.68437321980794
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,8192,16,1,64,0,1,float16,fp8,0,16.317610422770183
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,8192,16,2,64,0,1,float16,float16,0,16.356863657633465
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,8192,16,2,64,0,1,float16,fp8,0,16.7191899617513
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,8192,16,1,64,0,1,fp8,fp8,0,20.255743662516277
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,8192,16,4,64,0,1,float16,float16,0,16.883711496988933
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,8192,16,2,64,0,1,fp8,fp8,0,21.050708770751953
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,8192,16,4,64,0,1,float16,fp8,0,16.451243082682293
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,8192,16,16,64,0,1,float16,float16,0,8.907605489095053
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,8192,16,16,64,0,1,float16,fp8,0,9.112234751383463
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,8192,16,8,64,0,1,float16,float16,0,16.633856455485027
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,8192,16,16,64,0,1,fp8,fp8,0,11.03769556681315
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,8192,16,8,64,0,1,float16,fp8,0,16.568490346272785
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,8192,16,1,64,0,1,float16,float16,0,8.488277435302734
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,8192,16,4,64,0,1,fp8,fp8,0,21.684054056803387
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,8192,16,8,64,0,1,fp8,fp8,0,22.252543131510418
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,8192,16,1,64,0,1,float16,fp8,0,8.11622428894043
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,8192,16,2,64,0,1,float16,float16,0,8.21572240193685
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,8192,16,1,64,0,1,fp8,fp8,0,10.15671475728353
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,8192,16,2,64,0,1,float16,fp8,0,8.386730829874674
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,8192,16,4,64,0,1,float16,float16,0,8.405674616495768
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,8192,16,2,64,0,1,fp8,fp8,0,10.293589274088541
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,8192,16,4,64,0,1,float16,fp8,0,8.346453348795572
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,8192,16,4,64,0,1,fp8,fp8,0,10.41595713297526
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,8192,16,8,64,0,1,float16,float16,0,8.354645411173502
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,8192,16,16,64,0,1,float16,float16,0,4.086954752604167
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,8192,16,16,64,0,1,float16,fp8,0,4.065962791442871
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,8192,16,8,64,0,1,float16,fp8,0,7.964672088623047
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,8192,16,1,64,0,1,float16,float16,0,3.7893120447794595
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,8192,16,16,64,0,1,fp8,fp8,0,5.453141530354817
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,8192,16,8,64,0,1,fp8,fp8,0,10.774528503417969
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,8192,16,1,64,0,1,float16,fp8,0,3.4251092274983725
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,8192,16,1,64,0,1,fp8,fp8,0,5.001045227050781
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,8192,16,2,64,0,1,float16,float16,0,3.6457811991373696
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,8192,16,2,64,0,1,float16,fp8,0,3.7248001098632812
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,8192,16,4,64,0,1,float16,float16,0,3.84989865620931
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,8192,16,4,64,0,1,float16,fp8,0,3.761664072672526
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,8192,16,2,64,0,1,fp8,fp8,0,5.009920120239258
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,8192,16,8,64,0,1,float16,float16,0,3.883690516153971
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,8192,16,4,64,0,1,fp8,fp8,0,5.188949267069499
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,8192,16,16,64,0,1,float16,float16,0,1.9730772972106934
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,8192,16,8,64,0,1,float16,fp8,0,3.8067200978597007
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,8192,16,16,64,0,1,float16,fp8,0,1.9403093655904133
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,8192,16,8,64,0,1,fp8,fp8,0,5.228543917338054
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,8192,16,1,64,0,1,float16,float16,0,1.7071785926818848
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,8192,16,16,64,0,1,fp8,fp8,0,2.6922667821248374
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,8192,16,1,64,0,1,float16,fp8,0,1.7370452880859375
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,8192,16,1,64,0,1,fp8,fp8,0,2.4635732968648276
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,8192,16,2,64,0,1,float16,float16,0,1.688576062520345
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,8192,16,2,64,0,1,float16,fp8,0,1.7058134078979492
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,8192,16,4,64,0,1,float16,float16,0,1.6740694046020508
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,8192,16,4,64,0,1,float16,fp8,0,1.6834559440612793
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,8192,16,2,64,0,1,fp8,fp8,0,2.4494080543518066
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,8192,16,4,64,0,1,fp8,fp8,0,2.480639934539795
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,8192,16,8,64,0,1,float16,float16,0,1.7271466255187988
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,8192,16,8,64,0,1,float16,fp8,0,1.7264639536539714
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,8192,16,16,64,0,1,float16,float16,0,0.9198933442433676
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,8192,16,8,64,0,1,fp8,fp8,0,2.570240020751953
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,8192,16,16,64,0,1,float16,fp8,0,0.9222826957702637
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,8192,16,1,64,0,1,float16,float16,0,0.9731413523356119
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,8192,16,16,64,0,1,fp8,fp8,0,1.357141335805257
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,8192,16,1,64,0,1,float16,fp8,0,1.003007968266805
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,8192,16,1,64,0,1,fp8,fp8,0,1.3199360370635986
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,8192,16,2,64,0,1,float16,float16,0,0.964949369430542
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,8192,16,2,64,0,1,float16,fp8,0,1.0060799916585286
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,8192,16,2,64,0,1,fp8,fp8,0,1.3264213403065999
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,8192,16,4,64,0,1,float16,float16,0,0.9873066743214926
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,8192,16,4,64,0,1,float16,fp8,0,0.9792853196461996
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,8192,16,4,64,0,1,fp8,fp8,0,1.310378630956014
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,8192,16,8,64,0,1,float16,float16,0,0.979967991511027
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,8192,16,8,64,0,1,float16,fp8,0,0.9168213208516439
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,8192,16,8,64,0,1,fp8,fp8,0,1.3260800043741863
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,6144,16,1,64,0,1,float16,float16,0,9.622698465983072
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,6144,16,1,64,0,1,float16,fp8,0,9.878186543782553
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,6144,16,2,64,0,1,float16,fp8,0,9.409877141316732
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,6144,16,2,64,0,1,float16,float16,0,9.806506474812826
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,6144,16,1,64,0,1,fp8,fp8,0,11.558228810628256
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,6144,16,4,64,0,1,float16,float16,0,9.882624308268229
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,6144,16,2,64,0,1,fp8,fp8,0,12.037291208902994
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,6144,16,4,64,0,1,float16,fp8,0,10.03707758585612
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,6144,16,8,64,0,1,float16,float16,0,10.19494374593099
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,6144,16,16,64,0,1,float16,fp8,0,5.014698664347331
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,6144,16,16,64,0,1,float16,float16,0,4.924245198567708
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,6144,16,4,64,0,1,fp8,fp8,0,12.361216227213541
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,6144,16,1,64,0,1,float16,float16,0,4.151808102925618
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,6144,16,8,64,0,1,float16,fp8,0,9.48258145650228
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,6144,16,16,64,0,1,fp8,fp8,0,6.450005213419597
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,6144,16,8,64,0,1,fp8,fp8,0,12.924757639567057
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,6144,16,1,64,0,1,float16,fp8,0,4.417706807454427
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,6144,16,2,64,0,1,float16,float16,0,4.515498797098796
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,6144,16,2,64,0,1,float16,fp8,0,4.089685440063477
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,6144,16,1,64,0,1,fp8,fp8,0,5.800106684366862
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,6144,16,4,64,0,1,float16,float16,0,4.600661277770996
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,6144,16,4,64,0,1,float16,fp8,0,4.496213277180989
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,6144,16,2,64,0,1,fp8,fp8,0,5.948415756225586
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,6144,16,4,64,0,1,fp8,fp8,0,5.881685256958008
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,6144,16,8,64,0,1,float16,float16,0,4.43613878885905
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,6144,16,16,64,0,1,float16,float16,0,2.4502612749735513
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,6144,16,8,64,0,1,float16,fp8,0,4.623018582661946
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,6144,16,1,64,0,1,float16,float16,0,1.9838293393452961
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,6144,16,16,64,0,1,float16,fp8,0,2.3700480461120605
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,6144,16,16,64,0,1,fp8,fp8,0,3.158869425455729
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,6144,16,8,64,0,1,fp8,fp8,0,6.0890452067057295
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,6144,16,1,64,0,1,float16,fp8,0,1.9877546628316243
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,6144,16,2,64,0,1,float16,float16,0,1.9833173751831055
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,6144,16,1,64,0,1,fp8,fp8,0,2.805077234903971
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,6144,16,2,64,0,1,float16,fp8,0,1.9293866157531738
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,6144,16,4,64,0,1,float16,float16,0,1.9880960782368977
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,6144,16,2,64,0,1,fp8,fp8,0,2.823850631713867
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,6144,16,4,64,0,1,float16,fp8,0,1.9415040016174316
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,6144,16,4,64,0,1,fp8,fp8,0,2.8617385228474936
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,6144,16,8,64,0,1,float16,float16,0,2.192042668660482
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,6144,16,8,64,0,1,float16,fp8,0,2.1184852917989097
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,6144,16,16,64,0,1,float16,float16,0,1.096021334330241
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,6144,16,8,64,0,1,fp8,fp8,0,2.9876906077067056
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,6144,16,16,64,0,1,float16,fp8,0,1.0304853121439617
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,6144,16,1,64,0,1,float16,float16,0,0.994645357131958
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,6144,16,16,64,0,1,fp8,fp8,0,1.6112640698750813
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,6144,16,1,64,0,1,float16,fp8,0,1.0031786759694417
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,6144,16,1,64,0,1,fp8,fp8,0,1.43667205174764
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,6144,16,2,64,0,1,float16,float16,0,1.022976001103719
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,6144,16,2,64,0,1,float16,fp8,0,0.994645357131958
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,6144,16,2,64,0,1,fp8,fp8,0,1.4318933486938477
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,6144,16,4,64,0,1,float16,float16,0,0.9925973415374756
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,6144,16,8,64,0,1,float16,float16,0,0.9958399931589762
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,6144,16,4,64,0,1,float16,fp8,0,0.9888426462809244
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,6144,16,4,64,0,1,fp8,fp8,0,1.4416213035583496
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,6144,16,8,64,0,1,float16,fp8,0,1.014954646428426
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,6144,16,16,64,0,1,float16,fp8,0,0.5582506656646729
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,6144,16,16,64,0,1,float16,float16,0,0.5599573453267416
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,6144,16,8,64,0,1,fp8,fp8,0,1.4670507113138835
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,6144,16,1,64,0,1,float16,float16,0,0.5565439860026041
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,6144,16,16,64,0,1,fp8,fp8,0,0.7918933232625326
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,6144,16,1,64,0,1,float16,fp8,0,0.5775359869003296
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,6144,16,1,64,0,1,fp8,fp8,0,0.7906986872355143
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,6144,16,2,64,0,1,float16,float16,0,0.5577386617660522
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,6144,16,2,64,0,1,float16,fp8,0,0.5640533367792765
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,6144,16,4,64,0,1,float16,float16,0,0.5536426703135172
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,6144,16,4,64,0,1,float16,fp8,0,0.564736008644104
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,6144,16,2,64,0,1,fp8,fp8,0,0.7814826965332031
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,6144,16,4,64,0,1,fp8,fp8,0,0.7755093574523926
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,6144,16,8,64,0,1,float16,float16,0,0.5835093259811401
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,6144,16,8,64,0,1,float16,fp8,0,0.5504000186920166
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,6144,16,8,64,0,1,fp8,fp8,0,0.7997439702351888
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,4096,16,1,64,0,1,float16,float16,0,9.44708251953125
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,4096,16,1,64,0,1,float16,fp8,0,9.052160263061523
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,4096,16,2,64,0,1,float16,float16,0,9.199274698893229
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,4096,16,2,64,0,1,float16,fp8,0,9.196032206217447
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,4096,16,1,64,0,1,fp8,fp8,0,10.998784383138021
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,4096,16,4,64,0,1,float16,float16,0,9.71673583984375
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,4096,16,2,64,0,1,fp8,fp8,0,11.833685557047525
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,4096,16,4,64,0,1,float16,fp8,0,9.645397186279297
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,4096,16,16,64,0,1,float16,float16,0,5.261823972066243
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,4096,16,16,64,0,1,float16,fp8,0,5.136213302612305
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,4096,16,8,64,0,1,float16,float16,0,9.954474767049154
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,4096,16,1,64,0,1,float16,float16,0,4.226730664571126
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,4096,16,16,64,0,1,fp8,fp8,0,6.338730494181315
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,4096,16,8,64,0,1,float16,fp8,0,9.993386586507162
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,4096,16,4,64,0,1,fp8,fp8,0,12.215296427408854
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,4096,16,8,64,0,1,fp8,fp8,0,12.879189809163412
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,4096,16,1,64,0,1,float16,fp8,0,3.9142398834228516
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,4096,16,1,64,0,1,fp8,fp8,0,5.438634872436523
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,4096,16,2,64,0,1,float16,fp8,0,4.14685853322347
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,4096,16,2,64,0,1,float16,float16,0,4.277759869893392
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,4096,16,2,64,0,1,fp8,fp8,0,5.493759791056315
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,4096,16,4,64,0,1,float16,float16,0,4.179114659627278
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,4096,16,4,64,0,1,float16,fp8,0,4.217514673868815
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,4096,16,4,64,0,1,fp8,fp8,0,5.6451416015625
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,4096,16,8,64,0,1,float16,float16,0,4.539903958638509
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,4096,16,16,64,0,1,float16,float16,0,2.5391786893208823
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,4096,16,1,64,0,1,float16,float16,0,1.867263952891032
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,4096,16,8,64,0,1,float16,fp8,0,4.652885437011719
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,4096,16,16,64,0,1,float16,fp8,0,2.376533349355062
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,4096,16,16,64,0,1,fp8,fp8,0,3.048447926839193
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,4096,16,8,64,0,1,fp8,fp8,0,5.918378829956055
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,4096,16,1,64,0,1,float16,fp8,0,1.8474666277567546
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,4096,16,1,64,0,1,fp8,fp8,0,2.561365286509196
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,4096,16,2,64,0,1,float16,float16,0,1.9181226094563801
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,4096,16,2,64,0,1,float16,fp8,0,1.8797225952148438
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,4096,16,4,64,0,1,float16,float16,0,1.9647146860758464
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,4096,16,2,64,0,1,fp8,fp8,0,2.6219520568847656
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,4096,16,4,64,0,1,float16,fp8,0,1.9177813529968262
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,4096,16,4,64,0,1,fp8,fp8,0,2.711893399556478
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,4096,16,8,64,0,1,float16,float16,0,2.1375999450683594
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,4096,16,8,64,0,1,float16,fp8,0,2.0951040585835776
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,4096,16,16,64,0,1,float16,float16,0,1.1624106566111247
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,4096,16,8,64,0,1,fp8,fp8,0,2.819925308227539
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,4096,16,16,64,0,1,float16,fp8,0,1.0992639859517415
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,4096,16,1,64,0,1,float16,float16,0,0.9159680207570394
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,4096,16,16,64,0,1,fp8,fp8,0,1.5539199511210124
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,4096,16,1,64,0,1,float16,fp8,0,0.9429333209991455
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,4096,16,1,64,0,1,fp8,fp8,0,1.2759040196736653
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,4096,16,2,64,0,1,float16,float16,0,0.9144319693247477
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,4096,16,2,64,0,1,float16,fp8,0,0.9151146411895752
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,4096,16,2,64,0,1,fp8,fp8,0,1.2948479652404785
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,4096,16,4,64,0,1,float16,float16,0,0.8920746644337972
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,4096,16,4,64,0,1,float16,fp8,0,0.9019733270009359
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,4096,16,4,64,0,1,fp8,fp8,0,1.322325309117635
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,4096,16,8,64,0,1,float16,float16,0,0.9637546539306641
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,4096,16,8,64,0,1,float16,fp8,0,0.9209173520406088
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,4096,16,16,64,0,1,float16,float16,0,0.49544533093770343
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,4096,16,8,64,0,1,fp8,fp8,0,1.4085119565327961
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,4096,16,16,64,0,1,float16,fp8,0,0.5038079818089803
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,4096,16,16,64,0,1,fp8,fp8,0,0.7499093214670817
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,4096,16,1,64,0,1,float16,float16,0,0.4747946659723918
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,4096,16,1,64,0,1,float16,fp8,0,0.47940266132354736
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,4096,16,2,64,0,1,float16,float16,0,0.4771840174992879
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,4096,16,1,64,0,1,fp8,fp8,0,0.6917119820912679
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,4096,16,2,64,0,1,float16,fp8,0,0.47547733783721924
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,4096,16,2,64,0,1,fp8,fp8,0,0.6946132977803549
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,4096,16,4,64,0,1,float16,float16,0,0.48230401674906415
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,4096,16,4,64,0,1,float16,fp8,0,0.48110934098561603
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,4096,16,8,64,0,1,float16,float16,0,0.4930560191472371
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,4096,16,4,64,0,1,fp8,fp8,0,0.6922240257263184
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,4096,16,8,64,0,1,float16,fp8,0,0.48349865277608234
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,4096,16,8,64,0,1,fp8,fp8,0,0.7010986804962158
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,4096,16,16,64,0,1,float16,float16,0,0.2826240062713623
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,4096,16,16,64,0,1,float16,fp8,0,0.28142933050791424
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,4096,16,16,64,0,1,fp8,fp8,0,0.40328534444173175
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,4096,16,1,64,0,1,float16,float16,0,0.2867199977238973
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,4096,16,1,64,0,1,float16,fp8,0,0.2839893301328023
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,4096,16,1,64,0,1,fp8,fp8,0,0.400383989016215
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,4096,16,2,64,0,1,float16,float16,0,0.283135990301768
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,4096,16,2,64,0,1,float16,fp8,0,0.2851840058962504
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,4096,16,2,64,0,1,fp8,fp8,0,0.40089599291483563
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,4096,16,4,64,0,1,float16,float16,0,0.2836479942003886
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,4096,16,4,64,0,1,float16,fp8,0,0.27665066719055176
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,4096,16,4,64,0,1,fp8,fp8,0,0.3991893529891968
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,4096,16,8,64,0,1,float16,float16,0,0.28125866254170734
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,4096,16,8,64,0,1,float16,fp8,0,0.27767467498779297
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,4096,16,8,64,0,1,fp8,fp8,0,0.40328534444173175
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,3072,16,1,64,0,1,float16,float16,0,5.19270388285319
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,3072,16,1,64,0,1,float16,fp8,0,5.152768135070801
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,3072,16,2,64,0,1,float16,float16,0,5.473450978597005
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,3072,16,1,64,0,1,fp8,fp8,0,6.367061614990234
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,3072,16,2,64,0,1,float16,fp8,0,5.283839861551921
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,3072,16,4,64,0,1,float16,float16,0,5.382656097412109
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,3072,16,4,64,0,1,float16,fp8,0,5.293567975362142
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,3072,16,2,64,0,1,fp8,fp8,0,6.8597761789957685
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,3072,16,4,64,0,1,fp8,fp8,0,7.087104161580403
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,3072,16,16,64,0,1,float16,float16,0,3.222698529561361
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,3072,16,8,64,0,1,float16,float16,0,5.94432004292806
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,3072,16,1,64,0,1,float16,float16,0,2.3676586151123047
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,3072,16,16,64,0,1,float16,fp8,0,3.0774612426757812
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,3072,16,8,64,0,1,float16,fp8,0,5.822634379069011
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,3072,16,16,64,0,1,fp8,fp8,0,3.761664072672526
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,3072,16,8,64,0,1,fp8,fp8,0,7.564117431640625
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,3072,16,1,64,0,1,float16,fp8,0,2.356565316518148
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,3072,16,2,64,0,1,float16,float16,0,2.3837013244628906
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,3072,16,1,64,0,1,fp8,fp8,0,3.027455965677897
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,3072,16,2,64,0,1,float16,fp8,0,2.417151927947998
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,3072,16,4,64,0,1,float16,float16,0,2.581845283508301
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,3072,16,4,64,0,1,float16,fp8,0,2.455381393432617
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,3072,16,2,64,0,1,fp8,fp8,0,3.082922617594401
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,3072,16,4,64,0,1,fp8,fp8,0,3.2160425186157227
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,3072,16,8,64,0,1,float16,float16,0,2.8067839940389
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,3072,16,8,64,0,1,float16,fp8,0,2.6946560541788735
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,3072,16,16,64,0,1,float16,float16,0,1.5506772994995117
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,3072,16,16,64,0,1,float16,fp8,0,1.4769493738810222
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,3072,16,8,64,0,1,fp8,fp8,0,3.40718936920166
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,3072,16,1,64,0,1,float16,float16,0,1.051477352778117
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,3072,16,16,64,0,1,fp8,fp8,0,1.8682880401611328
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,3072,16,1,64,0,1,float16,fp8,0,1.0610346794128418
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,3072,16,1,64,0,1,fp8,fp8,0,1.4929919242858887
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,3072,16,2,64,0,1,float16,float16,0,1.0808320045471191
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,3072,16,2,64,0,1,float16,fp8,0,1.0915839672088623
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,3072,16,2,64,0,1,fp8,fp8,0,1.5213227272033691
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,3072,16,4,64,0,1,float16,float16,0,1.1197439829508464
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,3072,16,4,64,0,1,float16,fp8,0,1.0978986422220867
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,3072,16,8,64,0,1,float16,float16,0,1.2786346276601155
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,3072,16,4,64,0,1,fp8,fp8,0,1.579861323038737
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,3072,16,8,64,0,1,float16,fp8,0,1.2588373025258381
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,3072,16,16,64,0,1,float16,float16,0,0.6635520060857137
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,3072,16,8,64,0,1,fp8,fp8,0,1.6831146876017253
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,3072,16,16,64,0,1,float16,fp8,0,0.5949440002441406
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,3072,16,16,64,0,1,fp8,fp8,0,0.9490773677825928
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,3072,16,1,64,0,1,float16,fp8,0,0.5502293507258097
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,3072,16,1,64,0,1,float16,float16,0,0.5440853436787924
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,3072,16,1,64,0,1,fp8,fp8,0,0.7630506356557211
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,3072,16,2,64,0,1,float16,float16,0,0.5655893484751383
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,3072,16,2,64,0,1,fp8,fp8,0,0.7661226590474447
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,3072,16,2,64,0,1,float16,fp8,0,0.558079997698466
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,3072,16,4,64,0,1,float16,float16,0,0.5587626695632935
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,3072,16,4,64,0,1,float16,fp8,0,0.5372586647669474
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,3072,16,4,64,0,1,fp8,fp8,0,0.7720959981282552
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,3072,16,8,64,0,1,float16,fp8,0,0.5435733397801717
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,3072,16,8,64,0,1,float16,float16,0,0.5440853436787924
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,3072,16,8,64,0,1,fp8,fp8,0,0.8195412953694662
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,3072,16,16,64,0,1,float16,float16,0,0.3056640028953552
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,3072,16,16,64,0,1,float16,fp8,0,0.3065173427263896
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,3072,16,16,64,0,1,fp8,fp8,0,0.4304213523864746
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,3072,16,1,64,0,1,float16,float16,0,0.28757333755493164
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,3072,16,1,64,0,1,float16,fp8,0,0.29525333642959595
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,3072,16,1,64,0,1,fp8,fp8,0,0.41437868277231854
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,3072,16,2,64,0,1,float16,float16,0,0.29713066418965656
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,3072,16,2,64,0,1,float16,fp8,0,0.29371732473373413
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,3072,16,2,64,0,1,fp8,fp8,0,0.4169386625289917
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,3072,16,4,64,0,1,float16,float16,0,0.29064534107844037
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,3072,16,4,64,0,1,float16,fp8,0,0.293887992699941
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,3072,16,4,64,0,1,fp8,fp8,0,0.4107946554819743
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,3072,16,8,64,0,1,float16,float16,0,0.30293333530426025
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,3072,16,8,64,0,1,float16,fp8,0,0.30139732360839844
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,3072,16,8,64,0,1,fp8,fp8,0,0.4198400179545085
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,3072,16,16,64,0,1,float16,float16,0,0.1812480092048645
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,3072,16,16,64,0,1,float16,fp8,0,0.17800533771514893
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,3072,16,16,64,0,1,fp8,fp8,0,0.24729599555333456
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,3072,16,1,64,0,1,float16,float16,0,0.18397865692774454
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,3072,16,1,64,0,1,float16,fp8,0,0.18175999323527017
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,3072,16,1,64,0,1,fp8,fp8,0,0.24012800057729086
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,3072,16,2,64,0,1,float16,float16,0,0.18141865730285645
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,3072,16,2,64,0,1,float16,fp8,0,0.18568533658981323
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,3072,16,2,64,0,1,fp8,fp8,0,0.2515626748402913
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,3072,16,4,64,0,1,float16,float16,0,0.1802240014076233
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,3072,16,4,64,0,1,float16,fp8,0,0.17971199750900269
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,3072,16,4,64,0,1,fp8,fp8,0,0.24081067244211832
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,3072,16,8,64,0,1,float16,fp8,0,0.18653867642084757
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,3072,16,8,64,0,1,float16,float16,0,0.18107734123865762
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,3072,16,8,64,0,1,fp8,fp8,0,0.2409813404083252
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,2048,16,1,64,0,1,float16,float16,0,5.104981422424316
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,2048,16,1,64,0,1,float16,fp8,0,5.246464093526204
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,2048,16,1,64,0,1,fp8,fp8,0,6.369962692260742
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,2048,16,2,64,0,1,float16,fp8,0,5.659818649291992
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,2048,16,2,64,0,1,float16,float16,0,5.721941630045573
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,2048,16,4,64,0,1,float16,float16,0,5.937493642171224
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,2048,16,2,64,0,1,fp8,fp8,0,6.9355519612630205
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,2048,16,4,64,0,1,float16,fp8,0,5.815637588500977
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,2048,16,4,64,0,1,fp8,fp8,0,7.32586669921875
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,2048,16,8,64,0,1,float16,float16,0,6.740650812784831
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,2048,16,16,64,0,1,float16,float16,0,3.6761598587036133
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,2048,16,1,64,0,1,float16,float16,0,2.3980372746785483
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,2048,16,16,64,0,1,float16,fp8,0,3.425450642903646
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,2048,16,8,64,0,1,float16,fp8,0,6.357503890991211
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,2048,16,16,64,0,1,fp8,fp8,0,4.028074582417806
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,2048,16,8,64,0,1,fp8,fp8,0,7.99351437886556
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,2048,16,1,64,0,1,float16,fp8,0,2.3386452992757163
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,2048,16,2,64,0,1,float16,float16,0,2.5478827158610025
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,2048,16,1,64,0,1,fp8,fp8,0,2.9402453104654946
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,2048,16,2,64,0,1,float16,fp8,0,2.469205379486084
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,2048,16,4,64,0,1,float16,float16,0,2.662399927775065
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,2048,16,4,64,0,1,float16,fp8,0,2.598911921183268
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,2048,16,2,64,0,1,fp8,fp8,0,3.0892372131347656
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,2048,16,4,64,0,1,fp8,fp8,0,3.255296071370443
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,2048,16,8,64,0,1,float16,float16,0,2.9660161336263022
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,2048,16,16,64,0,1,float16,fp8,0,1.6071680386861165
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,2048,16,16,64,0,1,float16,float16,0,1.6976213455200195
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,2048,16,8,64,0,1,float16,fp8,0,2.918911933898926
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,2048,16,16,64,0,1,fp8,fp8,0,1.9196586608886719
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,2048,16,1,64,0,1,float16,float16,0,1.0781013170878093
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,2048,16,8,64,0,1,fp8,fp8,0,3.540992101033529
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,2048,16,1,64,0,1,float16,fp8,0,1.0920960108439128
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,2048,16,1,64,0,1,fp8,fp8,0,1.4327467282613118
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,2048,16,2,64,0,1,float16,float16,0,1.1267413298288982
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,2048,16,2,64,0,1,float16,fp8,0,1.1093333562215169
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,2048,16,4,64,0,1,float16,float16,0,1.2025173505147297
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,2048,16,4,64,0,1,float16,fp8,0,1.162069320678711
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,2048,16,2,64,0,1,fp8,fp8,0,1.4948693911234539
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,2048,16,4,64,0,1,fp8,fp8,0,1.5622827212015789
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,2048,16,8,64,0,1,float16,float16,0,1.4088533719380696
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,2048,16,8,64,0,1,float16,fp8,0,1.3327360153198242
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,2048,16,16,64,0,1,float16,fp8,0,0.7142399946848551
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,2048,16,16,64,0,1,float16,float16,0,0.7685120105743408
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,2048,16,8,64,0,1,fp8,fp8,0,1.6887466112772624
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,2048,16,1,64,0,1,float16,float16,0,0.5271893342336019
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,2048,16,16,64,0,1,fp8,fp8,0,0.9765546321868896
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,2048,16,1,64,0,1,float16,fp8,0,0.5343573490778605
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,2048,16,1,64,0,1,fp8,fp8,0,0.7128746509552002
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,2048,16,2,64,0,1,float16,float16,0,0.5109759966532389
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,2048,16,2,64,0,1,float16,fp8,0,0.517632007598877
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,2048,16,4,64,0,1,float16,float16,0,0.5130240122477213
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,2048,16,2,64,0,1,fp8,fp8,0,0.7101439634958903
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,2048,16,4,64,0,1,float16,fp8,0,0.5198506514231364
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,2048,16,4,64,0,1,fp8,fp8,0,0.7381333510080973
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,2048,16,8,64,0,1,float16,float16,0,0.5724159876505533
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,2048,16,8,64,0,1,float16,fp8,0,0.5396480162938436
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,2048,16,8,64,0,1,fp8,fp8,0,0.8360959688822428
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,2048,16,16,64,0,1,float16,float16,0,0.2868906656901042
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,2048,16,16,64,0,1,float16,fp8,0,0.283135990301768
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,2048,16,1,64,0,1,float16,float16,0,0.2568533420562744
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,2048,16,16,64,0,1,fp8,fp8,0,0.45021867752075195
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,2048,16,1,64,0,1,float16,fp8,0,0.25617067019144696
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,2048,16,1,64,0,1,fp8,fp8,0,0.3768320083618164
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,2048,16,2,64,0,1,float16,float16,0,0.2662400007247925
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,2048,16,2,64,0,1,float16,fp8,0,0.2629973292350769
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,2048,16,4,64,0,1,float16,float16,0,0.27101866404215497
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,2048,16,2,64,0,1,fp8,fp8,0,0.37700267632802326
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,2048,16,4,64,0,1,float16,fp8,0,0.2653866608937581
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,2048,16,4,64,0,1,fp8,fp8,0,0.377344012260437
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,2048,16,8,64,0,1,float16,float16,0,0.2739199995994568
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,2048,16,8,64,0,1,fp8,fp8,0,0.38792534669240314
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,2048,16,8,64,0,1,float16,fp8,0,0.2769920031229655
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,2048,16,16,64,0,1,float16,float16,0,0.15240533153216043
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,2048,16,16,64,0,1,float16,fp8,0,0.15359999736150107
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,2048,16,16,64,0,1,fp8,fp8,0,0.22323199113210043
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,2048,16,1,64,0,1,float16,fp8,0,0.1546239952246348
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,2048,16,1,64,0,1,float16,float16,0,0.15428266922632852
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,2048,16,1,64,0,1,fp8,fp8,0,0.21401600042978922
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,2048,16,2,64,0,1,float16,float16,0,0.155648003021876
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,2048,16,2,64,0,1,float16,fp8,0,0.1532586713631948
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,2048,16,2,64,0,1,fp8,fp8,0,0.21504000822703043
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,2048,16,4,64,0,1,float16,float16,0,0.15069866180419922
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,2048,16,4,64,0,1,float16,fp8,0,0.15411200126012167
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,2048,16,4,64,0,1,fp8,fp8,0,0.2172586719195048
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,2048,16,8,64,0,1,float16,float16,0,0.1513813336690267
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,2048,16,8,64,0,1,float16,fp8,0,0.14967466394106546
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,2048,16,8,64,0,1,fp8,fp8,0,0.22272000710169473
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,2048,16,16,64,0,1,float16,float16,0,0.10547199845314026
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,2048,16,16,64,0,1,float16,fp8,0,0.10291199882825215
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,2048,16,16,64,0,1,fp8,fp8,0,0.12987732887268066
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,2048,16,1,64,0,1,float16,float16,0,0.10922666390736897
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,2048,16,1,64,0,1,float16,fp8,0,0.1063253382841746
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,2048,16,1,64,0,1,fp8,fp8,0,0.13038933277130127
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,2048,16,2,64,0,1,float16,float16,0,0.10410666465759277
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,2048,16,2,64,0,1,float16,fp8,0,0.1063253382841746
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,2048,16,2,64,0,1,fp8,fp8,0,0.13090133666992188
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,2048,16,4,64,0,1,float16,float16,0,0.10786133011182149
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,2048,16,4,64,0,1,float16,fp8,0,0.1053013304869334
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,2048,16,4,64,0,1,fp8,fp8,0,0.13124266266822815
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,2048,16,8,64,0,1,float16,float16,0,0.10376532872517903
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,2048,16,8,64,0,1,float16,fp8,0,0.10410666465759277
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,2048,16,8,64,0,1,fp8,fp8,0,0.12902399897575378
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,1536,16,1,64,0,1,float16,float16,0,3.133098602294922
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,1536,16,1,64,0,1,float16,fp8,0,3.128490765889486
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,1536,16,2,64,0,1,float16,float16,0,3.3664000829060874
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,1536,16,1,64,0,1,fp8,fp8,0,3.6519254048665366
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,1536,16,2,64,0,1,float16,fp8,0,3.312469482421875
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,1536,16,2,64,0,1,fp8,fp8,0,4.019370714823405
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,1536,16,4,64,0,1,float16,fp8,0,3.510613441467285
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,1536,16,4,64,0,1,float16,float16,0,3.679744084676107
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,1536,16,4,64,0,1,fp8,fp8,0,4.3642880121866865
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,1536,16,8,64,0,1,float16,float16,0,4.1999359130859375
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,1536,16,16,64,0,1,float16,float16,0,2.3454720179239907
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,1536,16,8,64,0,1,float16,fp8,0,3.974656105041504
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,1536,16,16,64,0,1,float16,fp8,0,2.199552059173584
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,1536,16,8,64,0,1,fp8,fp8,0,4.833109219868978
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,1536,16,1,64,0,1,float16,float16,0,1.45032533009847
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,1536,16,16,64,0,1,fp8,fp8,0,2.5420799255371094
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,1536,16,1,64,0,1,float16,fp8,0,1.4501546223958333
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,1536,16,1,64,0,1,fp8,fp8,0,1.759914716084798
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,1536,16,2,64,0,1,float16,float16,0,1.501354694366455
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,1536,16,2,64,0,1,float16,fp8,0,1.4807039896647136
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,1536,16,2,64,0,1,fp8,fp8,0,1.8490026791890461
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,1536,16,4,64,0,1,float16,fp8,0,1.6232105890909831
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,1536,16,4,64,0,1,float16,float16,0,1.6358399391174316
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,1536,16,4,64,0,1,fp8,fp8,0,1.9483307202657063
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,1536,16,8,64,0,1,float16,float16,0,1.9003732999165852
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,1536,16,8,64,0,1,float16,fp8,0,1.8319360415140789
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,1536,16,8,64,0,1,fp8,fp8,0,2.133845329284668
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,1536,16,16,64,0,1,float16,float16,0,1.0792960325876872
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,1536,16,16,64,0,1,float16,fp8,0,1.006592035293579
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,1536,16,1,64,0,1,float16,float16,0,0.6244693199793497
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,1536,16,16,64,0,1,fp8,fp8,0,1.2455253601074219
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,1536,16,1,64,0,1,float16,fp8,0,0.6109866698582967
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,1536,16,1,64,0,1,fp8,fp8,0,0.8582826455434164
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,1536,16,2,64,0,1,float16,float16,0,0.6538240114847819
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,1536,16,2,64,0,1,float16,fp8,0,0.6345386505126953
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,1536,16,2,64,0,1,fp8,fp8,0,0.8872959613800049
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,1536,16,4,64,0,1,float16,float16,0,0.6920533180236816
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,1536,16,4,64,0,1,float16,fp8,0,0.6720853646596273
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,1536,16,4,64,0,1,fp8,fp8,0,0.9644373257954916
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,1536,16,8,64,0,1,float16,float16,0,0.8410453001658121
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,1536,16,8,64,0,1,float16,fp8,0,0.8132266998291016
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,1536,16,8,64,0,1,fp8,fp8,0,1.0642773310343425
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,1536,16,16,64,0,1,float16,float16,0,0.4355413516362508
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,1536,16,16,64,0,1,float16,fp8,0,0.36505599816640216
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,1536,16,16,64,0,1,fp8,fp8,0,0.6196906566619873
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,1536,16,1,64,0,1,float16,float16,0,0.31334400177001953
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,1536,16,1,64,0,1,float16,fp8,0,0.31385600566864014
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,1536,16,1,64,0,1,fp8,fp8,0,0.4235946734746297
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,1536,16,2,64,0,1,float16,float16,0,0.3199999928474426
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,1536,16,2,64,0,1,float16,fp8,0,0.3242666721343994
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,1536,16,2,64,0,1,fp8,fp8,0,0.43212799231211346
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,1536,16,4,64,0,1,float16,float16,0,0.3145386576652527
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,1536,16,4,64,0,1,float16,fp8,0,0.3135146697362264
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,1536,16,8,64,0,1,float16,fp8,0,0.3165866732597351
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,1536,16,8,64,0,1,float16,float16,0,0.3256319959958394
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,1536,16,4,64,0,1,fp8,fp8,0,0.43673598766326904
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,1536,16,8,64,0,1,fp8,fp8,0,0.4884479840596517
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,1536,16,16,64,0,1,float16,float16,0,0.17937066157658896
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,1536,16,16,64,0,1,float16,fp8,0,0.1781760056813558
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,1536,16,16,64,0,1,fp8,fp8,0,0.24473599592844644
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,1536,16,1,64,0,1,float16,float16,0,0.1634986698627472
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,1536,16,1,64,0,1,float16,fp8,0,0.16230400403340658
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,1536,16,1,64,0,1,fp8,fp8,0,0.2307413419087728
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,1536,16,2,64,0,1,float16,float16,0,0.16127999623616537
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,1536,16,2,64,0,1,float16,fp8,0,0.16366933782895407
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,1536,16,2,64,0,1,fp8,fp8,0,0.2310826579729716
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,1536,16,4,64,0,1,float16,float16,0,0.16315733393033346
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,1536,16,4,64,0,1,float16,fp8,0,0.16145066420237222
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,1536,16,4,64,0,1,fp8,fp8,0,0.23552000522613525
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,1536,16,8,64,0,1,float16,float16,0,0.1609386702378591
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,1536,16,8,64,0,1,float16,fp8,0,0.16127999623616537
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,1536,16,8,64,0,1,fp8,fp8,0,0.233130673567454
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,1536,16,16,64,0,1,float16,float16,0,0.10086400310198466
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,1536,16,16,64,0,1,float16,fp8,0,0.1032533347606659
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,1536,16,16,64,0,1,fp8,fp8,0,0.14131200313568115
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,1536,16,1,64,0,1,float16,float16,0,0.10274133086204529
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,1536,16,1,64,0,1,float16,fp8,0,0.10257066289583842
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,1536,16,1,64,0,1,fp8,fp8,0,0.1346560021241506
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,1536,16,2,64,0,1,float16,float16,0,0.10103467106819153
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,1536,16,2,64,0,1,float16,fp8,0,0.10257066289583842
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,1536,16,2,64,0,1,fp8,fp8,0,0.13482667009035745
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,1536,16,4,64,0,1,float16,float16,0,0.10359467069307964
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,1536,16,4,64,0,1,float16,fp8,0,0.10308266679445903
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,1536,16,4,64,0,1,fp8,fp8,0,0.13550933202107748
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,1536,16,8,64,0,1,float16,float16,0,0.09983999530474345
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,1536,16,8,64,0,1,float16,fp8,0,0.10086400310198466
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,1536,16,8,64,0,1,fp8,fp8,0,0.1346560021241506
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,1536,16,16,64,0,1,float16,fp8,0,0.06860800087451935
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,1536,16,16,64,0,1,float16,float16,0,0.0679253339767456
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,1536,16,16,64,0,1,fp8,fp8,0,0.09062400460243225
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,1536,16,1,64,0,1,float16,float16,0,0.06911999980608623
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,1536,16,1,64,0,1,float16,fp8,0,0.067071999112765
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,1536,16,1,64,0,1,fp8,fp8,0,0.09233066439628601
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,1536,16,2,64,0,1,float16,float16,0,0.06860800087451935
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,1536,16,2,64,0,1,float16,fp8,0,0.06929066777229309
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,1536,16,2,64,0,1,fp8,fp8,0,0.09147733449935913
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,1536,16,4,64,0,1,float16,float16,0,0.06775466601053874
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,1536,16,4,64,0,1,float16,fp8,0,0.06963199873765309
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,1536,16,4,64,0,1,fp8,fp8,0,0.09181867043177287
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,1536,16,8,64,0,1,float16,float16,0,0.06775466601053874
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,1536,16,8,64,0,1,float16,fp8,0,0.06843733290831248
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,1536,16,8,64,0,1,fp8,fp8,0,0.09062400460243225
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,1024,16,1,64,0,1,float16,float16,0,3.3070081075032554
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,1024,16,1,64,0,1,float16,fp8,0,3.289770762125651
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,1024,16,1,64,0,1,fp8,fp8,0,3.558229446411133
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,1024,16,2,64,0,1,float16,float16,0,3.5275093714396157
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,1024,16,2,64,0,1,float16,fp8,0,3.4322773615519204
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,1024,16,2,64,0,1,fp8,fp8,0,3.788970629374186
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,1024,16,4,64,0,1,float16,fp8,0,3.5964587529500327
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,1024,16,4,64,0,1,float16,float16,0,3.718144098917643
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,1024,16,4,64,0,1,fp8,fp8,0,3.988650639851888
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,1024,16,8,64,0,1,float16,float16,0,4.308821360270183
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,1024,16,8,64,0,1,float16,fp8,0,4.081493377685547
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,1024,16,16,64,0,1,float16,float16,0,2.689023971557617
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,1024,16,16,64,0,1,float16,fp8,0,2.535253365834554
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,1024,16,8,64,0,1,fp8,fp8,0,4.497066815694173
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,1024,16,1,64,0,1,float16,float16,0,1.555456002553304
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,1024,16,16,64,0,1,fp8,fp8,0,2.680661201477051
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,1024,16,1,64,0,1,float16,fp8,0,1.5633066495259602
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,1024,16,1,64,0,1,fp8,fp8,0,1.7908053398132324
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,1024,16,2,64,0,1,float16,float16,0,1.6692907015482585
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,1024,16,2,64,0,1,float16,fp8,0,1.6501760482788086
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,1024,16,2,64,0,1,fp8,fp8,0,1.8899626731872559
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,1024,16,4,64,0,1,float16,float16,0,1.833130677541097
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,1024,16,4,64,0,1,float16,fp8,0,1.7583786646525066
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,1024,16,4,64,0,1,fp8,fp8,0,1.9722240765889485
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,1024,16,8,64,0,1,float16,fp8,0,2.0085760752360025
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,1024,16,8,64,0,1,float16,float16,0,2.09663995107015
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,1024,16,8,64,0,1,fp8,fp8,0,2.200746695200602
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,1024,16,16,64,0,1,float16,float16,0,1.3134506543477376
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,1024,16,16,64,0,1,float16,fp8,0,1.2305066585540771
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,1024,16,16,64,0,1,fp8,fp8,0,1.343488057454427
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,1024,16,1,64,0,1,float16,float16,0,0.6896639664967855
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,1024,16,1,64,0,1,float16,fp8,0,0.677717367808024
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,1024,16,1,64,0,1,fp8,fp8,0,0.8893439769744873
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,1024,16,2,64,0,1,float16,float16,0,0.7403519948323568
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,1024,16,2,64,0,1,float16,fp8,0,0.718506654103597
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,1024,16,4,64,0,1,float16,float16,0,0.8063999811808268
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,1024,16,2,64,0,1,fp8,fp8,0,0.9270613193511963
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,1024,16,4,64,0,1,float16,fp8,0,0.7985493342081705
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,1024,16,4,64,0,1,fp8,fp8,0,0.9917439619700114
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,1024,16,8,64,0,1,float16,float16,0,0.9545386632283529
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,1024,16,8,64,0,1,float16,fp8,0,0.914090633392334
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,1024,16,8,64,0,1,fp8,fp8,0,1.1182080109914143
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,1024,16,16,64,0,1,float16,float16,0,0.5645653406778971
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,1024,16,16,64,0,1,float16,fp8,0,0.5183146794637045
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,1024,16,16,64,0,1,fp8,fp8,0,0.6864213148752848
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,1024,16,1,64,0,1,float16,float16,0,0.3280213276545207
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,1024,16,1,64,0,1,fp8,fp8,0,0.41830400625864667
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,1024,16,1,64,0,1,float16,fp8,0,0.3258026639620463
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,1024,16,2,64,0,1,float16,float16,0,0.32819199562072754
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,1024,16,2,64,0,1,float16,fp8,0,0.3174399932225545
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,1024,16,4,64,0,1,float16,float16,0,0.3135146697362264
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,1024,16,4,64,0,1,float16,fp8,0,0.3176106611887614
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,1024,16,2,64,0,1,fp8,fp8,0,0.42291200160980225
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,1024,16,4,64,0,1,fp8,fp8,0,0.45397333304087323
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,1024,16,8,64,0,1,float16,float16,0,0.3572053511937459
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,1024,16,8,64,0,1,float16,fp8,0,0.3346773386001587
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,1024,16,8,64,0,1,fp8,fp8,0,0.5500586827596029
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,1024,16,16,64,0,1,float16,float16,0,0.17083734273910522
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,1024,16,16,64,0,1,float16,fp8,0,0.16537599762280783
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,1024,16,16,64,0,1,fp8,fp8,0,0.2955946723620097
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,1024,16,1,64,0,1,float16,float16,0,0.1604266663392385
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,1024,16,1,64,0,1,float16,fp8,0,0.16110933820406595
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,1024,16,1,64,0,1,fp8,fp8,0,0.2208426594734192
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,1024,16,2,64,0,1,float16,float16,0,0.15803733468055725
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,1024,16,2,64,0,1,float16,fp8,0,0.16059733430544534
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,1024,16,2,64,0,1,fp8,fp8,0,0.21998933951059976
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,1024,16,4,64,0,1,float16,float16,0,0.15923200050989786
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,1024,16,4,64,0,1,float16,fp8,0,0.1604266663392385
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,1024,16,4,64,0,1,fp8,fp8,0,0.21913599967956543
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,1024,16,8,64,0,1,float16,float16,0,0.16196266810099283
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,1024,16,8,64,0,1,float16,fp8,0,0.1609386702378591
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,1024,16,8,64,0,1,fp8,fp8,0,0.2244266668955485
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,1024,16,16,64,0,1,float16,float16,0,0.0981333355108897
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,1024,16,16,64,0,1,float16,fp8,0,0.09898666540781657
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,1024,16,16,64,0,1,fp8,fp8,0,0.12714667121569315
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,1024,16,1,64,0,1,float16,float16,0,0.09727999567985535
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,1024,16,1,64,0,1,float16,fp8,0,0.09710933764775594
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,1024,16,1,64,0,1,fp8,fp8,0,0.12680533528327942
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,1024,16,2,64,0,1,float16,float16,0,0.09489066402117412
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,1024,16,2,64,0,1,float16,fp8,0,0.09540266791979472
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,1024,16,2,64,0,1,fp8,fp8,0,0.1276586651802063
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,1024,16,4,64,0,1,float16,float16,0,0.09693866968154907
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,1024,16,4,64,0,1,float16,fp8,0,0.09608532985051473
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,1024,16,4,64,0,1,fp8,fp8,0,0.12851199507713318
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,1024,16,8,64,0,1,float16,float16,0,0.09591466188430786
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,1024,16,8,64,0,1,fp8,fp8,0,0.1250986655553182
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,1024,16,8,64,0,1,float16,fp8,0,0.09676800171534221
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,1024,16,16,64,0,1,float16,float16,0,0.05922133227189382
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,1024,16,16,64,0,1,float16,fp8,0,0.05905066430568695
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,1024,16,1,64,0,1,float16,float16,0,0.05734399954477946
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,1024,16,1,64,0,1,float16,fp8,0,0.057855998476346336
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,1024,16,16,64,0,1,fp8,fp8,0,0.07031466563542683
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,1024,16,1,64,0,1,fp8,fp8,0,0.07048533360163371
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,1024,16,2,64,0,1,float16,float16,0,0.05649066468079885
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,1024,16,2,64,0,1,float16,fp8,0,0.05751466751098633
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,1024,16,2,64,0,1,fp8,fp8,0,0.06997333467006683
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,1024,16,4,64,0,1,float16,float16,0,0.05751466751098633
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,1024,16,4,64,0,1,float16,fp8,0,0.05751466751098633
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,1024,16,4,64,0,1,fp8,fp8,0,0.06980266670385997
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,1024,16,8,64,0,1,float16,float16,0,0.05649066468079885
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,1024,16,8,64,0,1,float16,fp8,0,0.05614933371543884
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,1024,16,8,64,0,1,fp8,fp8,0,0.06997333467006683
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,1024,16,16,64,0,1,float16,float16,0,0.03583999971548716
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,1024,16,16,64,0,1,float16,fp8,0,0.03515733281771342
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,1024,16,16,64,0,1,fp8,fp8,0,0.04949333270390829
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,1024,16,1,64,0,1,float16,float16,0,0.03515733281771342
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,1024,16,1,64,0,1,float16,fp8,0,0.034645333886146545
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,1024,16,1,64,0,1,fp8,fp8,0,0.04915200173854828
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,1024,16,2,64,0,1,float16,float16,0,0.03498666733503342
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,1024,16,2,64,0,1,float16,fp8,0,0.03498666733503342
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,1024,16,2,64,0,1,fp8,fp8,0,0.04898133377234141
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,1024,16,4,64,0,1,float16,float16,0,0.034645333886146545
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,1024,16,4,64,0,1,float16,fp8,0,0.034815999368826546
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,1024,16,4,64,0,1,fp8,fp8,0,0.04898133377234141
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,1024,16,8,64,0,1,float16,float16,0,0.03515733281771342
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,1024,16,8,64,0,1,float16,fp8,0,0.03515733281771342
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,1024,16,8,64,0,1,fp8,fp8,0,0.04898133377234141
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,512,16,1,64,0,1,float16,float16,0,2.43012269337972
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,512,16,1,64,0,1,float16,fp8,0,2.432682673136393
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,512,16,1,64,0,1,fp8,fp8,0,2.453674634297689
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,512,16,2,64,0,1,float16,float16,0,2.664789358774821
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,512,16,2,64,0,1,float16,fp8,0,2.6122239430745444
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,512,16,2,64,0,1,fp8,fp8,0,2.6577919324239097
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,512,16,4,64,0,1,float16,float16,0,2.97267214457194
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,512,16,4,64,0,1,float16,fp8,0,2.873173395792643
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,512,16,4,64,0,1,fp8,fp8,0,2.847914695739746
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,512,16,8,64,0,1,float16,float16,0,3.692373275756836
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,512,16,8,64,0,1,float16,fp8,0,3.5046399434407554
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,512,16,16,64,0,1,float16,float16,0,2.4958292643229165
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,512,16,16,64,0,1,float16,fp8,0,2.3354026476542153
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,512,16,8,64,0,1,fp8,fp8,0,3.4372266133626304
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,512,16,16,64,0,1,fp8,fp8,0,2.147157351175944
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,512,16,1,64,0,1,float16,float16,0,1.1352746486663818
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,512,16,1,64,0,1,float16,fp8,0,1.1226452986399333
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,512,16,1,64,0,1,fp8,fp8,0,1.2390399773915608
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,512,16,2,64,0,1,float16,float16,0,1.284607966740926
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,512,16,2,64,0,1,float16,fp8,0,1.2550826867421467
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,512,16,2,64,0,1,fp8,fp8,0,1.343488057454427
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,512,16,4,64,0,1,float16,float16,0,1.4527146021525066
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,512,16,4,64,0,1,float16,fp8,0,1.4027093251546223
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,512,16,4,64,0,1,fp8,fp8,0,1.4209705988566081
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,512,16,8,64,0,1,float16,float16,0,1.8017279307047527
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,512,16,8,64,0,1,float16,fp8,0,1.7111040751139324
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,512,16,8,64,0,1,fp8,fp8,0,1.666218598683675
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,512,16,16,64,0,1,float16,float16,0,1.2101973692576091
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,512,16,16,64,0,1,float16,fp8,0,1.1427840391794841
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,512,16,16,64,0,1,fp8,fp8,0,1.0629119873046875
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,512,16,1,64,0,1,float16,float16,0,0.5039786497751871
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,512,16,1,64,0,1,float16,fp8,0,0.478549321492513
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,512,16,1,64,0,1,fp8,fp8,0,0.6087679862976074
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,512,16,2,64,0,1,float16,float16,0,0.5437440077463785
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,512,16,2,64,0,1,float16,fp8,0,0.5213866631189982
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,512,16,2,64,0,1,fp8,fp8,0,0.6507519880930582
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,512,16,4,64,0,1,float16,float16,0,0.6318080027898153
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,512,16,4,64,0,1,float16,fp8,0,0.6019413471221924
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,512,16,4,64,0,1,fp8,fp8,0,0.7212373415629069
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,512,16,8,64,0,1,float16,float16,0,0.8340480327606201
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,512,16,8,64,0,1,float16,fp8,0,0.7773866653442383
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,512,16,16,64,0,1,float16,float16,0,0.4930560191472371
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,512,16,8,64,0,1,fp8,fp8,0,0.8430933157602946
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,512,16,16,64,0,1,float16,fp8,0,0.45124268531799316
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,512,16,1,64,0,1,float16,float16,0,0.2167466680208842
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,512,16,16,64,0,1,fp8,fp8,0,0.5386240084966024
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,512,16,1,64,0,1,float16,fp8,0,0.2135039965311686
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,512,16,1,64,0,1,fp8,fp8,0,0.26692267258961994
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,512,16,2,64,0,1,float16,float16,0,0.21026132504145303
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,512,16,2,64,0,1,float16,fp8,0,0.21760000785191855
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,512,16,2,64,0,1,fp8,fp8,0,0.2797226707140605
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,512,16,4,64,0,1,float16,float16,0,0.21145600080490112
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,512,16,4,64,0,1,float16,fp8,0,0.2167466680208842
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,512,16,4,64,0,1,fp8,fp8,0,0.3078826665878296
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,512,16,8,64,0,1,float16,float16,0,0.2611200014750163
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,512,16,8,64,0,1,float16,fp8,0,0.23603200912475586
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,512,16,8,64,0,1,fp8,fp8,0,0.40089599291483563
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,512,16,16,64,0,1,float16,float16,0,0.1276586651802063
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,512,16,16,64,0,1,float16,fp8,0,0.11229866743087769
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,512,16,16,64,0,1,fp8,fp8,0,0.21742933988571167
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,512,16,1,64,0,1,float16,float16,0,0.11025066177050273
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,512,16,1,64,0,1,float16,fp8,0,0.11076266566912334
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,512,16,1,64,0,1,fp8,fp8,0,0.14284800489743552
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,512,16,2,64,0,1,float16,float16,0,0.11110400160153706
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,512,16,2,64,0,1,float16,fp8,0,0.1129813293615977
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,512,16,2,64,0,1,fp8,fp8,0,0.1431893308957418
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,512,16,4,64,0,1,float16,fp8,0,0.11246933539708455
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,512,16,4,64,0,1,float16,float16,0,0.1109333336353302
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,512,16,8,64,0,1,float16,float16,0,0.11110400160153706
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,512,16,4,64,0,1,fp8,fp8,0,0.14421332875887552
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,512,16,8,64,0,1,float16,fp8,0,0.11076266566912334
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,512,16,8,64,0,1,fp8,fp8,0,0.14472533265749613
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,512,16,16,64,0,1,float16,float16,0,0.06656000018119812
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,512,16,16,64,0,1,float16,fp8,0,0.06656000018119812
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,512,16,16,64,0,1,fp8,fp8,0,0.08260266482830048
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,512,16,1,64,0,1,float16,fp8,0,0.06195199986298879
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,512,16,1,64,0,1,float16,float16,0,0.06382933259010315
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,512,16,1,64,0,1,fp8,fp8,0,0.08294400076071422
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,512,16,2,64,0,1,float16,float16,0,0.06382933259010315
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,512,16,2,64,0,1,float16,fp8,0,0.06417066852251689
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,512,16,2,64,0,1,fp8,fp8,0,0.0820906658967336
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,512,16,4,64,0,1,float16,float16,0,0.06468266745408376
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,512,16,4,64,0,1,float16,fp8,0,0.06400000055631001
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,512,16,4,64,0,1,fp8,fp8,0,0.0817493349313736
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,512,16,8,64,0,1,float16,float16,0,0.06519466638565063
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,512,16,8,64,0,1,float16,fp8,0,0.06485333542029063
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,512,16,8,64,0,1,fp8,fp8,0,0.08140799899895985
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,512,16,16,64,0,1,float16,float16,0,0.03839999934037527
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,512,16,16,64,0,1,float16,fp8,0,0.03925333420435587
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,512,16,1,64,0,1,float16,float16,0,0.03788800040880839
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,512,16,16,64,0,1,fp8,fp8,0,0.04386133452256521
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,512,16,1,64,0,1,float16,fp8,0,0.037717332442601524
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,512,16,2,64,0,1,float16,float16,0,0.03737599899371465
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,512,16,1,64,0,1,fp8,fp8,0,0.04386133452256521
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,512,16,2,64,0,1,float16,fp8,0,0.03822933385769526
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,512,16,2,64,0,1,fp8,fp8,0,0.04351999859015147
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,512,16,4,64,0,1,float16,float16,0,0.03839999934037527
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,512,16,4,64,0,1,float16,fp8,0,0.03788800040880839
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,512,16,4,64,0,1,fp8,fp8,0,0.04369066655635834
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,512,16,8,64,0,1,float16,float16,0,0.03822933385769526
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,512,16,8,64,0,1,float16,fp8,0,0.03874133278926214
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,512,16,8,64,0,1,fp8,fp8,0,0.04386133452256521
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,512,16,16,64,0,1,float16,float16,0,0.0240639994541804
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,512,16,16,64,0,1,float16,fp8,0,0.023552000522613525
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,512,16,16,64,0,1,fp8,fp8,0,0.0314026673634847
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,512,16,1,64,0,1,float16,float16,0,0.023039999107519787
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,512,16,1,64,0,1,float16,fp8,0,0.023381332556406658
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,512,16,1,64,0,1,fp8,fp8,0,0.03089066594839096
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,512,16,2,64,0,1,float16,float16,0,0.023210667073726654
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,512,16,2,64,0,1,float16,fp8,0,0.023210667073726654
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,512,16,2,64,0,1,fp8,fp8,0,0.031231999397277832
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,512,16,4,64,0,1,float16,float16,0,0.023039999107519787
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,512,16,4,64,0,1,float16,fp8,0,0.023210667073726654
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,512,16,4,64,0,1,fp8,fp8,0,0.031061333914597828
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,512,16,8,64,0,1,float16,float16,0,0.023381332556406658
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,512,16,8,64,0,1,float16,fp8,0,0.023039999107519787
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,512,16,8,64,0,1,fp8,fp8,0,0.031231999397277832
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,512,16,16,64,0,1,float16,float16,0,0.018090666582187016
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,512,16,16,64,0,1,float16,fp8,0,0.01791999985774358
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,512,16,16,64,0,1,fp8,fp8,0,0.02611200014750163
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,512,16,1,64,0,1,float16,float16,0,0.017749333133300144
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,512,16,1,64,0,1,float16,fp8,0,0.018090666582187016
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,512,16,2,64,0,1,float16,float16,0,0.01791999985774358
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,512,16,1,64,0,1,fp8,fp8,0,0.025770666698614757
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,512,16,2,64,0,1,float16,fp8,0,0.01791999985774358
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,512,16,2,64,0,1,fp8,fp8,0,0.025941332181294758
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,512,16,4,64,0,1,float16,float16,0,0.01791999985774358
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,512,16,4,64,0,1,float16,fp8,0,0.01791999985774358
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,512,16,4,64,0,1,fp8,fp8,0,0.025941332181294758
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,512,16,8,64,0,1,float16,float16,0,0.017749333133300144
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,512,16,8,64,0,1,float16,fp8,0,0.01791999985774358
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,512,16,8,64,0,1,fp8,fp8,0,0.025941332181294758
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,256,16,1,64,0,1,float16,float16,0,1.0262186527252197
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,256,16,1,64,0,1,float16,fp8,0,1.0164906978607178
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,256,16,1,64,0,1,fp8,fp8,0,0.9678506851196289
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,256,16,2,64,0,1,float16,float16,0,1.1595093409220378
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,256,16,2,64,0,1,float16,fp8,0,1.1318613688151042
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,256,16,2,64,0,1,fp8,fp8,0,1.0700799624125164
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,256,16,4,64,0,1,float16,float16,0,1.3436586062113445
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,256,16,4,64,0,1,float16,fp8,0,1.300650676091512
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,256,16,4,64,0,1,fp8,fp8,0,1.1557546456654866
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,256,16,8,64,0,1,float16,float16,0,1.7305599848429363
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,256,16,8,64,0,1,float16,fp8,0,1.6501760482788086
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,256,16,8,64,0,1,fp8,fp8,0,1.3943467140197754
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,256,16,16,64,0,1,float16,float16,0,1.2014933427174885
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,256,16,16,64,0,1,float16,fp8,0,1.1238400141398113
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,256,16,16,64,0,1,fp8,fp8,0,0.9541973272959391
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,256,16,1,64,0,1,float16,float16,0,0.40226133664449054
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,256,16,1,64,0,1,float16,fp8,0,0.398848017056783
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,256,16,1,64,0,1,fp8,fp8,0,0.4715520143508911
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,256,16,2,64,0,1,float16,float16,0,0.4647253354390462
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,256,16,2,64,0,1,float16,fp8,0,0.4457813501358032
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,256,16,2,64,0,1,fp8,fp8,0,0.5120000044504801
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,256,16,4,64,0,1,float16,float16,0,0.571562647819519
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,256,16,4,64,0,1,float16,fp8,0,0.5430613358815511
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,256,16,4,64,0,1,fp8,fp8,0,0.5783893267313639
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,256,16,8,64,0,1,float16,float16,0,0.7995733420054117
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,256,16,8,64,0,1,float16,fp8,0,0.7809706528981527
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,256,16,8,64,0,1,fp8,fp8,0,0.7048532962799072
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,256,16,16,64,0,1,float16,float16,0,0.505514661471049
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,256,16,16,64,0,1,float16,fp8,0,0.45499734083811444
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,256,16,16,64,0,1,fp8,fp8,0,0.4753066698710124
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,256,16,1,64,0,1,float16,float16,0,0.15172266960144043
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,256,16,1,64,0,1,float16,fp8,0,0.1513813336690267
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,256,16,1,64,0,1,fp8,fp8,0,0.1925119956334432
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,256,16,2,64,0,1,float16,float16,0,0.15684266885121664
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,256,16,2,64,0,1,float16,fp8,0,0.15308800339698792
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,256,16,2,64,0,1,fp8,fp8,0,0.19353600343068442
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,256,16,4,64,0,1,float16,float16,0,0.1599146624406179
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,256,16,4,64,0,1,float16,fp8,0,0.15479466319084167
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,256,16,4,64,0,1,fp8,fp8,0,0.23756800095240274
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,256,16,8,64,0,1,float16,float16,0,0.22254933913548788
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,256,16,8,64,0,1,float16,fp8,0,0.19131733973821005
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,256,16,8,64,0,1,fp8,fp8,0,0.3346773386001587
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,256,16,16,64,0,1,float16,float16,0,0.09403733412424724
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,256,16,16,64,0,1,float16,fp8,0,0.08482133348782857
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,256,16,16,64,0,1,fp8,fp8,0,0.1781760056813558
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,256,16,1,64,0,1,float16,float16,0,0.07935999830563863
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,256,16,1,64,0,1,float16,fp8,0,0.0773119976123174
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,256,16,1,64,0,1,fp8,fp8,0,0.10274133086204529
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,256,16,2,64,0,1,float16,float16,0,0.08123733103275299
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,256,16,2,64,0,1,float16,fp8,0,0.08191999793052673
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,256,16,2,64,0,1,fp8,fp8,0,0.1013759970664978
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,256,16,4,64,0,1,float16,fp8,0,0.0820906658967336
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,256,16,4,64,0,1,float16,float16,0,0.08157866696516673
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,256,16,4,64,0,1,fp8,fp8,0,0.10274133086204529
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,256,16,8,64,0,1,float16,float16,0,0.08226133386294048
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,256,16,8,64,0,1,float16,fp8,0,0.08379733562469482
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,256,16,8,64,0,1,fp8,fp8,0,0.10461866855621338
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,256,16,16,64,0,1,float16,float16,0,0.04966400067011515
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,256,16,16,64,0,1,float16,fp8,0,0.04915200173854828
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,256,16,16,64,0,1,fp8,fp8,0,0.060415998101234436
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,256,16,1,64,0,1,float16,float16,0,0.04505600035190582
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,256,16,1,64,0,1,float16,fp8,0,0.045226668318112694
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,256,16,1,64,0,1,fp8,fp8,0,0.058880001306533813
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,256,16,2,64,0,1,float16,float16,0,0.04659200211366018
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,256,16,2,64,0,1,float16,fp8,0,0.045567999283472695
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,256,16,2,64,0,1,fp8,fp8,0,0.05922133227189382
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,256,16,4,64,0,1,float16,float16,0,0.046762665112813316
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,256,16,4,64,0,1,float16,fp8,0,0.045567999283472695
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,256,16,4,64,0,1,fp8,fp8,0,0.059562668204307556
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,256,16,8,64,0,1,float16,float16,0,0.04710400104522705
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,256,16,8,64,0,1,float16,fp8,0,0.04659200211366018
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,256,16,8,64,0,1,fp8,fp8,0,0.06007466713587443
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,256,16,16,64,0,1,float16,float16,0,0.0290133332212766
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,256,16,16,64,0,1,float16,fp8,0,0.0288426677385966
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,256,16,16,64,0,1,fp8,fp8,0,0.03379199902216593
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,256,16,1,64,0,1,float16,float16,0,0.027818667391935985
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,256,16,1,64,0,1,float16,fp8,0,0.027989332874615986
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,256,16,1,64,0,1,fp8,fp8,0,0.03379199902216593
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,256,16,2,64,0,1,float16,float16,0,0.027989332874615986
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,256,16,2,64,0,1,float16,fp8,0,0.028330666323502857
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,256,16,2,64,0,1,fp8,fp8,0,0.0339626669883728
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,256,16,4,64,0,1,float16,float16,0,0.028160000840822857
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,256,16,4,64,0,1,float16,fp8,0,0.0288426677385966
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,256,16,4,64,0,1,fp8,fp8,0,0.03362133353948593
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,256,16,8,64,0,1,float16,fp8,0,0.028330666323502857
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,256,16,8,64,0,1,float16,float16,0,0.028330666323502857
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,256,16,8,64,0,1,fp8,fp8,0,0.03362133353948593
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,256,16,16,64,0,1,float16,float16,0,0.018432000031073887
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,256,16,16,64,0,1,float16,fp8,0,0.018090666582187016
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,256,16,16,64,0,1,fp8,fp8,0,0.02218666672706604
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,256,16,1,64,0,1,float16,float16,0,0.017407999684413273
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,256,16,1,64,0,1,float16,fp8,0,0.017407999684413273
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,256,16,1,64,0,1,fp8,fp8,0,0.022015998760859173
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,256,16,2,64,0,1,float16,float16,0,0.017237332959969837
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,256,16,2,64,0,1,float16,fp8,0,0.0170666662355264
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,256,16,2,64,0,1,fp8,fp8,0,0.022015998760859173
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,256,16,4,64,0,1,float16,float16,0,0.017407999684413273
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,256,16,4,64,0,1,float16,fp8,0,0.017407999684413273
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,256,16,4,64,0,1,fp8,fp8,0,0.022015998760859173
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,256,16,8,64,0,1,float16,float16,0,0.017749333133300144
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,256,16,8,64,0,1,float16,fp8,0,0.01757866640885671
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,256,16,16,64,0,1,float16,float16,0,0.013823999712864557
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,256,16,8,64,0,1,fp8,fp8,0,0.02218666672706604
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,256,16,16,64,0,1,float16,fp8,0,0.013823999712864557
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,256,16,16,64,0,1,fp8,fp8,0,0.01826133330663045
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,256,16,1,64,0,1,float16,float16,0,0.013653332988421122
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,256,16,1,64,0,1,float16,fp8,0,0.013653332988421122
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,256,16,1,64,0,1,fp8,fp8,0,0.018090666582187016
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,256,16,2,64,0,1,float16,float16,0,0.013482666263977686
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,256,16,2,64,0,1,float16,fp8,0,0.013482666263977686
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,256,16,2,64,0,1,fp8,fp8,0,0.018090666582187016
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,256,16,4,64,0,1,float16,float16,0,0.013482666263977686
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,256,16,4,64,0,1,float16,fp8,0,0.013653332988421122
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,256,16,4,64,0,1,fp8,fp8,0,0.01791999985774358
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,256,16,8,64,0,1,float16,float16,0,0.013482666263977686
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,256,16,8,64,0,1,float16,fp8,0,0.013823999712864557
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,256,16,8,64,0,1,fp8,fp8,0,0.018090666582187016
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,256,16,16,64,0,1,float16,float16,0,0.01228800043463707
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,256,16,16,64,0,1,float16,fp8,0,0.012458667159080505
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,256,16,1,64,0,1,float16,float16,0,0.01228800043463707
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,256,16,16,64,0,1,fp8,fp8,0,0.016554666062196095
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,256,16,1,64,0,1,float16,fp8,0,0.012458667159080505
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,256,16,1,64,0,1,fp8,fp8,0,0.016554666062196095
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,256,16,2,64,0,1,float16,float16,0,0.012458667159080505
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,256,16,2,64,0,1,float16,fp8,0,0.012629333883523941
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,256,16,2,64,0,1,fp8,fp8,0,0.016554666062196095
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,256,16,4,64,0,1,float16,float16,0,0.012458667159080505
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,256,16,4,64,0,1,float16,fp8,0,0.012458667159080505
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,256,16,4,64,0,1,fp8,fp8,0,0.01672533278663953
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,256,16,8,64,0,1,float16,float16,0,0.01228800043463707
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,256,16,8,64,0,1,float16,fp8,0,0.012629333883523941
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,256,16,8,64,0,1,fp8,fp8,0,0.016895999511082966
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,128,16,1,64,0,1,float16,float16,0,0.38331735134124756
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,128,16,1,64,0,1,float16,fp8,0,0.37751468022664386
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,128,16,1,64,0,1,fp8,fp8,0,0.4026026725769043
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,128,16,2,64,0,1,float16,float16,0,0.47189335028330487
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,128,16,2,64,0,1,float16,fp8,0,0.4509013493855794
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,128,16,2,64,0,1,fp8,fp8,0,0.444757342338562
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,128,16,4,64,0,1,float16,fp8,0,0.5659306844075521
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,128,16,4,64,0,1,float16,float16,0,0.5964800119400024
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,128,16,4,64,0,1,fp8,fp8,0,0.520362655321757
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,128,16,8,64,0,1,float16,float16,0,0.8186879952748617
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,128,16,8,64,0,1,float16,fp8,0,0.7601493199666342
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,128,16,8,64,0,1,fp8,fp8,0,0.6550186475118002
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,128,16,16,64,0,1,float16,float16,0,0.5126826763153076
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,128,16,16,64,0,1,float16,fp8,0,0.46165335178375244
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,128,16,16,64,0,1,fp8,fp8,0,0.46455466747283936
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,128,16,1,64,0,1,float16,float16,0,0.11878400047620137
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,128,16,1,64,0,1,float16,fp8,0,0.11776000261306763
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,128,16,1,64,0,1,fp8,fp8,0,0.14779733618100485
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,128,16,2,64,0,1,float16,float16,0,0.13038933277130127
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,128,16,2,64,0,1,float16,fp8,0,0.13312000036239624
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,128,16,2,64,0,1,fp8,fp8,0,0.1565013329188029
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,128,16,4,64,0,1,float16,float16,0,0.1367039978504181
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,128,16,4,64,0,1,float16,fp8,0,0.12544000148773193
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,128,16,4,64,0,1,fp8,fp8,0,0.20155733823776245
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,128,16,8,64,0,1,float16,float16,0,0.21179733673731485
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,128,16,8,64,0,1,float16,fp8,0,0.17356799046198526
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,128,16,8,64,0,1,fp8,fp8,0,0.30532266696294147
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,128,16,16,64,0,1,float16,float16,0,0.08550399541854858
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,128,16,16,64,0,1,float16,fp8,0,0.06758399804433186
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,128,16,16,64,0,1,fp8,fp8,0,0.16503467162450156
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,128,16,1,64,0,1,float16,float16,0,0.06178133189678192
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,128,16,1,64,0,1,float16,fp8,0,0.06195199986298879
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,128,16,1,64,0,1,fp8,fp8,0,0.0817493349313736
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,128,16,2,64,0,1,float16,float16,0,0.061610668897628784
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,128,16,2,64,0,1,fp8,fp8,0,0.08038400113582611
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,128,16,2,64,0,1,float16,fp8,0,0.06195199986298879
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,128,16,4,64,0,1,float16,float16,0,0.06263466676076253
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,128,16,4,64,0,1,float16,fp8,0,0.06229333579540253
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,128,16,4,64,0,1,fp8,fp8,0,0.08140799899895985
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,128,16,8,64,0,1,float16,float16,0,0.06553600231806438
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,128,16,8,64,0,1,float16,fp8,0,0.06451199948787689
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,128,16,8,64,0,1,fp8,fp8,0,0.0817493349313736
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,128,16,16,64,0,1,float16,float16,0,0.03976533313592275
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,128,16,16,64,0,1,float16,fp8,0,0.03908266623814901
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,128,16,16,64,0,1,fp8,fp8,0,0.04693333307902018
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,128,16,1,64,0,1,float16,float16,0,0.03601066768169403
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,128,16,1,64,0,1,float16,fp8,0,0.0365226666132609
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,128,16,1,64,0,1,fp8,fp8,0,0.045226668318112694
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,128,16,2,64,0,1,float16,float16,0,0.0365226666132609
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,128,16,2,64,0,1,float16,fp8,0,0.0363520011305809
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,128,16,2,64,0,1,fp8,fp8,0,0.045738667249679565
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,128,16,4,64,0,1,float16,float16,0,0.03703466554482778
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,128,16,4,64,0,1,float16,fp8,0,0.037205333511034645
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,128,16,4,64,0,1,fp8,fp8,0,0.045738667249679565
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,128,16,8,64,0,1,float16,float16,0,0.03788800040880839
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,128,16,8,64,0,1,float16,fp8,0,0.03822933385769526
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,128,16,8,64,0,1,fp8,fp8,0,0.04659200211366018
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,128,16,16,64,0,1,float16,float16,0,0.0240639994541804
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,128,16,16,64,0,1,float16,fp8,0,0.024234667420387268
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,128,16,16,64,0,1,fp8,fp8,0,0.027136000494162243
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,128,16,1,64,0,1,float16,float16,0,0.022869333624839783
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,128,16,1,64,0,1,fp8,fp8,0,0.027136000494162243
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,128,16,1,64,0,1,float16,fp8,0,0.023210667073726654
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,128,16,2,64,0,1,float16,fp8,0,0.023210667073726654
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,128,16,2,64,0,1,float16,float16,0,0.023039999107519787
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,128,16,2,64,0,1,fp8,fp8,0,0.02679466704527537
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,128,16,4,64,0,1,float16,float16,0,0.023552000522613525
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,128,16,4,64,0,1,float16,fp8,0,0.023552000522613525
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,128,16,4,64,0,1,fp8,fp8,0,0.02679466704527537
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,128,16,8,64,0,1,float16,float16,0,0.023552000522613525
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,128,16,8,64,0,1,fp8,fp8,0,0.027477333943049114
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,128,16,8,64,0,1,float16,fp8,0,0.02372266600529353
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,128,16,16,64,0,1,float16,float16,0,0.01570133368174235
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,128,16,16,64,0,1,float16,fp8,0,0.015530666957298914
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,128,16,16,64,0,1,fp8,fp8,0,0.017749333133300144
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,128,16,1,64,0,1,float16,float16,0,0.0145066666106383
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,128,16,1,64,0,1,float16,fp8,0,0.014677333335081736
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,128,16,2,64,0,1,float16,float16,0,0.014677333335081736
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,128,16,1,64,0,1,fp8,fp8,0,0.01757866640885671
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,128,16,2,64,0,1,float16,fp8,0,0.014677333335081736
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,128,16,4,64,0,1,float16,float16,0,0.014848000059525171
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,128,16,2,64,0,1,fp8,fp8,0,0.01757866640885671
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,128,16,4,64,0,1,float16,fp8,0,0.014848000059525171
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,128,16,8,64,0,1,float16,float16,0,0.015018666783968607
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,128,16,4,64,0,1,fp8,fp8,0,0.01757866640885671
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,128,16,8,64,0,1,float16,fp8,0,0.015018666783968607
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,128,16,8,64,0,1,fp8,fp8,0,0.017749333133300144
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,128,16,16,64,0,1,float16,fp8,0,0.011776000261306763
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,128,16,16,64,0,1,float16,float16,0,0.011605333536863327
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,128,16,16,64,0,1,fp8,fp8,0,0.014335999886194864
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,128,16,1,64,0,1,float16,float16,0,0.011264000087976456
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,128,16,1,64,0,1,float16,fp8,0,0.011434666812419891
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,128,16,1,64,0,1,fp8,fp8,0,0.014165333161751429
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,128,16,2,64,0,1,float16,float16,0,0.011264000087976456
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,128,16,2,64,0,1,float16,fp8,0,0.011264000087976456
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,128,16,2,64,0,1,fp8,fp8,0,0.014165333161751429
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,128,16,4,64,0,1,float16,float16,0,0.011264000087976456
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,128,16,4,64,0,1,float16,fp8,0,0.011605333536863327
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,128,16,4,64,0,1,fp8,fp8,0,0.014165333161751429
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,128,16,8,64,0,1,float16,float16,0,0.011605333536863327
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,128,16,8,64,0,1,float16,fp8,0,0.011434666812419891
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,128,16,8,64,0,1,fp8,fp8,0,0.014335999886194864
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,128,16,16,64,0,1,float16,float16,0,0.010239999741315842
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,128,16,16,64,0,1,float16,fp8,0,0.010410666465759277
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,128,16,16,64,0,1,fp8,fp8,0,0.012800000607967377
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,128,16,1,64,0,1,float16,float16,0,0.010069333637754122
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,128,16,1,64,0,1,float16,fp8,0,0.010239999741315842
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,128,16,1,64,0,1,fp8,fp8,0,0.012970666090647379
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,128,16,2,64,0,1,float16,float16,0,0.010069333637754122
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,128,16,2,64,0,1,float16,fp8,0,0.010410666465759277
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,128,16,2,64,0,1,fp8,fp8,0,0.013141332815090815
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,128,16,4,64,0,1,float16,float16,0,0.010069333637754122
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,128,16,4,64,0,1,float16,fp8,0,0.010239999741315842
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,128,16,4,64,0,1,fp8,fp8,0,0.012970666090647379
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,128,16,8,64,0,1,float16,float16,0,0.010239999741315842
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,128,16,8,64,0,1,float16,fp8,0,0.010581333190202713
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,128,16,8,64,0,1,fp8,fp8,0,0.012970666090647379
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,128,16,16,64,0,1,float16,float16,0,0.009557333464423815
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,128,16,16,64,0,1,float16,fp8,0,0.009898666913310686
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,128,16,16,64,0,1,fp8,fp8,0,0.012629333883523941
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,128,16,1,64,0,1,float16,float16,0,0.00972800018886725
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,128,16,1,64,0,1,float16,fp8,0,0.010069333637754122
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,128,16,1,64,0,1,fp8,fp8,0,0.012458667159080505
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,128,16,2,64,0,1,float16,float16,0,0.00972800018886725
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,128,16,2,64,0,1,float16,fp8,0,0.009898666913310686
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,128,16,2,64,0,1,fp8,fp8,0,0.012800000607967377
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,128,16,4,64,0,1,float16,fp8,0,0.009898666913310686
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,128,16,4,64,0,1,float16,float16,0,0.00972800018886725
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,128,16,4,64,0,1,fp8,fp8,0,0.012629333883523941
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,128,16,8,64,0,1,float16,float16,0,0.00972800018886725
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,128,16,8,64,0,1,float16,fp8,0,0.009898666913310686
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,128,16,8,64,0,1,fp8,fp8,0,0.012800000607967377
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,64,16,1,64,0,1,float16,float16,0,0.09352533022562663
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,64,16,1,64,0,1,float16,fp8,0,0.09335466225941975
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,64,16,1,64,0,1,fp8,fp8,0,0.20155733823776245
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,64,16,2,64,0,1,float16,float16,0,0.10018133123715718
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,64,16,2,64,0,1,float16,fp8,0,0.09847467144330342
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,64,16,2,64,0,1,fp8,fp8,0,0.21026132504145303
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,64,16,4,64,0,1,float16,float16,0,0.12663466731707254
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,64,16,4,64,0,1,float16,fp8,0,0.1114453375339508
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,64,16,4,64,0,1,fp8,fp8,0,0.2657279968261719
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,64,16,8,64,0,1,float16,float16,0,0.20855466524759927
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,64,16,8,64,0,1,float16,fp8,0,0.16742400328318277
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,64,16,8,64,0,1,fp8,fp8,0,0.3633493185043335
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,64,16,16,64,0,1,float16,float16,0,0.07850666840871175
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,64,16,16,64,0,1,float16,fp8,0,0.0580266664425532
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,64,16,16,64,0,1,fp8,fp8,0,0.19592533508936563
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,64,16,1,64,0,1,float16,float16,0,0.051029334465662636
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,64,16,1,64,0,1,float16,fp8,0,0.051029334465662636
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,64,16,1,64,0,1,fp8,fp8,0,0.10752000411351521
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,64,16,2,64,0,1,float16,float16,0,0.05171200136343638
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,64,16,2,64,0,1,float16,fp8,0,0.051882664362589516
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,64,16,2,64,0,1,fp8,fp8,0,0.10820266604423523
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,64,16,4,64,0,1,float16,float16,0,0.053077335158983864
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,64,16,4,64,0,1,float16,fp8,0,0.05273599922657013
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,64,16,4,64,0,1,fp8,fp8,0,0.10922666390736897
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,64,16,8,64,0,1,float16,float16,0,0.054272000988324486
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,64,16,8,64,0,1,float16,fp8,0,0.054101333022117615
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,64,16,8,64,0,1,fp8,fp8,0,0.11025066177050273
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,64,16,16,64,0,1,float16,float16,0,0.0341333324710528
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,64,16,16,64,0,1,float16,fp8,0,0.03310933212439219
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,64,16,16,64,0,1,fp8,fp8,0,0.06365866462389629
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,64,16,1,64,0,1,float16,float16,0,0.030720000465710957
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,64,16,1,64,0,1,float16,fp8,0,0.030720000465710957
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,64,16,1,64,0,1,fp8,fp8,0,0.062122667829195656
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,64,16,2,64,0,1,float16,float16,0,0.031061333914597828
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,64,16,2,64,0,1,float16,fp8,0,0.031061333914597828
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,64,16,2,64,0,1,fp8,fp8,0,0.06229333579540253
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,64,16,4,64,0,1,float16,float16,0,0.0315733328461647
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,64,16,4,64,0,1,float16,fp8,0,0.031914666295051575
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,64,16,4,64,0,1,fp8,fp8,0,0.062463998794555664
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,64,16,8,64,0,1,float16,float16,0,0.03259733319282532
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,64,16,8,64,0,1,float16,fp8,0,0.031744000812371574
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,64,16,8,64,0,1,fp8,fp8,0,0.06331733365853627
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,64,16,16,64,0,1,float16,fp8,0,0.020821332931518555
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,64,16,16,64,0,1,float16,float16,0,0.02065066620707512
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,64,16,16,64,0,1,fp8,fp8,0,0.03669333209594091
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,64,16,1,64,0,1,float16,fp8,0,0.0194560003777345
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,64,16,1,64,0,1,float16,float16,0,0.0194560003777345
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,64,16,1,64,0,1,fp8,fp8,0,0.03601066768169403
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,64,16,2,64,0,1,float16,float16,0,0.019626667102177937
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,64,16,2,64,0,1,float16,fp8,0,0.019968000551064808
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,64,16,2,64,0,1,fp8,fp8,0,0.03669333209594091
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,64,16,4,64,0,1,float16,float16,0,0.019968000551064808
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,64,16,4,64,0,1,fp8,fp8,0,0.036864000062147774
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,64,16,4,64,0,1,float16,fp8,0,0.019968000551064808
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,64,16,8,64,0,1,float16,float16,0,0.020479999482631683
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,64,16,8,64,0,1,float16,fp8,0,0.020821332931518555
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,64,16,16,64,0,1,float16,float16,0,0.013994666437307993
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,64,16,8,64,0,1,fp8,fp8,0,0.0363520011305809
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,64,16,16,64,0,1,float16,fp8,0,0.013482666263977686
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,64,16,16,64,0,1,fp8,fp8,0,0.021674667795499165
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,64,16,1,64,0,1,float16,float16,0,0.012629333883523941
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,64,16,1,64,0,1,float16,fp8,0,0.012800000607967377
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,64,16,1,64,0,1,fp8,fp8,0,0.021333334346612293
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,64,16,2,64,0,1,float16,float16,0,0.012800000607967377
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,64,16,2,64,0,1,float16,fp8,0,0.012800000607967377
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,64,16,2,64,0,1,fp8,fp8,0,0.021333334346612293
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,64,16,4,64,0,1,float16,float16,0,0.012800000607967377
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,64,16,4,64,0,1,float16,fp8,0,0.012970666090647379
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,64,16,4,64,0,1,fp8,fp8,0,0.021503999829292297
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,64,16,8,64,0,1,float16,fp8,0,0.01331199953953425
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,64,16,8,64,0,1,float16,float16,0,0.01331199953953425
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,64,16,8,64,0,1,fp8,fp8,0,0.022015998760859173
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,64,16,16,64,0,1,float16,float16,0,0.010410666465759277
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,64,16,16,64,0,1,float16,fp8,0,0.010581333190202713
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,64,16,16,64,0,1,fp8,fp8,0,0.015360000232855478
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,64,16,1,64,0,1,float16,float16,0,0.009898666913310686
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,64,16,1,64,0,1,float16,fp8,0,0.010069333637754122
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,64,16,1,64,0,1,fp8,fp8,0,0.015018666783968607
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,64,16,2,64,0,1,float16,float16,0,0.009898666913310686
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,64,16,2,64,0,1,float16,fp8,0,0.010239999741315842
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,64,16,2,64,0,1,fp8,fp8,0,0.015360000232855478
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,64,16,4,64,0,1,float16,float16,0,0.010069333637754122
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,64,16,4,64,0,1,float16,fp8,0,0.010239999741315842
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,64,16,4,64,0,1,fp8,fp8,0,0.015018666783968607
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,64,16,8,64,0,1,float16,float16,0,0.010239999741315842
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,64,16,8,64,0,1,float16,fp8,0,0.010410666465759277
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,64,16,8,64,0,1,fp8,fp8,0,0.015189333508412043
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,64,16,16,64,0,1,float16,float16,0,0.009216000015536943
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,64,16,16,64,0,1,float16,fp8,0,0.00938666673998038
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,64,16,16,64,0,1,fp8,fp8,0,0.012800000607967377
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,64,16,1,64,0,1,float16,float16,0,0.009045333291093508
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,64,16,1,64,0,1,float16,fp8,0,0.009045333291093508
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,64,16,1,64,0,1,fp8,fp8,0,0.013141332815090815
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,64,16,2,64,0,1,float16,float16,0,0.009045333291093508
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,64,16,2,64,0,1,float16,fp8,0,0.009045333291093508
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,64,16,2,64,0,1,fp8,fp8,0,0.013141332815090815
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,64,16,4,64,0,1,float16,float16,0,0.009216000015536943
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,64,16,4,64,0,1,float16,fp8,0,0.009216000015536943
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,64,16,4,64,0,1,fp8,fp8,0,0.013141332815090815
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,64,16,8,64,0,1,float16,float16,0,0.009216000015536943
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,64,16,8,64,0,1,float16,fp8,0,0.00938666673998038
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,64,16,8,64,0,1,fp8,fp8,0,0.013141332815090815
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,64,16,16,64,0,1,float16,float16,0,0.0085333331177632
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,64,16,16,64,0,1,float16,fp8,0,0.008703999842206636
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,64,16,16,64,0,1,fp8,fp8,0,0.012117333710193634
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,64,16,1,64,0,1,float16,float16,0,0.008703999842206636
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,64,16,1,64,0,1,float16,fp8,0,0.009045333291093508
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,64,16,1,64,0,1,fp8,fp8,0,0.01228800043463707
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,64,16,2,64,0,1,float16,float16,0,0.00972800018886725
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,64,16,2,64,0,1,float16,fp8,0,0.008874666566650072
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,64,16,2,64,0,1,fp8,fp8,0,0.01228800043463707
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,64,16,4,64,0,1,float16,float16,0,0.008703999842206636
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,64,16,4,64,0,1,float16,fp8,0,0.008874666566650072
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,64,16,4,64,0,1,fp8,fp8,0,0.01228800043463707
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,64,16,8,64,0,1,float16,fp8,0,0.009045333291093508
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,64,16,8,64,0,1,float16,float16,0,0.008874666566650072
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,64,16,8,64,0,1,fp8,fp8,0,0.01228800043463707
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,64,16,16,64,0,1,float16,float16,0,0.009045333291093508
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,64,16,16,64,0,1,float16,fp8,0,0.0085333331177632
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,64,16,1,64,0,1,float16,float16,0,0.008703999842206636
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,64,16,16,64,0,1,fp8,fp8,0,0.011946666985750198
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,64,16,1,64,0,1,fp8,fp8,0,0.01228800043463707
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,64,16,1,64,0,1,float16,fp8,0,0.008874666566650072
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,64,16,2,64,0,1,float16,float16,0,0.0085333331177632
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,64,16,2,64,0,1,float16,fp8,0,0.009216000015536943
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,64,16,2,64,0,1,fp8,fp8,0,0.01228800043463707
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,64,16,4,64,0,1,float16,float16,0,0.0085333331177632
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,64,16,4,64,0,1,float16,fp8,0,0.008874666566650072
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,64,16,4,64,0,1,fp8,fp8,0,0.012117333710193634
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,64,16,8,64,0,1,float16,float16,0,0.0085333331177632
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,64,16,8,64,0,1,float16,fp8,0,0.008874666566650072
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,64,16,8,64,0,1,fp8,fp8,0,0.01228800043463707
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,32,16,1,64,0,1,float16,float16,0,0.057002668579419456
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,32,16,1,64,0,1,float16,fp8,0,0.05717333157857259
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,32,16,1,64,0,1,fp8,fp8,0,0.17390932639439902
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,32,16,2,64,0,1,float16,float16,0,0.05734399954477946
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,32,16,2,64,0,1,float16,fp8,0,0.05734399954477946
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,32,16,2,64,0,1,fp8,fp8,0,0.17425066232681274
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,32,16,4,64,0,1,float16,float16,0,0.05751466751098633
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,32,16,4,64,0,1,float16,fp8,0,0.057855998476346336
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,32,16,4,64,0,1,fp8,fp8,0,0.17510400215784708
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,32,16,8,64,0,1,float16,float16,0,0.0602453351020813
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,32,16,8,64,0,1,float16,fp8,0,0.05922133227189382
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,32,16,8,64,0,1,fp8,fp8,0,0.17749333381652832
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,32,16,16,64,0,1,float16,float16,0,0.03566933423280716
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,32,16,16,64,0,1,float16,fp8,0,0.034474665919939675
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,32,16,1,64,0,1,float16,float16,0,0.03328000009059906
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,32,16,16,64,0,1,fp8,fp8,0,0.09591466188430786
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,32,16,1,64,0,1,float16,fp8,0,0.03345066557327906
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,32,16,1,64,0,1,fp8,fp8,0,0.09540266791979472
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,32,16,2,64,0,1,float16,float16,0,0.03379199902216593
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,32,16,2,64,0,1,fp8,fp8,0,0.09608532985051473
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,32,16,2,64,0,1,float16,fp8,0,0.03379199902216593
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,32,16,4,64,0,1,float16,float16,0,0.0341333324710528
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,32,16,4,64,0,1,float16,fp8,0,0.0339626669883728
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,32,16,4,64,0,1,fp8,fp8,0,0.09574400385220845
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,32,16,8,64,0,1,float16,float16,0,0.03498666733503342
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,32,16,8,64,0,1,float16,fp8,0,0.034474665919939675
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,32,16,8,64,0,1,fp8,fp8,0,0.09591466188430786
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,32,16,16,64,0,1,float16,float16,0,0.022015998760859173
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,32,16,16,64,0,1,float16,fp8,0,0.021503999829292297
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,32,16,16,64,0,1,fp8,fp8,0,0.05563733478387197
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,32,16,1,64,0,1,float16,float16,0,0.020821332931518555
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,32,16,1,64,0,1,float16,fp8,0,0.020821332931518555
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,32,16,1,64,0,1,fp8,fp8,0,0.05529599885145823
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,32,16,2,64,0,1,float16,float16,0,0.020992000897725422
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,32,16,2,64,0,1,float16,fp8,0,0.020992000897725422
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,32,16,2,64,0,1,fp8,fp8,0,0.05580799778302511
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,32,16,4,64,0,1,float16,float16,0,0.021333334346612293
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,32,16,4,64,0,1,float16,fp8,0,0.021333334346612293
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,32,16,4,64,0,1,fp8,fp8,0,0.05597866574923197
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,32,16,8,64,0,1,float16,float16,0,0.021503999829292297
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,32,16,8,64,0,1,float16,fp8,0,0.021674667795499165
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,32,16,16,64,0,1,float16,float16,0,0.014335999886194864
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,32,16,8,64,0,1,fp8,fp8,0,0.056320001681645714
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,32,16,16,64,0,1,fp8,fp8,0,0.03259733319282532
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,32,16,16,64,0,1,float16,fp8,0,0.014165333161751429
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,32,16,1,64,0,1,float16,float16,0,0.013482666263977686
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,32,16,1,64,0,1,float16,fp8,0,0.013482666263977686
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,32,16,2,64,0,1,float16,float16,0,0.013823999712864557
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,32,16,1,64,0,1,fp8,fp8,0,0.032085334261258446
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,32,16,2,64,0,1,float16,fp8,0,0.013653332988421122
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,32,16,2,64,0,1,fp8,fp8,0,0.032255999743938446
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,32,16,4,64,0,1,float16,float16,0,0.013823999712864557
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,32,16,4,64,0,1,float16,fp8,0,0.013823999712864557
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,32,16,4,64,0,1,fp8,fp8,0,0.031914666295051575
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,32,16,8,64,0,1,float16,float16,0,0.013994666437307993
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,32,16,8,64,0,1,float16,fp8,0,0.013994666437307993
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,32,16,8,64,0,1,fp8,fp8,0,0.032255999743938446
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,32,16,16,64,0,1,float16,float16,0,0.010581333190202713
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,32,16,16,64,0,1,float16,fp8,0,0.010581333190202713
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,32,16,16,64,0,1,fp8,fp8,0,0.019797333826621372
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,32,16,1,64,0,1,float16,float16,0,0.011946666985750198
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,32,16,1,64,0,1,float16,fp8,0,0.010069333637754122
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,32,16,1,64,0,1,fp8,fp8,0,0.019626667102177937
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,32,16,2,64,0,1,float16,float16,0,0.010239999741315842
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,32,16,2,64,0,1,float16,fp8,0,0.010239999741315842
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,32,16,2,64,0,1,fp8,fp8,0,0.019626667102177937
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,32,16,4,64,0,1,float16,float16,0,0.010069333637754122
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,32,16,4,64,0,1,float16,fp8,0,0.010410666465759277
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,32,16,4,64,0,1,fp8,fp8,0,0.019797333826621372
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,32,16,8,64,0,1,float16,float16,0,0.010239999741315842
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,32,16,8,64,0,1,float16,fp8,0,0.010410666465759277
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,32,16,8,64,0,1,fp8,fp8,0,0.019797333826621372
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,32,16,16,64,0,1,float16,float16,0,0.009216000015536943
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,32,16,16,64,0,1,float16,fp8,0,0.009045333291093508
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,32,16,16,64,0,1,fp8,fp8,0,0.0145066666106383
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,32,16,1,64,0,1,float16,float16,0,0.008874666566650072
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,32,16,1,64,0,1,float16,fp8,0,0.009045333291093508
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,32,16,1,64,0,1,fp8,fp8,0,0.0145066666106383
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,32,16,2,64,0,1,float16,fp8,0,0.009045333291093508
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,32,16,2,64,0,1,float16,float16,0,0.009045333291093508
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,32,16,4,64,0,1,float16,float16,0,0.008874666566650072
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,32,16,2,64,0,1,fp8,fp8,0,0.014677333335081736
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,32,16,4,64,0,1,float16,fp8,0,0.009216000015536943
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,32,16,4,64,0,1,fp8,fp8,0,0.014677333335081736
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,32,16,8,64,0,1,float16,float16,0,0.008874666566650072
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,32,16,8,64,0,1,float16,fp8,0,0.009216000015536943
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,32,16,8,64,0,1,fp8,fp8,0,0.014677333335081736
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,32,16,16,64,0,1,float16,float16,0,0.0085333331177632
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,32,16,16,64,0,1,float16,fp8,0,0.008192000289758047
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,32,16,16,64,0,1,fp8,fp8,0,0.012458667159080505
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,32,16,1,64,0,1,float16,float16,0,0.008874666566650072
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,32,16,1,64,0,1,float16,fp8,0,0.008703999842206636
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,32,16,1,64,0,1,fp8,fp8,0,0.012800000607967377
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,32,16,2,64,0,1,float16,float16,0,0.008021333565314611
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,32,16,2,64,0,1,float16,fp8,0,0.008362666393319765
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,32,16,2,64,0,1,fp8,fp8,0,0.012800000607967377
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,32,16,4,64,0,1,float16,float16,0,0.008192000289758047
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,32,16,4,64,0,1,float16,fp8,0,0.008362666393319765
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,32,16,4,64,0,1,fp8,fp8,0,0.012629333883523941
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,32,16,8,64,0,1,float16,float16,0,0.0085333331177632
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,32,16,8,64,0,1,float16,fp8,0,0.008362666393319765
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,32,16,8,64,0,1,fp8,fp8,0,0.012629333883523941
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,32,16,16,64,0,1,float16,float16,0,0.0085333331177632
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,32,16,16,64,0,1,float16,fp8,0,0.008192000289758047
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,32,16,16,64,0,1,fp8,fp8,0,0.011605333536863327
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,32,16,1,64,0,1,float16,float16,0,0.008703999842206636
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,32,16,1,64,0,1,float16,fp8,0,0.008703999842206636
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,32,16,1,64,0,1,fp8,fp8,0,0.01228800043463707
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,32,16,2,64,0,1,float16,float16,0,0.0085333331177632
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,32,16,2,64,0,1,float16,fp8,0,0.008192000289758047
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,32,16,2,64,0,1,fp8,fp8,0,0.012117333710193634
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,32,16,4,64,0,1,float16,float16,0,0.008362666393319765
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,32,16,4,64,0,1,float16,fp8,0,0.008192000289758047
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,32,16,4,64,0,1,fp8,fp8,0,0.012117333710193634
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,32,16,8,64,0,1,float16,float16,0,0.009045333291093508
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,32,16,8,64,0,1,float16,fp8,0,0.008703999842206636
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,32,16,8,64,0,1,fp8,fp8,0,0.013264000415802002
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,32,16,16,64,0,1,float16,float16,0,0.008021333565314611
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,32,16,16,64,0,1,float16,fp8,0,0.0085333331177632
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,32,16,16,64,0,1,fp8,fp8,0,0.011946666985750198
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,32,16,1,64,0,1,float16,float16,0,0.007850666840871176
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,32,16,1,64,0,1,float16,fp8,0,0.009045333291093508
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,32,16,1,64,0,1,fp8,fp8,0,0.012117333710193634
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,32,16,2,64,0,1,float16,float16,0,0.0085333331177632
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,32,16,2,64,0,1,float16,fp8,0,0.0085333331177632
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,32,16,2,64,0,1,fp8,fp8,0,0.011946666985750198
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,32,16,4,64,0,1,float16,float16,0,0.008703999842206636
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,32,16,4,64,0,1,float16,fp8,0,0.008192000289758047
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,32,16,4,64,0,1,fp8,fp8,0,0.011946666985750198
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,32,16,8,64,0,1,float16,float16,0,0.0085333331177632
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,32,16,8,64,0,1,float16,fp8,0,0.009045333291093508
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,32,16,8,64,0,1,fp8,fp8,0,0.01228800043463707
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,16,16,1,64,0,1,float16,fp8,0,0.045567999283472695
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,16,16,1,64,0,1,float16,float16,0,0.04539733131726583
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,16,16,1,64,0,1,fp8,fp8,0,0.16145066420237222
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,16,16,2,64,0,1,float16,float16,0,0.045738667249679565
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,16,16,2,64,0,1,float16,fp8,0,0.045909335215886436
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,16,16,4,64,0,1,float16,float16,0,0.04607999821503957
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,16,16,2,64,0,1,fp8,fp8,0,0.16196266810099283
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,16,16,4,64,0,1,float16,fp8,0,0.045909335215886436
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,16,16,4,64,0,1,fp8,fp8,0,0.1629866659641266
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,16,16,8,64,0,1,float16,float16,0,0.04693333307902018
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,16,16,8,64,0,1,float16,fp8,0,0.04659200211366018
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,16,16,8,64,0,1,fp8,fp8,0,0.1628159979979197
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,16,16,16,64,0,1,float16,float16,0,0.0264533335963885
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,16,16,16,64,0,1,float16,fp8,0,0.0264533335963885
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,16,16,16,64,0,1,fp8,fp8,0,0.0885759989420573
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,16,16,1,64,0,1,float16,float16,0,0.027136000494162243
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,16,16,1,64,0,1,float16,fp8,0,0.02679466704527537
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,16,16,1,64,0,1,fp8,fp8,0,0.08840533097585042
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,16,16,2,64,0,1,float16,float16,0,0.027306665976842243
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,16,16,2,64,0,1,float16,fp8,0,0.027136000494162243
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,16,16,2,64,0,1,fp8,fp8,0,0.08840533097585042
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,16,16,4,64,0,1,float16,float16,0,0.027477333943049114
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,16,16,4,64,0,1,float16,fp8,0,0.027306665976842243
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,16,16,4,64,0,1,fp8,fp8,0,0.08874666690826416
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,16,16,8,64,0,1,float16,float16,0,0.027647999425729115
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,16,16,8,64,0,1,float16,fp8,0,0.027647999425729115
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,16,16,8,64,0,1,fp8,fp8,0,0.0890880028406779
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,16,16,16,64,0,1,float16,float16,0,0.01672533278663953
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,16,16,16,64,0,1,float16,fp8,0,0.01621333385507266
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,16,16,16,64,0,1,fp8,fp8,0,0.051370665431022644
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,16,16,1,64,0,1,float16,fp8,0,0.016384000579516094
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,16,16,1,64,0,1,float16,float16,0,0.01621333385507266
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,16,16,1,64,0,1,fp8,fp8,0,0.05171200136343638
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,16,16,2,64,0,1,float16,float16,0,0.01672533278663953
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,16,16,2,64,0,1,float16,fp8,0,0.016384000579516094
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,16,16,2,64,0,1,fp8,fp8,0,0.05171200136343638
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,16,16,4,64,0,1,float16,float16,0,0.016554666062196095
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,16,16,4,64,0,1,float16,fp8,0,0.016554666062196095
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,16,16,4,64,0,1,fp8,fp8,0,0.05171200136343638
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,16,16,8,64,0,1,float16,float16,0,0.016895999511082966
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,16,16,8,64,0,1,float16,fp8,0,0.016895999511082966
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,16,16,8,64,0,1,fp8,fp8,0,0.051882664362589516
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,16,16,16,64,0,1,float16,float16,0,0.011434666812419891
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,16,16,16,64,0,1,float16,fp8,0,0.011605333536863327
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,16,16,16,64,0,1,fp8,fp8,0,0.030037333567937214
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,16,16,1,64,0,1,float16,float16,0,0.011605333536863327
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,16,16,1,64,0,1,float16,fp8,0,0.011605333536863327
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,16,16,1,64,0,1,fp8,fp8,0,0.03054933249950409
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,16,16,2,64,0,1,float16,float16,0,0.011434666812419891
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,16,16,2,64,0,1,float16,fp8,0,0.011605333536863327
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,16,16,2,64,0,1,fp8,fp8,0,0.030378667016824085
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,16,16,4,64,0,1,float16,float16,0,0.011605333536863327
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,16,16,4,64,0,1,float16,fp8,0,0.011776000261306763
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,16,16,4,64,0,1,fp8,fp8,0,0.030720000465710957
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,16,16,8,64,0,1,float16,float16,0,0.011776000261306763
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,16,16,8,64,0,1,float16,fp8,0,0.011946666985750198
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,16,16,8,64,0,1,fp8,fp8,0,0.030720000465710957
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,16,16,16,64,0,1,float16,float16,0,0.008703999842206636
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,16,16,16,64,0,1,float16,fp8,0,0.009045333291093508
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,16,16,16,64,0,1,fp8,fp8,0,0.01877333347996076
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,16,16,1,64,0,1,float16,float16,0,0.0085333331177632
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,16,16,1,64,0,1,float16,fp8,0,0.009045333291093508
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,16,16,1,64,0,1,fp8,fp8,0,0.018602666755517323
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,16,16,2,64,0,1,float16,float16,0,0.008703999842206636
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,16,16,2,64,0,1,float16,fp8,0,0.009045333291093508
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,16,16,2,64,0,1,fp8,fp8,0,0.018602666755517323
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,16,16,4,64,0,1,float16,float16,0,0.008703999842206636
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,16,16,4,64,0,1,float16,fp8,0,0.009045333291093508
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,16,16,4,64,0,1,fp8,fp8,0,0.018432000031073887
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,16,16,8,64,0,1,float16,float16,0,0.008874666566650072
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,16,16,8,64,0,1,float16,fp8,0,0.009216000015536943
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,16,16,8,64,0,1,fp8,fp8,0,0.018944000204404194
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,16,16,16,64,0,1,float16,float16,0,0.008021333565314611
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,16,16,16,64,0,1,float16,fp8,0,0.008362666393319765
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,16,16,16,64,0,1,fp8,fp8,0,0.013653332988421122
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,16,16,1,64,0,1,float16,float16,0,0.008362666393319765
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,16,16,1,64,0,1,float16,fp8,0,0.008192000289758047
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,16,16,2,64,0,1,float16,float16,0,0.008874666566650072
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,16,16,1,64,0,1,fp8,fp8,0,0.013994666437307993
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,16,16,2,64,0,1,float16,fp8,0,0.0085333331177632
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,16,16,2,64,0,1,fp8,fp8,0,0.013823999712864557
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,16,16,4,64,0,1,float16,float16,0,0.008703999842206636
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,16,16,4,64,0,1,float16,fp8,0,0.00938666673998038
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,16,16,4,64,0,1,fp8,fp8,0,0.013994666437307993
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,16,16,8,64,0,1,float16,float16,0,0.008192000289758047
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,16,16,8,64,0,1,float16,fp8,0,0.0085333331177632
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,16,16,8,64,0,1,fp8,fp8,0,0.013994666437307993
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,16,16,16,64,0,1,float16,float16,0,0.008517333616813024
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,16,16,16,64,0,1,float16,fp8,0,0.008021333565314611
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,16,16,16,64,0,1,fp8,fp8,0,0.01228800043463707
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,16,16,1,64,0,1,float16,float16,0,0.008874666566650072
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,16,16,1,64,0,1,float16,fp8,0,0.008362666393319765
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,16,16,1,64,0,1,fp8,fp8,0,0.01228800043463707
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,16,16,2,64,0,1,float16,float16,0,0.008703999842206636
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,16,16,2,64,0,1,float16,fp8,0,0.008703999842206636
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,16,16,2,64,0,1,fp8,fp8,0,0.01228800043463707
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,16,16,4,64,0,1,float16,float16,0,0.008021333565314611
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,16,16,4,64,0,1,float16,fp8,0,0.008362666393319765
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,16,16,4,64,0,1,fp8,fp8,0,0.012458667159080505
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,16,16,8,64,0,1,float16,float16,0,0.007850666840871176
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,16,16,8,64,0,1,fp8,fp8,0,0.012458667159080505
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,16,16,8,64,0,1,float16,fp8,0,0.008703999842206636
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,16,16,16,64,0,1,float16,float16,0,0.008703999842206636
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,16,16,16,64,0,1,float16,fp8,0,0.008362666393319765
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,16,16,16,64,0,1,fp8,fp8,0,0.011776000261306763
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,16,16,1,64,0,1,float16,float16,0,0.0085333331177632
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,16,16,1,64,0,1,float16,fp8,0,0.008362666393319765
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,16,16,1,64,0,1,fp8,fp8,0,0.01228800043463707
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,16,16,2,64,0,1,float16,float16,0,0.008874666566650072
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,16,16,2,64,0,1,float16,fp8,0,0.008021333565314611
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,16,16,2,64,0,1,fp8,fp8,0,0.012117333710193634
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,16,16,4,64,0,1,float16,float16,0,0.008703999842206636
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,16,16,4,64,0,1,float16,fp8,0,0.008192000289758047
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,16,16,4,64,0,1,fp8,fp8,0,0.011946666985750198
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,16,16,8,64,0,1,float16,float16,0,0.008703999842206636
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,16,16,8,64,0,1,float16,fp8,0,0.008362666393319765
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,16,16,8,64,0,1,fp8,fp8,0,0.012117333710193634
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,16,16,16,64,0,1,float16,float16,0,0.008192000289758047
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,16,16,16,64,0,1,float16,fp8,0,0.008874666566650072
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,16,16,16,64,0,1,fp8,fp8,0,0.011434666812419891
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,16,16,1,64,0,1,float16,float16,0,0.008192000289758047
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,16,16,1,64,0,1,float16,fp8,0,0.008703999842206636
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,16,16,1,64,0,1,fp8,fp8,0,0.011776000261306763
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,16,16,2,64,0,1,float16,float16,0,0.007680000116427739
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,16,16,2,64,0,1,float16,fp8,0,0.009045333291093508
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,16,16,4,64,0,1,float16,float16,0,0.008682666967312494
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,16,16,2,64,0,1,fp8,fp8,0,0.011946666985750198
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,16,16,4,64,0,1,fp8,fp8,0,0.011776000261306763
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,16,16,4,64,0,1,float16,fp8,0,0.008874666566650072
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,16,16,8,64,0,1,float16,float16,0,0.008512000242869059
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,16,16,8,64,0,1,float16,fp8,0,0.008703999842206636
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,16,16,8,64,0,1,fp8,fp8,0,0.011946666985750198
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,16384,12,1,64,0,1,float16,float16,0,22.860458374023438
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,16384,12,1,64,0,1,float16,fp8,0,23.376724243164062
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,16384,12,2,64,0,1,float16,float16,0,22.467926025390625
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,16384,12,2,64,0,1,float16,fp8,0,22.46246337890625
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,16384,12,4,64,0,1,float16,fp8,0,22.42815907796224
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,16384,12,4,64,0,1,float16,float16,0,22.565035502115887
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,16384,12,1,64,0,1,fp8,fp8,0,29.42480977376302
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,16384,12,2,64,0,1,fp8,fp8,0,29.773483276367188
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,16384,12,12,64,0,1,float16,float16,0,12.027562459309896
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,16384,12,12,64,0,1,float16,fp8,0,11.806549072265625
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,16384,12,1,64,0,1,float16,float16,0,11.403092702229818
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,16384,12,12,64,0,1,fp8,fp8,0,15.149908701578775
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,16384,12,1,64,0,1,float16,fp8,0,11.909290313720703
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,16384,12,1,64,0,1,fp8,fp8,0,14.726656595865885
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,16384,12,2,64,0,1,float16,float16,0,11.332608540852865
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,16384,12,4,64,0,1,fp8,fp8,0,30.279337565104168
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,16384,12,2,64,0,1,float16,fp8,0,11.650389353434244
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,16384,12,12,64,0,1,float16,float16,0,5.414400100708008
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,16384,12,2,64,0,1,fp8,fp8,0,14.74303944905599
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,16384,12,4,64,0,1,float16,float16,0,11.248981475830078
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,16384,12,12,64,0,1,float16,fp8,0,5.826218922932942
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,16384,12,4,64,0,1,float16,fp8,0,11.655509948730469
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,16384,12,4,64,0,1,fp8,fp8,0,14.900564829508463
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,16384,12,12,64,0,1,fp8,fp8,0,7.734613418579102
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,16384,12,1,64,0,1,float16,float16,0,5.7987410227457685
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,16384,12,1,64,0,1,float16,fp8,0,5.789183934529622
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,16384,12,2,64,0,1,float16,float16,0,5.588479995727539
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,16384,12,2,64,0,1,float16,fp8,0,5.564586639404297
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,16384,12,1,64,0,1,fp8,fp8,0,7.363242467244466
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,16384,12,2,64,0,1,fp8,fp8,0,7.303338368733724
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,16384,12,4,64,0,1,float16,float16,0,5.80300776163737
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,16384,12,4,64,0,1,float16,fp8,0,5.722282409667969
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,16384,12,12,64,0,1,float16,float16,0,2.6540373166402182
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,16384,12,1,64,0,1,float16,float16,0,2.659498691558838
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,16384,12,4,64,0,1,fp8,fp8,0,7.609002431233724
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,16384,12,12,64,0,1,float16,fp8,0,2.533034642537435
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,16384,12,12,64,0,1,fp8,fp8,0,3.832319895426432
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,16384,12,1,64,0,1,float16,fp8,0,2.605226675669352
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,16384,12,1,64,0,1,fp8,fp8,0,3.7172905604044595
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,16384,12,2,64,0,1,float16,float16,0,2.5024852752685547
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,16384,12,2,64,0,1,float16,fp8,0,2.6193920771280923
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,16384,12,2,64,0,1,fp8,fp8,0,3.6959571838378906
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,16384,12,4,64,0,1,float16,float16,0,2.547541300455729
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,16384,12,4,64,0,1,float16,fp8,0,2.5101653734842935
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,16384,12,4,64,0,1,fp8,fp8,0,3.7179733912150064
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,12288,12,1,64,0,1,float16,float16,0,13.498538970947266
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,12288,12,1,64,0,1,float16,fp8,0,13.70999526977539
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,12288,12,1,64,0,1,fp8,fp8,0,16.606549580891926
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,12288,12,2,64,0,1,float16,float16,0,13.51748275756836
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,12288,12,2,64,0,1,float16,fp8,0,13.54257074991862
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,12288,12,4,64,0,1,float16,float16,0,12.97646967569987
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,12288,12,4,64,0,1,float16,fp8,0,13.20294443766276
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,12288,12,2,64,0,1,fp8,fp8,0,17.155072530110676
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,12288,12,12,64,0,1,float16,float16,0,6.899370829264323
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,12288,12,12,64,0,1,float16,fp8,0,6.482261021931966
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,12288,12,4,64,0,1,fp8,fp8,0,17.14346694946289
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,12288,12,1,64,0,1,float16,float16,0,6.310911814371745
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,12288,12,1,64,0,1,float16,fp8,0,6.18717892964681
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,12288,12,12,64,0,1,fp8,fp8,0,8.938154856363932
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,12288,12,1,64,0,1,fp8,fp8,0,8.339967727661133
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,12288,12,2,64,0,1,float16,float16,0,6.918485641479492
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,12288,12,2,64,0,1,float16,fp8,0,6.295722961425781
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,12288,12,2,64,0,1,fp8,fp8,0,8.37393061319987
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,12288,12,4,64,0,1,float16,float16,0,6.709248224894206
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,12288,12,12,64,0,1,float16,float16,0,3.217066764831543
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,12288,12,4,64,0,1,float16,fp8,0,6.55735460917155
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,12288,12,12,64,0,1,float16,fp8,0,3.105109214782715
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,12288,12,4,64,0,1,fp8,fp8,0,8.546986897786459
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,12288,12,12,64,0,1,fp8,fp8,0,4.466005325317383
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,12288,12,1,64,0,1,float16,float16,0,2.956629435221354
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,12288,12,1,64,0,1,float16,fp8,0,2.779306729634603
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,12288,12,2,64,0,1,float16,fp8,0,2.896042823791504
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,12288,12,2,64,0,1,float16,float16,0,2.93341859181722
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,12288,12,1,64,0,1,fp8,fp8,0,4.137301445007324
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,12288,12,2,64,0,1,fp8,fp8,0,4.131327946980794
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,12288,12,4,64,0,1,float16,float16,0,2.819242795308431
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,12288,12,4,64,0,1,float16,fp8,0,2.877098719278971
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,12288,12,4,64,0,1,fp8,fp8,0,4.185941378275554
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,12288,12,12,64,0,1,float16,float16,0,1.4709760348002117
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,12288,12,12,64,0,1,float16,fp8,0,1.4986240069071453
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,12288,12,1,64,0,1,float16,float16,0,1.5655253728230794
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,12288,12,12,64,0,1,fp8,fp8,0,2.2125226656595864
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,12288,12,1,64,0,1,float16,fp8,0,1.6141653060913086
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,12288,12,1,64,0,1,fp8,fp8,0,2.126335938771566
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,12288,12,2,64,0,1,float16,float16,0,1.4895787239074707
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,12288,12,2,64,0,1,float16,fp8,0,1.5800320307413738
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,12288,12,2,64,0,1,fp8,fp8,0,2.1249705950419107
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,12288,12,4,64,0,1,float16,float16,0,1.6421546936035156
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,12288,12,4,64,0,1,float16,fp8,0,1.616213321685791
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,12288,12,4,64,0,1,fp8,fp8,0,2.137258688608805
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,10240,12,1,64,0,1,float16,float16,0,9.52780787150065
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,10240,12,1,64,0,1,float16,fp8,0,9.33785629272461
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,10240,12,2,64,0,1,float16,float16,0,9.512106577555338
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,10240,12,1,64,0,1,fp8,fp8,0,11.83965810139974
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,10240,12,2,64,0,1,float16,fp8,0,9.59880510965983
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,10240,12,2,64,0,1,fp8,fp8,0,11.850410461425781
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,10240,12,4,64,0,1,float16,float16,0,9.653760274251303
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,10240,12,4,64,0,1,float16,fp8,0,9.636522928873697
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,10240,12,12,64,0,1,float16,float16,0,4.652714729309082
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,10240,12,12,64,0,1,float16,fp8,0,4.505087852478027
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,10240,12,1,64,0,1,float16,float16,0,4.527786572774251
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,10240,12,12,64,0,1,fp8,fp8,0,6.360746383666992
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,10240,12,4,64,0,1,fp8,fp8,0,12.064768473307291
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,10240,12,1,64,0,1,float16,fp8,0,4.404735883076985
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,10240,12,2,64,0,1,float16,float16,0,4.016469319661458
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,10240,12,2,64,0,1,float16,fp8,0,4.402005195617676
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,10240,12,1,64,0,1,fp8,fp8,0,5.814271926879883
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,10240,12,2,64,0,1,fp8,fp8,0,5.845845540364583
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,10240,12,4,64,0,1,float16,float16,0,4.2214399973551435
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,10240,12,4,64,0,1,float16,fp8,0,4.558677355448405
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,10240,12,4,64,0,1,fp8,fp8,0,5.94978141784668
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,10240,12,12,64,0,1,float16,float16,0,2.192042668660482
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,10240,12,12,64,0,1,float16,fp8,0,2.1978453000386557
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,10240,12,12,64,0,1,fp8,fp8,0,3.0704641342163086
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,10240,12,1,64,0,1,float16,float16,0,1.9616427421569824
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,10240,12,1,64,0,1,float16,fp8,0,2.025983969370524
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,10240,12,1,64,0,1,fp8,fp8,0,2.8569599787394204
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,10240,12,2,64,0,1,float16,float16,0,2.004138628641764
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,10240,12,2,64,0,1,float16,fp8,0,1.9537919362386067
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,10240,12,2,64,0,1,fp8,fp8,0,2.8578131993611655
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,10240,12,4,64,0,1,float16,float16,0,1.9865600268046062
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,10240,12,4,64,0,1,float16,fp8,0,1.9194879531860352
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,10240,12,12,64,0,1,float16,fp8,0,1.063424030939738
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,10240,12,12,64,0,1,float16,float16,0,1.0678613185882568
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,10240,12,12,64,0,1,fp8,fp8,0,1.5774720509847004
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,10240,12,4,64,0,1,fp8,fp8,0,2.8528639475504556
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,10240,12,1,64,0,1,float16,float16,0,1.1461973190307617
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,10240,12,1,64,0,1,float16,fp8,0,1.1369813283284504
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,10240,12,1,64,0,1,fp8,fp8,0,1.521663983662923
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,10240,12,2,64,0,1,float16,float16,0,1.1775999863942463
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,10240,12,2,64,0,1,float16,fp8,0,1.1484159628550212
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,10240,12,2,64,0,1,fp8,fp8,0,1.567914644877116
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,10240,12,4,64,0,1,float16,float16,0,1.0625706513722737
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,10240,12,4,64,0,1,float16,fp8,0,1.156266689300537
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,10240,12,4,64,0,1,fp8,fp8,0,1.536853313446045
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,8192,12,1,64,0,1,float16,float16,0,12.54144032796224
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,8192,12,1,64,0,1,float16,fp8,0,12.697087605794271
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,8192,12,1,64,0,1,fp8,fp8,0,15.303508758544922
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,8192,12,2,64,0,1,float16,float16,0,12.675071716308594
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,8192,12,2,64,0,1,float16,fp8,0,12.580181121826172
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,8192,12,4,64,0,1,float16,fp8,0,12.549461364746094
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,8192,12,4,64,0,1,float16,float16,0,12.692991892496744
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,8192,12,2,64,0,1,fp8,fp8,0,16.13312021891276
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,8192,12,12,64,0,1,float16,float16,0,6.484991709391276
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,8192,12,12,64,0,1,float16,fp8,0,6.268074671427409
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,8192,12,1,64,0,1,float16,float16,0,5.445119857788086
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,8192,12,12,64,0,1,fp8,fp8,0,8.268970489501953
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,8192,12,4,64,0,1,fp8,fp8,0,16.282965342203777
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,8192,12,1,64,0,1,float16,fp8,0,6.083242416381836
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,8192,12,2,64,0,1,float16,float16,0,5.626880009969075
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,8192,12,1,64,0,1,fp8,fp8,0,7.550463994344075
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,8192,12,2,64,0,1,float16,fp8,0,5.601792017618815
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,8192,12,12,64,0,1,float16,float16,0,3.0071465174357095
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,8192,12,2,64,0,1,fp8,fp8,0,7.807146708170573
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,8192,12,4,64,0,1,float16,float16,0,6.078634897867839
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,8192,12,4,64,0,1,float16,fp8,0,6.191786448160808
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,8192,12,12,64,0,1,float16,fp8,0,2.973525365193685
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,8192,12,4,64,0,1,fp8,fp8,0,7.806975682576497
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,8192,12,12,64,0,1,fp8,fp8,0,4.031829198201497
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,8192,12,1,64,0,1,float16,float16,0,2.7187201182047525
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,8192,12,1,64,0,1,float16,fp8,0,2.5745066006978354
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,8192,12,2,64,0,1,float16,fp8,0,2.5323519706726074
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,8192,12,1,64,0,1,fp8,fp8,0,3.723093350728353
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,8192,12,2,64,0,1,float16,float16,0,2.681002616882324
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,8192,12,2,64,0,1,fp8,fp8,0,3.796991984049479
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,8192,12,4,64,0,1,float16,float16,0,2.6596693992614746
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,8192,12,4,64,0,1,float16,fp8,0,2.628608067830404
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,8192,12,4,64,0,1,fp8,fp8,0,3.8488747278849282
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,8192,12,12,64,0,1,float16,float16,0,1.3955413500467937
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,8192,12,12,64,0,1,float16,fp8,0,1.3243733247121174
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,8192,12,12,64,0,1,fp8,fp8,0,2.0363945960998535
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,8192,12,1,64,0,1,float16,fp8,0,1.3253973325093586
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,8192,12,1,64,0,1,float16,float16,0,1.3252267042795818
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,8192,12,1,64,0,1,fp8,fp8,0,1.8662400245666504
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,8192,12,2,64,0,1,float16,float16,0,1.2864853541056316
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,8192,12,2,64,0,1,float16,fp8,0,1.3192533651987712
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,8192,12,2,64,0,1,fp8,fp8,0,1.8597547213236492
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,8192,12,4,64,0,1,float16,float16,0,1.2799999713897705
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,8192,12,4,64,0,1,float16,fp8,0,1.280682643254598
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,8192,12,12,64,0,1,float16,fp8,0,0.7447893619537354
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,8192,12,4,64,0,1,fp8,fp8,0,1.89354674021403
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,8192,12,12,64,0,1,float16,float16,0,0.7219200134277344
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,8192,12,12,64,0,1,fp8,fp8,0,1.0255359808603923
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,8192,12,1,64,0,1,float16,float16,0,0.730282704035441
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,8192,12,1,64,0,1,float16,fp8,0,0.7538346449534098
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,8192,12,1,64,0,1,fp8,fp8,0,1.0147840181986492
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,8192,12,2,64,0,1,float16,float16,0,0.7572480042775472
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,8192,12,2,64,0,1,float16,fp8,0,0.7336959838867188
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,8192,12,2,64,0,1,fp8,fp8,0,1.0195626417795818
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,8192,12,4,64,0,1,float16,float16,0,0.7500800291697184
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,8192,12,4,64,0,1,float16,fp8,0,0.764245351155599
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,8192,12,4,64,0,1,fp8,fp8,0,1.025877316792806
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,6144,12,1,64,0,1,float16,float16,0,7.228074391682942
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,6144,12,1,64,0,1,float16,fp8,0,7.184725443522136
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,6144,12,2,64,0,1,float16,float16,0,7.256746927897136
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,6144,12,1,64,0,1,fp8,fp8,0,8.784213383992514
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,6144,12,2,64,0,1,float16,fp8,0,7.464618682861328
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,6144,12,2,64,0,1,fp8,fp8,0,9.047893524169922
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,6144,12,4,64,0,1,float16,float16,0,7.274496078491211
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,6144,12,4,64,0,1,float16,fp8,0,7.278421401977539
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,6144,12,12,64,0,1,float16,float16,0,3.7374293009440103
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,6144,12,12,64,0,1,float16,fp8,0,3.6241067250569663
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,6144,12,1,64,0,1,float16,float16,0,3.225770632425944
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,6144,12,12,64,0,1,fp8,fp8,0,4.78651746114095
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,6144,12,4,64,0,1,fp8,fp8,0,9.377621332804361
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,6144,12,1,64,0,1,float16,fp8,0,3.031381289164225
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,6144,12,2,64,0,1,float16,float16,0,3.2375466028849282
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,6144,12,1,64,0,1,fp8,fp8,0,4.263253211975098
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,6144,12,2,64,0,1,float16,fp8,0,3.062272071838379
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,6144,12,2,64,0,1,fp8,fp8,0,4.361045201619466
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,6144,12,4,64,0,1,float16,float16,0,3.3409706751505532
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,6144,12,4,64,0,1,float16,fp8,0,3.2073386510213218
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,6144,12,12,64,0,1,float16,float16,0,1.8044586181640625
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,6144,12,4,64,0,1,fp8,fp8,0,4.464127858479817
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,6144,12,12,64,0,1,float16,fp8,0,1.7711787223815918
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,6144,12,12,64,0,1,fp8,fp8,0,2.3683412869771323
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,6144,12,1,64,0,1,float16,float16,0,1.4615893363952637
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,6144,12,1,64,0,1,float16,fp8,0,1.4621013005574544
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,6144,12,2,64,0,1,float16,float16,0,1.4312106768290203
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,6144,12,1,64,0,1,fp8,fp8,0,2.0901546478271484
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,6144,12,2,64,0,1,float16,fp8,0,1.4776320457458496
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,6144,12,2,64,0,1,fp8,fp8,0,2.101247946421305
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,6144,12,4,64,0,1,float16,float16,0,1.5010132789611816
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,6144,12,4,64,0,1,float16,fp8,0,1.4341120719909668
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,6144,12,4,64,0,1,fp8,fp8,0,2.1611520449320474
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,6144,12,12,64,0,1,float16,float16,0,0.7877973715464274
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,6144,12,12,64,0,1,float16,fp8,0,0.7770453294118246
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,6144,12,12,64,0,1,fp8,fp8,0,1.2105387051900227
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,6144,12,1,64,0,1,float16,float16,0,0.7645866870880127
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,6144,12,1,64,0,1,float16,fp8,0,0.7633919715881348
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,6144,12,1,64,0,1,fp8,fp8,0,1.1098453203837078
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,6144,12,2,64,0,1,float16,float16,0,0.7581013043721517
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,6144,12,2,64,0,1,float16,fp8,0,0.7533226807912191
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,6144,12,2,64,0,1,fp8,fp8,0,1.0898773670196533
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,6144,12,4,64,0,1,float16,float16,0,0.7596373558044434
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,6144,12,4,64,0,1,float16,fp8,0,0.769536018371582
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,6144,12,4,64,0,1,fp8,fp8,0,1.1154773235321045
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,6144,12,12,64,0,1,float16,float16,0,0.4346880118052165
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,6144,12,12,64,0,1,float16,fp8,0,0.43298133214314777
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,6144,12,12,64,0,1,fp8,fp8,0,0.6306133270263672
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,6144,12,1,64,0,1,float16,float16,0,0.4358826478322347
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,6144,12,1,64,0,1,float16,fp8,0,0.4360533157984416
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,6144,12,1,64,0,1,fp8,fp8,0,0.6282240152359009
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,6144,12,2,64,0,1,float16,float16,0,0.4331520001093547
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,6144,12,2,64,0,1,float16,fp8,0,0.43485867977142334
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,6144,12,2,64,0,1,fp8,fp8,0,0.6324906746546427
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,6144,12,4,64,0,1,float16,float16,0,0.429909348487854
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,6144,12,4,64,0,1,float16,fp8,0,0.4251306851704915
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,6144,12,4,64,0,1,fp8,fp8,0,0.626858671506246
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,4096,12,1,64,0,1,float16,float16,0,6.48362668355306
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,4096,12,1,64,0,1,float16,fp8,0,6.81062380472819
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,4096,12,1,64,0,1,fp8,fp8,0,8.427520116170248
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,4096,12,2,64,0,1,float16,float16,0,6.909098943074544
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,4096,12,2,64,0,1,float16,fp8,0,6.737066904703776
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,4096,12,2,64,0,1,fp8,fp8,0,8.929791768391928
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,4096,12,4,64,0,1,float16,float16,0,6.939818700154622
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,4096,12,4,64,0,1,float16,fp8,0,7.024810791015625
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,4096,12,12,64,0,1,float16,float16,0,3.8910293579101562
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,4096,12,12,64,0,1,float16,fp8,0,3.78436279296875
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,4096,12,1,64,0,1,float16,float16,0,2.9562880198160806
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,4096,12,12,64,0,1,fp8,fp8,0,4.7143252690633135
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,4096,12,4,64,0,1,fp8,fp8,0,9.343658447265625
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,4096,12,1,64,0,1,float16,fp8,0,3.027455965677897
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,4096,12,2,64,0,1,float16,float16,0,3.031551996866862
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,4096,12,1,64,0,1,fp8,fp8,0,3.930111885070801
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,4096,12,2,64,0,1,float16,fp8,0,2.9238611857096353
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,4096,12,4,64,0,1,float16,float16,0,3.205631891886393
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,4096,12,2,64,0,1,fp8,fp8,0,4.129621187845866
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,4096,12,4,64,0,1,float16,fp8,0,3.2314027150472007
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,4096,12,4,64,0,1,fp8,fp8,0,4.22434139251709
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,4096,12,12,64,0,1,float16,float16,0,1.8701653480529785
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,4096,12,12,64,0,1,float16,fp8,0,1.7563306490580242
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,4096,12,12,64,0,1,fp8,fp8,0,2.2795947392781577
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,4096,12,1,64,0,1,float16,float16,0,1.3489492734273274
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,4096,12,1,64,0,1,float16,fp8,0,1.3632853825887044
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,4096,12,2,64,0,1,float16,float16,0,1.3905919392903645
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,4096,12,1,64,0,1,fp8,fp8,0,1.9259732564290364
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,4096,12,2,64,0,1,float16,fp8,0,1.3579947153727214
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,4096,12,2,64,0,1,fp8,fp8,0,1.9594240188598633
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,4096,12,4,64,0,1,float16,float16,0,1.4776320457458496
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,4096,12,4,64,0,1,float16,fp8,0,1.425920009613037
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,4096,12,4,64,0,1,fp8,fp8,0,2.038442611694336
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,4096,12,12,64,0,1,float16,float16,0,0.7997439702351888
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,4096,12,12,64,0,1,float16,fp8,0,0.7490560213724772
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,4096,12,12,64,0,1,fp8,fp8,0,1.176917314529419
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,4096,12,1,64,0,1,float16,float16,0,0.7024640242258707
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,4096,12,1,64,0,1,float16,fp8,0,0.7178239822387695
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,4096,12,1,64,0,1,fp8,fp8,0,0.976213296254476
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,4096,12,2,64,0,1,float16,float16,0,0.6959786415100098
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,4096,12,2,64,0,1,float16,fp8,0,0.6879573663075765
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,4096,12,2,64,0,1,fp8,fp8,0,0.9823573430379232
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,4096,12,4,64,0,1,float16,float16,0,0.7045119603474935
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,4096,12,4,64,0,1,float16,fp8,0,0.7075839837392172
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,4096,12,4,64,0,1,fp8,fp8,0,0.9869653383890787
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,4096,12,12,64,0,1,float16,float16,0,0.39611732959747314
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,4096,12,12,64,0,1,float16,fp8,0,0.3848533233006795
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,4096,12,12,64,0,1,fp8,fp8,0,0.5476693312327067
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,4096,12,1,64,0,1,float16,float16,0,0.36744534969329834
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,4096,12,1,64,0,1,float16,fp8,0,0.3729066848754883
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,4096,12,2,64,0,1,float16,float16,0,0.36932265758514404
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,4096,12,2,64,0,1,float16,fp8,0,0.37137067317962646
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,4096,12,1,64,0,1,fp8,fp8,0,0.5340160131454468
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,4096,12,2,64,0,1,fp8,fp8,0,0.5374293327331543
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,4096,12,4,64,0,1,float16,float16,0,0.37137067317962646
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,4096,12,4,64,0,1,float16,fp8,0,0.37137067317962646
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,4096,12,4,64,0,1,fp8,fp8,0,0.5406719843546549
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,4096,12,12,64,0,1,float16,float16,0,0.2259626587231954
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,4096,12,12,64,0,1,fp8,fp8,0,0.3198293248812358
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,4096,12,12,64,0,1,float16,fp8,0,0.22459733486175537
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,4096,12,1,64,0,1,float16,float16,0,0.23534933725992838
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,4096,12,1,64,0,1,float16,fp8,0,0.23244800170262656
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,4096,12,1,64,0,1,fp8,fp8,0,0.3150506615638733
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,4096,12,2,64,0,1,float16,float16,0,0.2302293380101522
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,4096,12,2,64,0,1,float16,fp8,0,0.23040000597635904
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,4096,12,2,64,0,1,fp8,fp8,0,0.311296006043752
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,4096,12,4,64,0,1,float16,float16,0,0.23483733336130777
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,4096,12,4,64,0,1,float16,fp8,0,0.22715733448664346
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,4096,12,4,64,0,1,fp8,fp8,0,0.32153600454330444
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,3072,12,1,64,0,1,float16,float16,0,3.7913599014282227
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,3072,12,1,64,0,1,float16,fp8,0,3.5932159423828125
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,3072,12,2,64,0,1,float16,float16,0,3.9560534159342446
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,3072,12,1,64,0,1,fp8,fp8,0,4.708522796630859
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,3072,12,2,64,0,1,float16,fp8,0,3.822421391805013
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,3072,12,4,64,0,1,float16,float16,0,4.241066614786784
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,3072,12,2,64,0,1,fp8,fp8,0,5.039957364400228
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,3072,12,4,64,0,1,float16,fp8,0,4.158122698465983
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,3072,12,12,64,0,1,float16,float16,0,2.3997440338134766
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,3072,12,4,64,0,1,fp8,fp8,0,5.427029291788737
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,3072,12,12,64,0,1,float16,fp8,0,2.2741333643595376
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,3072,12,1,64,0,1,float16,float16,0,1.6931840578715007
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,3072,12,12,64,0,1,fp8,fp8,0,2.813781420389811
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,3072,12,1,64,0,1,float16,fp8,0,1.744383970896403
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,3072,12,1,64,0,1,fp8,fp8,0,2.2488746643066406
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,3072,12,2,64,0,1,float16,float16,0,1.768448034922282
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,3072,12,2,64,0,1,float16,fp8,0,1.7174186706542969
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,3072,12,2,64,0,1,fp8,fp8,0,2.3376213709513345
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,3072,12,4,64,0,1,float16,float16,0,1.8836480776468914
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,3072,12,4,64,0,1,float16,fp8,0,1.8635093371073406
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,3072,12,4,64,0,1,fp8,fp8,0,2.4255146980285645
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,3072,12,12,64,0,1,float16,fp8,0,1.0571093559265137
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,3072,12,12,64,0,1,float16,float16,0,1.1368107000986736
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,3072,12,12,64,0,1,fp8,fp8,0,1.4202879269917805
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,3072,12,1,64,0,1,float16,float16,0,0.803669293721517
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,3072,12,1,64,0,1,float16,fp8,0,0.8272213141123453
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,3072,12,1,64,0,1,fp8,fp8,0,1.1130879720052083
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,3072,12,2,64,0,1,float16,float16,0,0.7917226950327555
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,3072,12,2,64,0,1,float16,fp8,0,0.792405366897583
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,3072,12,2,64,0,1,fp8,fp8,0,1.1236693064371746
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,3072,12,4,64,0,1,float16,float16,0,0.8330240249633789
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,3072,12,4,64,0,1,float16,fp8,0,0.8161280155181885
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,3072,12,4,64,0,1,fp8,fp8,0,1.2069546381632488
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,3072,12,12,64,0,1,float16,float16,0,0.44646398226420086
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,3072,12,12,64,0,1,float16,fp8,0,0.42803200085957843
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,3072,12,12,64,0,1,fp8,fp8,0,0.7046826680501302
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,3072,12,1,64,0,1,float16,fp8,0,0.4094293514887492
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,3072,12,1,64,0,1,float16,float16,0,0.4089173475901286
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,3072,12,1,64,0,1,fp8,fp8,0,0.5927253166834513
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,3072,12,2,64,0,1,float16,float16,0,0.41523198286692303
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,3072,12,2,64,0,1,float16,fp8,0,0.4092586835225423
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,3072,12,2,64,0,1,fp8,fp8,0,0.5882879892985026
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,3072,12,4,64,0,1,float16,float16,0,0.41574398676554364
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,3072,12,4,64,0,1,float16,fp8,0,0.4174506664276123
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,3072,12,4,64,0,1,fp8,fp8,0,0.5930666526158651
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,3072,12,12,64,0,1,float16,fp8,0,0.233130673567454
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,3072,12,12,64,0,1,float16,float16,0,0.2409813404083252
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,3072,12,12,64,0,1,fp8,fp8,0,0.3476479848225911
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,3072,12,1,64,0,1,float16,float16,0,0.23278933763504028
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,3072,12,1,64,0,1,float16,fp8,0,0.23773866891860962
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,3072,12,1,64,0,1,fp8,fp8,0,0.3295573393503825
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,3072,12,2,64,0,1,float16,float16,0,0.23569067319234213
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,3072,12,2,64,0,1,float16,fp8,0,0.23637332518895468
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,3072,12,2,64,0,1,fp8,fp8,0,0.33604268232981366
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,3072,12,4,64,0,1,float16,fp8,0,0.2373973329861959
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,3072,12,4,64,0,1,float16,float16,0,0.23125332593917847
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,3072,12,4,64,0,1,fp8,fp8,0,0.33399466673533124
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,3072,12,12,64,0,1,float16,float16,0,0.14899200201034546
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,3072,12,12,64,0,1,float16,fp8,0,0.14472533265749613
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,3072,12,12,64,0,1,fp8,fp8,0,0.20053333044052124
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,3072,12,1,64,0,1,float16,float16,0,0.15633066495259604
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,3072,12,1,64,0,1,float16,fp8,0,0.15308800339698792
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,3072,12,1,64,0,1,fp8,fp8,0,0.2027519941329956
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,3072,12,2,64,0,1,float16,float16,0,0.15530666708946228
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,3072,12,2,64,0,1,float16,fp8,0,0.15530666708946228
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,3072,12,2,64,0,1,fp8,fp8,0,0.20087466637293497
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,3072,12,4,64,0,1,float16,float16,0,0.15052800377209982
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,3072,12,4,64,0,1,float16,fp8,0,0.1570133368174235
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,3072,12,4,64,0,1,fp8,fp8,0,0.20121600230534872
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,2048,12,1,64,0,1,float16,float16,0,3.9280640284220376
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,2048,12,1,64,0,1,float16,fp8,0,3.87447452545166
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,2048,12,2,64,0,1,float16,float16,0,4.256938616434733
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,2048,12,1,64,0,1,fp8,fp8,0,4.733440081278483
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,2048,12,2,64,0,1,float16,fp8,0,4.272128105163574
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,2048,12,2,64,0,1,fp8,fp8,0,5.217962582906087
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,2048,12,4,64,0,1,float16,float16,0,4.633088111877441
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,2048,12,4,64,0,1,float16,fp8,0,4.483413378397624
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,2048,12,12,64,0,1,float16,float16,0,2.7236693700154624
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,2048,12,12,64,0,1,float16,fp8,0,2.536106745402018
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,2048,12,12,64,0,1,fp8,fp8,0,3.0441811879475913
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,2048,12,4,64,0,1,fp8,fp8,0,5.669375737508138
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,2048,12,1,64,0,1,float16,float16,0,1.8001920382181804
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,2048,12,1,64,0,1,float16,fp8,0,1.7254400253295898
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,2048,12,1,64,0,1,fp8,fp8,0,2.190336068471273
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,2048,12,2,64,0,1,float16,float16,0,1.8382506370544434
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,2048,12,2,64,0,1,float16,fp8,0,1.8319360415140789
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,2048,12,2,64,0,1,fp8,fp8,0,2.343935966491699
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,2048,12,4,64,0,1,float16,float16,0,2.04475736618042
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,2048,12,4,64,0,1,float16,fp8,0,2.0116480191548667
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,2048,12,12,64,0,1,float16,float16,0,1.2354559898376465
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,2048,12,4,64,0,1,fp8,fp8,0,2.5077759424845376
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,2048,12,12,64,0,1,float16,fp8,0,1.170090675354004
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,2048,12,12,64,0,1,fp8,fp8,0,1.4551040331522624
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,2048,12,1,64,0,1,float16,fp8,0,0.7765333652496338
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,2048,12,1,64,0,1,float16,float16,0,0.7879679997762045
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,2048,12,2,64,0,1,float16,float16,0,0.808789332707723
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,2048,12,1,64,0,1,fp8,fp8,0,1.0690560340881348
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,2048,12,2,64,0,1,float16,fp8,0,0.7767039934794108
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,2048,12,2,64,0,1,fp8,fp8,0,1.1185493469238281
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,2048,12,4,64,0,1,float16,float16,0,0.8989013036092123
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,2048,12,4,64,0,1,float16,fp8,0,0.8668159643809
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,2048,12,12,64,0,1,float16,float16,0,0.5191680192947388
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,2048,12,4,64,0,1,fp8,fp8,0,1.2057600021362305
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,2048,12,12,64,0,1,float16,fp8,0,0.44390400250752765
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,2048,12,12,64,0,1,fp8,fp8,0,0.7336959838867188
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,2048,12,1,64,0,1,float16,float16,0,0.3860479990641276
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,2048,12,1,64,0,1,float16,fp8,0,0.39765334129333496
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,2048,12,1,64,0,1,fp8,fp8,0,0.5376000006993612
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,2048,12,2,64,0,1,float16,float16,0,0.3964586655298869
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,2048,12,2,64,0,1,float16,fp8,0,0.3979946772257487
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,2048,12,2,64,0,1,fp8,fp8,0,0.5461333195368449
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,2048,12,4,64,0,1,float16,float16,0,0.3993599812189738
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,2048,12,4,64,0,1,float16,fp8,0,0.3891199827194214
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,2048,12,4,64,0,1,fp8,fp8,0,0.5548373460769653
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,2048,12,12,64,0,1,float16,float16,0,0.22203733523686728
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,2048,12,12,64,0,1,float16,fp8,0,0.21998933951059976
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,2048,12,12,64,0,1,fp8,fp8,0,0.30668799082438153
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,2048,12,1,64,0,1,float16,fp8,0,0.2058239976565043
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,2048,12,1,64,0,1,float16,float16,0,0.2053119937578837
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,2048,12,1,64,0,1,fp8,fp8,0,0.289792001247406
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,2048,12,2,64,0,1,float16,float16,0,0.20206934213638306
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,2048,12,2,64,0,1,float16,fp8,0,0.20155733823776245
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,2048,12,2,64,0,1,fp8,fp8,0,0.2945706645647685
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,2048,12,4,64,0,1,float16,float16,0,0.202239990234375
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,2048,12,4,64,0,1,fp8,fp8,0,0.29525333642959595
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,2048,12,12,64,0,1,float16,float16,0,0.1237333317597707
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,2048,12,4,64,0,1,float16,fp8,0,0.20992000897725424
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,2048,12,12,64,0,1,float16,fp8,0,0.12236799796422322
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,2048,12,12,64,0,1,fp8,fp8,0,0.16913066307703653
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,2048,12,1,64,0,1,float16,float16,0,0.12441600362459819
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,2048,12,1,64,0,1,float16,fp8,0,0.1250986655553182
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,2048,12,1,64,0,1,fp8,fp8,0,0.16639999548594156
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,2048,12,2,64,0,1,float16,float16,0,0.12339199582735698
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,2048,12,2,64,0,1,float16,fp8,0,0.1230506698290507
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,2048,12,4,64,0,1,float16,float16,0,0.1237333317597707
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,2048,12,2,64,0,1,fp8,fp8,0,0.16776533921559653
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,2048,12,4,64,0,1,float16,fp8,0,0.12288000186284383
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,2048,12,4,64,0,1,fp8,fp8,0,0.16742400328318277
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,2048,12,12,64,0,1,float16,float16,0,0.08584533135096233
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,2048,12,12,64,0,1,float16,fp8,0,0.08277333279450734
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,2048,12,12,64,0,1,fp8,fp8,0,0.11264000336329143
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,2048,12,1,64,0,1,float16,float16,0,0.08959999680519104
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,2048,12,1,64,0,1,float16,fp8,0,0.0846506655216217
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,2048,12,1,64,0,1,fp8,fp8,0,0.11400533715883891
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,2048,12,2,64,0,1,float16,fp8,0,0.08260266482830048
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,2048,12,2,64,0,1,float16,float16,0,0.08430932958920796
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,2048,12,2,64,0,1,fp8,fp8,0,0.11332266529401143
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,2048,12,4,64,0,1,float16,float16,0,0.08482133348782857
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,2048,12,4,64,0,1,float16,fp8,0,0.08499200145403545
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,2048,12,4,64,0,1,fp8,fp8,0,0.11349333326021831
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,1536,12,1,64,0,1,float16,float16,0,2.311338742574056
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,1536,12,1,64,0,1,fp8,fp8,0,2.7567787170410156
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,1536,12,2,64,0,1,float16,float16,0,2.525183995564779
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,1536,12,1,64,0,1,float16,fp8,0,2.28113063176473
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,1536,12,2,64,0,1,float16,fp8,0,2.5140906969706216
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,1536,12,2,64,0,1,fp8,fp8,0,3.085141181945801
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,1536,12,4,64,0,1,float16,float16,0,2.8047361373901367
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,1536,12,4,64,0,1,float16,fp8,0,2.7496105829874673
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,1536,12,4,64,0,1,fp8,fp8,0,3.3887573877970376
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,1536,12,12,64,0,1,float16,float16,0,1.7242453893025715
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,1536,12,12,64,0,1,float16,fp8,0,1.6213332811991374
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,1536,12,1,64,0,1,float16,float16,0,1.06496000289917
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,1536,12,12,64,0,1,fp8,fp8,0,1.9094187418619792
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,1536,12,1,64,0,1,float16,fp8,0,1.0263893604278564
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,1536,12,1,64,0,1,fp8,fp8,0,1.3313706715901692
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,1536,12,2,64,0,1,float16,float16,0,1.1113813718159993
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,1536,12,2,64,0,1,float16,fp8,0,1.0890239874521892
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,1536,12,2,64,0,1,fp8,fp8,0,1.3972479502360027
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,1536,12,4,64,0,1,float16,float16,0,1.2557653586069744
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,1536,12,4,64,0,1,float16,fp8,0,1.2392106850941975
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,1536,12,12,64,0,1,float16,float16,0,0.7814826965332031
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,1536,12,4,64,0,1,fp8,fp8,0,1.5122772852579753
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,1536,12,12,64,0,1,float16,fp8,0,0.7202133337656657
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,1536,12,12,64,0,1,fp8,fp8,0,0.9419093132019043
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,1536,12,1,64,0,1,float16,float16,0,0.46916266282399494
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,1536,12,1,64,0,1,float16,fp8,0,0.45653335253397626
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,1536,12,2,64,0,1,float16,float16,0,0.47547733783721924
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,1536,12,1,64,0,1,fp8,fp8,0,0.6304426590601603
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,1536,12,2,64,0,1,float16,fp8,0,0.45960533618927
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,1536,12,4,64,0,1,float16,float16,0,0.4944213231404622
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,1536,12,2,64,0,1,fp8,fp8,0,0.6451199849446615
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,1536,12,4,64,0,1,float16,fp8,0,0.48503466447194415
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,1536,12,4,64,0,1,fp8,fp8,0,0.7350613276163737
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,1536,12,12,64,0,1,float16,float16,0,0.2611200014750163
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,1536,12,12,64,0,1,float16,fp8,0,0.2583893338839213
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,1536,12,1,64,0,1,float16,float16,0,0.22988800207773843
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,1536,12,12,64,0,1,fp8,fp8,0,0.4476586580276489
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,1536,12,1,64,0,1,float16,fp8,0,0.2254506746927897
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,1536,12,2,64,0,1,float16,float16,0,0.23688532908757529
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,1536,12,1,64,0,1,fp8,fp8,0,0.32631466786066693
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,1536,12,2,64,0,1,float16,fp8,0,0.23296000560124716
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,1536,12,2,64,0,1,fp8,fp8,0,0.3242666721343994
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,1536,12,4,64,0,1,float16,float16,0,0.24422399202982584
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,1536,12,4,64,0,1,float16,fp8,0,0.23944532871246338
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,1536,12,4,64,0,1,fp8,fp8,0,0.3319466710090637
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,1536,12,12,64,0,1,float16,float16,0,0.13038933277130127
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,1536,12,12,64,0,1,float16,fp8,0,0.13090133666992188
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,1536,12,12,64,0,1,fp8,fp8,0,0.19746132691701254
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,1536,12,1,64,0,1,float16,float16,0,0.13004799683888754
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,1536,12,1,64,0,1,float16,fp8,0,0.13004799683888754
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,1536,12,1,64,0,1,fp8,fp8,0,0.18397865692774454
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,1536,12,2,64,0,1,float16,float16,0,0.12868266304334006
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,1536,12,2,64,0,1,float16,fp8,0,0.12902399897575378
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,1536,12,2,64,0,1,fp8,fp8,0,0.19336533546447754
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,1536,12,4,64,0,1,float16,float16,0,0.13004799683888754
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,1536,12,4,64,0,1,float16,fp8,0,0.12970667084058127
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,1536,12,4,64,0,1,fp8,fp8,0,0.1838080088297526
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,1536,12,12,64,0,1,float16,float16,0,0.08447999755541484
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,1536,12,12,64,0,1,float16,fp8,0,0.08721066514650981
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,1536,12,12,64,0,1,fp8,fp8,0,0.10666666428248088
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,1536,12,1,64,0,1,float16,fp8,0,0.0897706647713979
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,1536,12,1,64,0,1,float16,float16,0,0.08891733487447102
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,1536,12,1,64,0,1,fp8,fp8,0,0.105813334385554
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,1536,12,2,64,0,1,float16,float16,0,0.09233066439628601
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,1536,12,2,64,0,1,float16,fp8,0,0.0865280032157898
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,1536,12,2,64,0,1,fp8,fp8,0,0.10410666465759277
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,1536,12,4,64,0,1,float16,float16,0,0.08823466300964355
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,1536,12,4,64,0,1,float16,fp8,0,0.08533333738644917
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,1536,12,4,64,0,1,fp8,fp8,0,0.1063253382841746
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,1536,12,12,64,0,1,float16,float16,0,0.053077335158983864
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,1536,12,12,64,0,1,float16,fp8,0,0.05376000205675761
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,1536,12,12,64,0,1,fp8,fp8,0,0.07338666419188182
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,1536,12,1,64,0,1,float16,float16,0,0.054272000988324486
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,1536,12,1,64,0,1,float16,fp8,0,0.05444266895453135
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,1536,12,1,64,0,1,fp8,fp8,0,0.07287466526031494
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,1536,12,2,64,0,1,float16,fp8,0,0.05444266895453135
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,1536,12,2,64,0,1,float16,float16,0,0.05461333195368449
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,1536,12,2,64,0,1,fp8,fp8,0,0.07372800012429555
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,1536,12,4,64,0,1,float16,float16,0,0.053247998158137
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,1536,12,4,64,0,1,fp8,fp8,0,0.0727040022611618
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,1536,12,4,64,0,1,float16,fp8,0,0.053077335158983864
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,1024,12,1,64,0,1,float16,float16,0,2.550442695617676
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,1024,12,1,64,0,1,float16,fp8,0,2.577066739400228
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,1024,12,1,64,0,1,fp8,fp8,0,2.795349438985189
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,1024,12,2,64,0,1,float16,float16,0,2.8144639333089194
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,1024,12,2,64,0,1,float16,fp8,0,2.749269485473633
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,1024,12,2,64,0,1,fp8,fp8,0,3.0960639317830405
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,1024,12,4,64,0,1,float16,float16,0,3.17576535542806
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,1024,12,4,64,0,1,float16,fp8,0,3.0639785130818686
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,1024,12,12,64,0,1,float16,fp8,0,1.971882661183675
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,1024,12,12,64,0,1,float16,float16,0,2.120021343231201
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,1024,12,1,64,0,1,float16,float16,0,1.152511994043986
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,1024,12,4,64,0,1,fp8,fp8,0,3.371349334716797
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,1024,12,12,64,0,1,fp8,fp8,0,2.0906666119893393
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,1024,12,1,64,0,1,float16,fp8,0,1.1624106566111247
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,1024,12,1,64,0,1,fp8,fp8,0,1.3752320607503254
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,1024,12,2,64,0,1,float16,float16,0,1.2356266975402832
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,1024,12,2,64,0,1,float16,fp8,0,1.1905706723531086
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,1024,12,2,64,0,1,fp8,fp8,0,1.5083519617716472
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,1024,12,4,64,0,1,float16,float16,0,1.4440107345581055
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,1024,12,4,64,0,1,float16,fp8,0,1.3839359283447266
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,1024,12,4,64,0,1,fp8,fp8,0,1.6523946126302083
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,1024,12,12,64,0,1,float16,float16,0,0.9739946524302164
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,1024,12,12,64,0,1,float16,fp8,0,0.8980480035146078
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,1024,12,12,64,0,1,fp8,fp8,0,1.0265599886576335
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,1024,12,1,64,0,1,float16,float16,0,0.48230401674906415
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,1024,12,1,64,0,1,float16,fp8,0,0.49237334728240967
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,1024,12,1,64,0,1,fp8,fp8,0,0.6539946794509888
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,1024,12,2,64,0,1,float16,float16,0,0.5058559974034628
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,1024,12,2,64,0,1,float16,fp8,0,0.494762659072876
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,1024,12,2,64,0,1,fp8,fp8,0,0.6956373055775961
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,1024,12,4,64,0,1,float16,float16,0,0.6143999894460043
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,1024,12,4,64,0,1,fp8,fp8,0,0.7906986872355143
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,1024,12,4,64,0,1,float16,fp8,0,0.577023983001709
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,1024,12,12,64,0,1,float16,float16,0,0.3619840145111084
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,1024,12,12,64,0,1,float16,fp8,0,0.2950826684633891
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,1024,12,1,64,0,1,float16,float16,0,0.2373973329861959
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,1024,12,12,64,0,1,fp8,fp8,0,0.5138773520787557
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,1024,12,1,64,0,1,float16,fp8,0,0.24115200837453207
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,1024,12,1,64,0,1,fp8,fp8,0,0.31470932563145954
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,1024,12,2,64,0,1,float16,float16,0,0.23825067281723022
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,1024,12,2,64,0,1,fp8,fp8,0,0.32921600341796875
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,1024,12,2,64,0,1,float16,fp8,0,0.2392746607462565
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,1024,12,4,64,0,1,float16,float16,0,0.24302933613459268
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,1024,12,4,64,0,1,float16,fp8,0,0.24593067169189453
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,1024,12,4,64,0,1,fp8,fp8,0,0.32631466786066693
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,1024,12,12,64,0,1,float16,float16,0,0.13414399822553
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,1024,12,12,64,0,1,float16,fp8,0,0.12970667084058127
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,1024,12,12,64,0,1,fp8,fp8,0,0.17834667364756265
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,1024,12,1,64,0,1,float16,float16,0,0.1281706690788269
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,1024,12,2,64,0,1,float16,float16,0,0.12834133704503378
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,1024,12,1,64,0,1,fp8,fp8,0,0.17339734236399332
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,1024,12,1,64,0,1,float16,fp8,0,0.1281706690788269
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,1024,12,2,64,0,1,float16,fp8,0,0.1293653349081675
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,1024,12,2,64,0,1,fp8,fp8,0,0.17510400215784708
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,1024,12,4,64,0,1,float16,float16,0,0.12987732887268066
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,1024,12,4,64,0,1,float16,fp8,0,0.12731732924779257
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,1024,12,4,64,0,1,fp8,fp8,0,0.17254400253295898
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,1024,12,12,64,0,1,float16,float16,0,0.07816533247629802
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,1024,12,12,64,0,1,float16,fp8,0,0.07714133461316426
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,1024,12,1,64,0,1,float16,float16,0,0.07645866771539052
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,1024,12,1,64,0,1,float16,fp8,0,0.08106666803359985
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,1024,12,12,64,0,1,fp8,fp8,0,0.10069333513577779
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,1024,12,1,64,0,1,fp8,fp8,0,0.10035199920336406
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,1024,12,2,64,0,1,float16,float16,0,0.07560533285140991
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,1024,12,2,64,0,1,float16,fp8,0,0.0773119976123174
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,1024,12,2,64,0,1,fp8,fp8,0,0.10154666503270467
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,1024,12,4,64,0,1,float16,float16,0,0.07594666878382365
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,1024,12,4,64,0,1,float16,fp8,0,0.0766293356815974
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,1024,12,4,64,0,1,fp8,fp8,0,0.10171733299891154
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,1024,12,12,64,0,1,float16,float16,0,0.04761599997679392
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,1024,12,12,64,0,1,float16,fp8,0,0.0481279989083608
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,1024,12,12,64,0,1,fp8,fp8,0,0.06365866462389629
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,1024,12,1,64,0,1,float16,fp8,0,0.046762665112813316
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,1024,12,1,64,0,1,float16,float16,0,0.04727466901143392
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,1024,12,1,64,0,1,fp8,fp8,0,0.0631466656923294
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,1024,12,2,64,0,1,float16,float16,0,0.045909335215886436
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,1024,12,2,64,0,1,float16,fp8,0,0.04659200211366018
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,1024,12,2,64,0,1,fp8,fp8,0,0.0628053347269694
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,1024,12,4,64,0,1,float16,float16,0,0.04778666794300079
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,1024,12,4,64,0,1,float16,fp8,0,0.04727466901143392
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,1024,12,4,64,0,1,fp8,fp8,0,0.06348800162474315
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,1024,12,12,64,0,1,float16,float16,0,0.03310933212439219
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,1024,12,12,64,0,1,float16,fp8,0,0.03276800115903219
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,1024,12,1,64,0,1,float16,float16,0,0.03293866664171219
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,1024,12,12,64,0,1,fp8,fp8,0,0.04659200211366018
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,1024,12,1,64,0,1,float16,fp8,0,0.032255999743938446
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,1024,12,1,64,0,1,fp8,fp8,0,0.04625066618124644
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,1024,12,2,64,0,1,float16,float16,0,0.03259733319282532
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,1024,12,2,64,0,1,float16,fp8,0,0.03276800115903219
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,1024,12,2,64,0,1,fp8,fp8,0,0.04607999821503957
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,1024,12,4,64,0,1,float16,float16,0,0.03276800115903219
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,1024,12,4,64,0,1,float16,fp8,0,0.03293866664171219
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,1024,12,4,64,0,1,fp8,fp8,0,0.04642133414745331
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,512,12,1,64,0,1,float16,float16,0,1.930239995320638
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,512,12,1,64,0,1,float16,fp8,0,1.9194879531860352
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,512,12,1,64,0,1,fp8,fp8,0,1.97324800491333
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,512,12,2,64,0,1,float16,float16,0,2.214570681254069
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,512,12,2,64,0,1,float16,fp8,0,2.1730987230936685
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,512,12,2,64,0,1,fp8,fp8,0,2.2824959754943848
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,512,12,4,64,0,1,float16,float16,0,2.675882657368978
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,512,12,4,64,0,1,float16,fp8,0,2.5442986488342285
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,512,12,12,64,0,1,float16,float16,0,1.970688025156657
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,512,12,4,64,0,1,fp8,fp8,0,2.570751984914144
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,512,12,12,64,0,1,float16,fp8,0,1.8515626589457195
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,512,12,1,64,0,1,float16,float16,0,0.861525297164917
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,512,12,12,64,0,1,fp8,fp8,0,1.6855039596557617
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,512,12,1,64,0,1,float16,fp8,0,0.8256853421529134
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,512,12,1,64,0,1,fp8,fp8,0,0.9555626710255941
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,512,12,2,64,0,1,float16,float16,0,0.9238186677296957
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,512,12,2,64,0,1,float16,fp8,0,0.8982186317443848
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,512,12,2,64,0,1,fp8,fp8,0,1.0854399998982747
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,512,12,4,64,0,1,float16,float16,0,1.2086613178253174
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,512,12,4,64,0,1,float16,fp8,0,1.1514879862467449
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,512,12,12,64,0,1,float16,fp8,0,0.8166399796803793
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,512,12,12,64,0,1,float16,float16,0,0.889685312906901
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,512,12,12,64,0,1,fp8,fp8,0,0.8287573655446371
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,512,12,4,64,0,1,fp8,fp8,0,1.237504005432129
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,512,12,1,64,0,1,float16,float16,0,0.3322880069414775
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,512,12,1,64,0,1,fp8,fp8,0,0.44492801030476886
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,512,12,1,64,0,1,float16,fp8,0,0.32392533620198566
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,512,12,2,64,0,1,float16,float16,0,0.3653973340988159
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,512,12,2,64,0,1,float16,fp8,0,0.34781865278879803
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,512,12,2,64,0,1,fp8,fp8,0,0.4870826800664266
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,512,12,4,64,0,1,float16,float16,0,0.474453330039978
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,512,12,4,64,0,1,float16,fp8,0,0.4427093267440796
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,512,12,4,64,0,1,fp8,fp8,0,0.5845333337783813
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,512,12,12,64,0,1,float16,float16,0,0.29047467311223346
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,512,12,12,64,0,1,float16,fp8,0,0.22749867041905722
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,512,12,12,64,0,1,fp8,fp8,0,0.40277334054311115
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,512,12,1,64,0,1,float16,fp8,0,0.15428266922632852
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,512,12,1,64,0,1,float16,float16,0,0.15633066495259604
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,512,12,2,64,0,1,float16,float16,0,0.15496533115704855
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,512,12,1,64,0,1,fp8,fp8,0,0.20241065820058188
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,512,12,2,64,0,1,float16,fp8,0,0.15786666671435037
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,512,12,2,64,0,1,fp8,fp8,0,0.2044586737950643
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,512,12,4,64,0,1,float16,float16,0,0.15803733468055725
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,512,12,4,64,0,1,float16,fp8,0,0.15940266847610474
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,512,12,12,64,0,1,float16,float16,0,0.09352533022562663
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,512,12,4,64,0,1,fp8,fp8,0,0.211626668771108
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,512,12,12,64,0,1,float16,fp8,0,0.09096533060073853
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,512,12,12,64,0,1,fp8,fp8,0,0.12117333213488261
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,512,12,1,64,0,1,float16,float16,0,0.08891733487447102
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,512,12,1,64,0,1,float16,fp8,0,0.08755200107892354
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,512,12,1,64,0,1,fp8,fp8,0,0.11212799946467082
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,512,12,2,64,0,1,float16,fp8,0,0.09062400460243225
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,512,12,2,64,0,1,float16,float16,0,0.08942932883898418
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,512,12,2,64,0,1,fp8,fp8,0,0.11161599556605022
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,512,12,4,64,0,1,float16,float16,0,0.0885759989420573
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,512,12,4,64,0,1,float16,fp8,0,0.08925867080688477
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,512,12,4,64,0,1,fp8,fp8,0,0.1129813293615977
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,512,12,12,64,0,1,float16,float16,0,0.051370665431022644
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,512,12,12,64,0,1,float16,fp8,0,0.051541333397229515
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,512,12,12,64,0,1,fp8,fp8,0,0.0682666649421056
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,512,12,1,64,0,1,float16,float16,0,0.04949333270390829
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,512,12,1,64,0,1,fp8,fp8,0,0.06860800087451935
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,512,12,1,64,0,1,float16,fp8,0,0.04966400067011515
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,512,12,2,64,0,1,float16,float16,0,0.05017599960168203
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,512,12,2,64,0,1,float16,fp8,0,0.05085866649945577
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,512,12,2,64,0,1,fp8,fp8,0,0.06809600194295247
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,512,12,4,64,0,1,float16,float16,0,0.05000533163547516
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,512,12,4,64,0,1,float16,fp8,0,0.05034666756788889
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,512,12,4,64,0,1,fp8,fp8,0,0.067071999112765
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,512,12,12,64,0,1,float16,float16,0,0.03242666771014532
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,512,12,12,64,0,1,float16,fp8,0,0.03242666771014532
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,512,12,12,64,0,1,fp8,fp8,0,0.040106666584809623
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,512,12,1,64,0,1,float16,fp8,0,0.032085334261258446
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,512,12,1,64,0,1,float16,float16,0,0.032085334261258446
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,512,12,1,64,0,1,fp8,fp8,0,0.03976533313592275
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,512,12,2,64,0,1,float16,float16,0,0.031744000812371574
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,512,12,2,64,0,1,fp8,fp8,0,0.03976533313592275
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,512,12,2,64,0,1,float16,fp8,0,0.0315733328461647
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,512,12,4,64,0,1,float16,float16,0,0.032085334261258446
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,512,12,4,64,0,1,float16,fp8,0,0.03259733319282532
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,512,12,4,64,0,1,fp8,fp8,0,0.03976533313592275
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,512,12,12,64,0,1,float16,float16,0,0.021674667795499165
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,512,12,12,64,0,1,float16,fp8,0,0.021503999829292297
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,512,12,12,64,0,1,fp8,fp8,0,0.02918400118748347
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,512,12,1,64,0,1,float16,float16,0,0.021162666380405426
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,512,12,1,64,0,1,float16,fp8,0,0.020992000897725422
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,512,12,1,64,0,1,fp8,fp8,0,0.02867199977238973
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,512,12,2,64,0,1,float16,float16,0,0.020992000897725422
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,512,12,2,64,0,1,float16,fp8,0,0.020992000897725422
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,512,12,2,64,0,1,fp8,fp8,0,0.02867199977238973
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,512,12,4,64,0,1,float16,float16,0,0.021162666380405426
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,512,12,4,64,0,1,float16,fp8,0,0.021333334346612293
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,512,12,4,64,0,1,fp8,fp8,0,0.0290133332212766
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,512,12,12,64,0,1,float16,float16,0,0.017407999684413273
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,512,12,12,64,0,1,fp8,fp8,0,0.025258667767047882
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,512,12,12,64,0,1,float16,fp8,0,0.017407999684413273
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,512,12,1,64,0,1,float16,float16,0,0.017237332959969837
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,512,12,1,64,0,1,float16,fp8,0,0.01757866640885671
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,512,12,1,64,0,1,fp8,fp8,0,0.025087999800841015
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,512,12,2,64,0,1,float16,float16,0,0.0170666662355264
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,512,12,2,64,0,1,fp8,fp8,0,0.025258667767047882
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,512,12,2,64,0,1,float16,fp8,0,0.017407999684413273
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,512,12,4,64,0,1,float16,float16,0,0.0170666662355264
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,512,12,4,64,0,1,float16,fp8,0,0.017237332959969837
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,512,12,4,64,0,1,fp8,fp8,0,0.025429333249727886
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,256,12,1,64,0,1,float16,float16,0,0.7434240182240804
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,256,12,2,64,0,1,float16,float16,0,0.838485320409139
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,256,12,1,64,0,1,fp8,fp8,0,0.749397357304891
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,256,12,2,64,0,1,float16,fp8,0,0.8123733202616373
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,256,12,1,64,0,1,float16,fp8,0,0.733184019724528
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,256,12,2,64,0,1,fp8,fp8,0,0.8797866503397623
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,256,12,4,64,0,1,float16,float16,0,1.1381759643554688
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,256,12,4,64,0,1,float16,fp8,0,1.0890239874521892
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,256,12,4,64,0,1,fp8,fp8,0,1.0397013028462727
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,256,12,12,64,0,1,float16,float16,0,0.8900266488393148
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,256,12,12,64,0,1,float16,fp8,0,0.8261973063151041
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,256,12,1,64,0,1,float16,fp8,0,0.24388267596562704
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,256,12,1,64,0,1,float16,float16,0,0.2582186659177144
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,256,12,12,64,0,1,fp8,fp8,0,0.7417173385620117
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,256,12,1,64,0,1,fp8,fp8,0,0.33791999022165936
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,256,12,2,64,0,1,float16,float16,0,0.29047467311223346
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,256,12,2,64,0,1,float16,fp8,0,0.27801599105199176
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,256,12,2,64,0,1,fp8,fp8,0,0.38468265533447266
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,256,12,4,64,0,1,float16,float16,0,0.4237653414408366
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,256,12,4,64,0,1,float16,fp8,0,0.38860801855723065
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,256,12,12,64,0,1,float16,float16,0,0.2701653242111206
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,256,12,12,64,0,1,float16,fp8,0,0.19438934326171875
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,256,12,4,64,0,1,fp8,fp8,0,0.4763306776682536
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,256,12,12,64,0,1,fp8,fp8,0,0.3534506559371948
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,256,12,1,64,0,1,float16,float16,0,0.11417599519093831
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,256,12,1,64,0,1,float16,fp8,0,0.11349333326021831
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,256,12,1,64,0,1,fp8,fp8,0,0.1469439963499705
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,256,12,2,64,0,1,float16,float16,0,0.11332266529401143
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,256,12,2,64,0,1,float16,fp8,0,0.11776000261306763
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,256,12,2,64,0,1,fp8,fp8,0,0.14574933052062988
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,256,12,4,64,0,1,float16,float16,0,0.11434666315714519
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,256,12,4,64,0,1,float16,fp8,0,0.11571199695269267
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,256,12,4,64,0,1,fp8,fp8,0,0.1532586713631948
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,256,12,12,64,0,1,float16,float16,0,0.07014399766921997
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,256,12,12,64,0,1,float16,fp8,0,0.06860800087451935
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,256,12,12,64,0,1,fp8,fp8,0,0.08840533097585042
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,256,12,1,64,0,1,float16,float16,0,0.062463998794555664
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,256,12,1,64,0,1,float16,fp8,0,0.06195199986298879
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,256,12,1,64,0,1,fp8,fp8,0,0.08038400113582611
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,256,12,2,64,0,1,float16,float16,0,0.062463998794555664
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,256,12,2,64,0,1,float16,fp8,0,0.06519466638565063
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,256,12,2,64,0,1,fp8,fp8,0,0.08089600006739299
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,256,12,4,64,0,1,float16,float16,0,0.06382933259010315
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,256,12,4,64,0,1,float16,fp8,0,0.06417066852251689
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,256,12,4,64,0,1,fp8,fp8,0,0.08038400113582611
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,256,12,12,64,0,1,float16,float16,0,0.03839999934037527
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,256,12,12,64,0,1,fp8,fp8,0,0.048469334840774536
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,256,12,12,64,0,1,float16,fp8,0,0.03839999934037527
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,256,12,1,64,0,1,float16,float16,0,0.0363520011305809
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,256,12,1,64,0,1,float16,fp8,0,0.036864000062147774
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,256,12,1,64,0,1,fp8,fp8,0,0.0481279989083608
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,256,12,2,64,0,1,float16,float16,0,0.03601066768169403
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,256,12,2,64,0,1,float16,fp8,0,0.0365226666132609
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,256,12,2,64,0,1,fp8,fp8,0,0.04761599997679392
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,256,12,4,64,0,1,float16,float16,0,0.03703466554482778
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,256,12,4,64,0,1,float16,fp8,0,0.036864000062147774
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,256,12,4,64,0,1,fp8,fp8,0,0.04710400104522705
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,256,12,12,64,0,1,float16,float16,0,0.025600001215934753
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,256,12,12,64,0,1,float16,fp8,0,0.025770666698614757
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,256,12,12,64,0,1,fp8,fp8,0,0.03089066594839096
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,256,12,1,64,0,1,float16,float16,0,0.03515733281771342
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,256,12,1,64,0,1,float16,fp8,0,0.025258667767047882
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,256,12,1,64,0,1,fp8,fp8,0,0.030720000465710957
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,256,12,2,64,0,1,float16,float16,0,0.025087999800841015
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,256,12,2,64,0,1,float16,fp8,0,0.025087999800841015
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,256,12,2,64,0,1,fp8,fp8,0,0.030037333567937214
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,256,12,4,64,0,1,float16,float16,0,0.025600001215934753
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,256,12,4,64,0,1,fp8,fp8,0,0.030720000465710957
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,256,12,4,64,0,1,float16,fp8,0,0.025941332181294758
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,256,12,12,64,0,1,float16,fp8,0,0.016042667130629223
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,256,12,12,64,0,1,float16,float16,0,0.015872000406185787
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,256,12,12,64,0,1,fp8,fp8,0,0.020138667275508244
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,256,12,1,64,0,1,float16,float16,0,0.015530666957298914
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,256,12,1,64,0,1,float16,fp8,0,0.015360000232855478
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,256,12,1,64,0,1,fp8,fp8,0,0.019968000551064808
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,256,12,2,64,0,1,float16,float16,0,0.015530666957298914
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,256,12,2,64,0,1,float16,fp8,0,0.015530666957298914
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,256,12,2,64,0,1,fp8,fp8,0,0.020138667275508244
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,256,12,4,64,0,1,float16,float16,0,0.015872000406185787
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,256,12,4,64,0,1,float16,fp8,0,0.01570133368174235
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,256,12,4,64,0,1,fp8,fp8,0,0.020138667275508244
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,256,12,12,64,0,1,float16,float16,0,0.012970666090647379
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,256,12,12,64,0,1,float16,fp8,0,0.013141332815090815
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,256,12,12,64,0,1,fp8,fp8,0,0.017407999684413273
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,256,12,1,64,0,1,float16,fp8,0,0.013141332815090815
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,256,12,1,64,0,1,float16,float16,0,0.012970666090647379
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,256,12,1,64,0,1,fp8,fp8,0,0.017407999684413273
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,256,12,2,64,0,1,float16,float16,0,0.012970666090647379
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,256,12,2,64,0,1,float16,fp8,0,0.013141332815090815
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,256,12,2,64,0,1,fp8,fp8,0,0.017237332959969837
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,256,12,4,64,0,1,float16,float16,0,0.012970666090647379
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,256,12,4,64,0,1,float16,fp8,0,0.013141332815090815
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,256,12,4,64,0,1,fp8,fp8,0,0.017407999684413273
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,256,12,12,64,0,1,float16,float16,0,0.012117333710193634
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,256,12,12,64,0,1,float16,fp8,0,0.01228800043463707
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,256,12,12,64,0,1,fp8,fp8,0,0.016384000579516094
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,256,12,1,64,0,1,float16,float16,0,0.01228800043463707
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,256,12,1,64,0,1,float16,fp8,0,0.012117333710193634
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,256,12,2,64,0,1,float16,fp8,0,0.01228800043463707
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,256,12,1,64,0,1,fp8,fp8,0,0.016384000579516094
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,256,12,2,64,0,1,float16,float16,0,0.011946666985750198
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,256,12,2,64,0,1,fp8,fp8,0,0.016554666062196095
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,256,12,4,64,0,1,float16,float16,0,0.012117333710193634
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,256,12,4,64,0,1,float16,fp8,0,0.012458667159080505
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,256,12,4,64,0,1,fp8,fp8,0,0.017573333034912746
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,128,12,1,64,0,1,float16,float16,0,0.21572266022364298
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,128,12,1,64,0,1,float16,fp8,0,0.21026132504145303
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,128,12,1,64,0,1,fp8,fp8,0,0.2892799973487854
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,128,12,2,64,0,1,float16,float16,0,0.27426133553187054
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,128,12,2,64,0,1,float16,fp8,0,0.254805326461792
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,128,12,2,64,0,1,fp8,fp8,0,0.33553067843119305
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,128,12,4,64,0,1,float16,float16,0,0.4403200149536133
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,128,12,4,64,0,1,float16,fp8,0,0.4005546569824219
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,128,12,12,64,0,1,float16,float16,0,0.2677759925524394
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,128,12,12,64,0,1,float16,fp8,0,0.19541333119074503
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,128,12,4,64,0,1,fp8,fp8,0,0.43161598841349286
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,128,12,12,64,0,1,fp8,fp8,0,0.33689598242441815
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,128,12,1,64,0,1,float16,float16,0,0.08891733487447102
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,128,12,1,64,0,1,float16,fp8,0,0.0890880028406779
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,128,12,1,64,0,1,fp8,fp8,0,0.11315199732780457
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,128,12,2,64,0,1,float16,float16,0,0.09113599856694539
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,128,12,2,64,0,1,float16,fp8,0,0.08994133273760478
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,128,12,2,64,0,1,fp8,fp8,0,0.11485866705576579
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,128,12,4,64,0,1,float16,float16,0,0.091648002465566
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,128,12,4,64,0,1,float16,fp8,0,0.09028266867001851
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,128,12,4,64,0,1,fp8,fp8,0,0.12441600362459819
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,128,12,12,64,0,1,float16,float16,0,0.05495466788609823
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,128,12,12,64,0,1,float16,fp8,0,0.05256533126036326
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,128,12,12,64,0,1,fp8,fp8,0,0.07116800049940745
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,128,12,1,64,0,1,float16,float16,0,0.048298666874567665
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,128,12,1,64,0,1,float16,fp8,0,0.04863999783992767
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,128,12,2,64,0,1,float16,float16,0,0.04915200173854828
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,128,12,1,64,0,1,fp8,fp8,0,0.06229333579540253
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,128,12,2,64,0,1,float16,fp8,0,0.049322664737701416
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,128,12,2,64,0,1,fp8,fp8,0,0.062463998794555664
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,128,12,4,64,0,1,float16,float16,0,0.04966400067011515
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,128,12,4,64,0,1,float16,fp8,0,0.049322664737701416
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,128,12,4,64,0,1,fp8,fp8,0,0.06263466676076253
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,128,12,12,64,0,1,float16,float16,0,0.031914666295051575
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,128,12,12,64,0,1,float16,fp8,0,0.032085334261258446
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,128,12,12,64,0,1,fp8,fp8,0,0.039936001102129616
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,128,12,1,64,0,1,float16,float16,0,0.029696000119050343
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,128,12,1,64,0,1,float16,fp8,0,0.030037333567937214
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,128,12,1,64,0,1,fp8,fp8,0,0.039594667653242745
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,128,12,2,64,0,1,float16,float16,0,0.030207999050617218
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,128,12,2,64,0,1,float16,fp8,0,0.030378667016824085
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,128,12,2,64,0,1,fp8,fp8,0,0.040106666584809623
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,128,12,4,64,0,1,float16,float16,0,0.030378667016824085
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,128,12,4,64,0,1,float16,fp8,0,0.030378667016824085
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,128,12,12,64,0,1,float16,float16,0,0.02184533327817917
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,128,12,4,64,0,1,fp8,fp8,0,0.039936001102129616
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,128,12,12,64,0,1,float16,fp8,0,0.022015998760859173
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,128,12,12,64,0,1,fp8,fp8,0,0.024746666351954143
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,128,12,1,64,0,1,float16,float16,0,0.021162666380405426
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,128,12,1,64,0,1,float16,fp8,0,0.021503999829292297
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,128,12,1,64,0,1,fp8,fp8,0,0.0240639994541804
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,128,12,2,64,0,1,float16,float16,0,0.020479999482631683
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,128,12,2,64,0,1,float16,fp8,0,0.020992000897725422
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,128,12,2,64,0,1,fp8,fp8,0,0.0240639994541804
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,128,12,4,64,0,1,float16,float16,0,0.021503999829292297
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,128,12,4,64,0,1,float16,fp8,0,0.021674667795499165
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,128,12,4,64,0,1,fp8,fp8,0,0.024405332903067272
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,128,12,12,64,0,1,float16,float16,0,0.013653332988421122
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,128,12,12,64,0,1,float16,fp8,0,0.013482666263977686
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,128,12,12,64,0,1,fp8,fp8,0,0.015872000406185787
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,128,12,1,64,0,1,float16,float16,0,0.012800000607967377
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,128,12,1,64,0,1,float16,fp8,0,0.013141332815090815
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,128,12,2,64,0,1,float16,float16,0,0.01331199953953425
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,128,12,1,64,0,1,fp8,fp8,0,0.015872000406185787
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,128,12,2,64,0,1,float16,fp8,0,0.01331199953953425
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,128,12,2,64,0,1,fp8,fp8,0,0.015872000406185787
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,128,12,4,64,0,1,float16,fp8,0,0.01331199953953425
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,128,12,4,64,0,1,float16,float16,0,0.013482666263977686
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,128,12,4,64,0,1,fp8,fp8,0,0.016042667130629223
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,128,12,12,64,0,1,float16,float16,0,0.010922666639089584
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,128,12,12,64,0,1,float16,fp8,0,0.01109333336353302
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,128,12,12,64,0,1,fp8,fp8,0,0.013482666263977686
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,128,12,1,64,0,1,float16,float16,0,0.010751999914646149
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,128,12,1,64,0,1,float16,fp8,0,0.010751999914646149
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,128,12,1,64,0,1,fp8,fp8,0,0.013653332988421122
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,128,12,2,64,0,1,float16,float16,0,0.010751999914646149
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,128,12,2,64,0,1,float16,fp8,0,0.010922666639089584
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,128,12,2,64,0,1,fp8,fp8,0,0.013653332988421122
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,128,12,4,64,0,1,float16,float16,0,0.010751999914646149
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,128,12,4,64,0,1,float16,fp8,0,0.010922666639089584
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,128,12,4,64,0,1,fp8,fp8,0,0.013653332988421122
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,128,12,12,64,0,1,float16,float16,0,0.010069333637754122
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,128,12,12,64,0,1,float16,fp8,0,0.010069333637754122
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,128,12,12,64,0,1,fp8,fp8,0,0.012629333883523941
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,128,12,1,64,0,1,float16,float16,0,0.009898666913310686
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,128,12,1,64,0,1,float16,fp8,0,0.010069333637754122
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,128,12,1,64,0,1,fp8,fp8,0,0.012800000607967377
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,128,12,2,64,0,1,float16,float16,0,0.009898666913310686
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,128,12,2,64,0,1,float16,fp8,0,0.010069333637754122
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,128,12,2,64,0,1,fp8,fp8,0,0.012629333883523941
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,128,12,4,64,0,1,float16,float16,0,0.010069333637754122
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,128,12,4,64,0,1,float16,fp8,0,0.010410666465759277
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,128,12,4,64,0,1,fp8,fp8,0,0.012800000607967377
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,128,12,12,64,0,1,float16,float16,0,0.00972800018886725
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,128,12,12,64,0,1,float16,fp8,0,0.009898666913310686
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,128,12,12,64,0,1,fp8,fp8,0,0.012458667159080505
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,128,12,1,64,0,1,float16,float16,0,0.00972800018886725
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,128,12,1,64,0,1,float16,fp8,0,0.009898666913310686
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,128,12,1,64,0,1,fp8,fp8,0,0.012629333883523941
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,128,12,2,64,0,1,float16,float16,0,0.009557333464423815
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,128,12,2,64,0,1,float16,fp8,0,0.009898666913310686
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,128,12,2,64,0,1,fp8,fp8,0,0.012629333883523941
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,128,12,4,64,0,1,float16,float16,0,0.00972800018886725
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,128,12,4,64,0,1,float16,fp8,0,0.010069333637754122
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,128,12,4,64,0,1,fp8,fp8,0,0.012800000607967377
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,64,12,1,64,0,1,float16,float16,0,0.07167999943097432
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,64,12,1,64,0,1,float16,fp8,0,0.07202133536338806
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,64,12,1,64,0,1,fp8,fp8,0,0.1532586713631948
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,64,12,2,64,0,1,float16,float16,0,0.07287466526031494
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,64,12,2,64,0,1,float16,fp8,0,0.07253333429495494
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,64,12,2,64,0,1,fp8,fp8,0,0.15598932902018228
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,64,12,4,64,0,1,float16,float16,0,0.0846506655216217
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,64,12,4,64,0,1,float16,fp8,0,0.07492266595363617
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,64,12,4,64,0,1,fp8,fp8,0,0.17100799083709717
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,64,12,12,64,0,1,float16,float16,0,0.04744533201058706
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,64,12,12,64,0,1,float16,fp8,0,0.045738667249679565
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,64,12,12,64,0,1,fp8,fp8,0,0.09676800171534221
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,64,12,1,64,0,1,float16,float16,0,0.04113066693147024
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,64,12,1,64,0,1,float16,fp8,0,0.04164266586303711
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,64,12,1,64,0,1,fp8,fp8,0,0.08499200145403545
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,64,12,2,64,0,1,float16,float16,0,0.04113066693147024
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,64,12,2,64,0,1,float16,fp8,0,0.04164266586303711
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,64,12,2,64,0,1,fp8,fp8,0,0.08567466338475545
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,64,12,4,64,0,1,float16,float16,0,0.042837331692377724
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,64,12,4,64,0,1,float16,fp8,0,0.04164266586303711
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,64,12,4,64,0,1,fp8,fp8,0,0.08618666728337605
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,64,12,12,64,0,1,float16,float16,0,0.027647999425729115
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,64,12,12,64,0,1,float16,fp8,0,0.027477333943049114
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,64,12,12,64,0,1,fp8,fp8,0,0.051029334465662636
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,64,12,1,64,0,1,float16,float16,0,0.025258667767047882
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,64,12,1,64,0,1,float16,fp8,0,0.025429333249727886
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,64,12,1,64,0,1,fp8,fp8,0,0.04966400067011515
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,64,12,2,64,0,1,float16,float16,0,0.025258667767047882
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,64,12,2,64,0,1,float16,fp8,0,0.025600001215934753
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,64,12,2,64,0,1,fp8,fp8,0,0.05000533163547516
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,64,12,4,64,0,1,float16,fp8,0,0.02628266563018163
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,64,12,4,64,0,1,float16,float16,0,0.025941332181294758
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,64,12,4,64,0,1,fp8,fp8,0,0.050517335534095764
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,64,12,12,64,0,1,float16,float16,0,0.01911466692884763
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,64,12,12,64,0,1,float16,fp8,0,0.019626667102177937
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,64,12,12,64,0,1,fp8,fp8,0,0.03345066557327906
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,64,12,1,64,0,1,float16,float16,0,0.01826133330663045
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,64,12,1,64,0,1,float16,fp8,0,0.018432000031073887
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,64,12,1,64,0,1,fp8,fp8,0,0.03310933212439219
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,64,12,2,64,0,1,float16,float16,0,0.01826133330663045
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,64,12,2,64,0,1,float16,fp8,0,0.018602666755517323
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,64,12,2,64,0,1,fp8,fp8,0,0.03276800115903219
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,64,12,4,64,0,1,float16,float16,0,0.01911466692884763
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,64,12,4,64,0,1,float16,fp8,0,0.01911466692884763
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,64,12,4,64,0,1,fp8,fp8,0,0.03362133353948593
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,64,12,12,64,0,1,float16,float16,0,0.01228800043463707
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,64,12,12,64,0,1,float16,fp8,0,0.012117333710193634
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,64,12,12,64,0,1,fp8,fp8,0,0.018602666755517323
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,64,12,1,64,0,1,float16,float16,0,0.011264000087976456
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,64,12,1,64,0,1,float16,fp8,0,0.011434666812419891
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,64,12,2,64,0,1,float16,float16,0,0.011434666812419891
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,64,12,1,64,0,1,fp8,fp8,0,0.018602666755517323
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,64,12,2,64,0,1,float16,fp8,0,0.011776000261306763
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,64,12,2,64,0,1,fp8,fp8,0,0.018602666755517323
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,64,12,4,64,0,1,float16,float16,0,0.011776000261306763
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,64,12,4,64,0,1,float16,fp8,0,0.011776000261306763
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,64,12,4,64,0,1,fp8,fp8,0,0.01877333347996076
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,64,12,12,64,0,1,float16,float16,0,0.009557333464423815
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,64,12,12,64,0,1,float16,fp8,0,0.009898666913310686
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,64,12,12,64,0,1,fp8,fp8,0,0.014165333161751429
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,64,12,1,64,0,1,float16,float16,0,0.03925333420435587
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,64,12,1,64,0,1,float16,fp8,0,0.010751999914646149
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,64,12,1,64,0,1,fp8,fp8,0,0.014165333161751429
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,64,12,2,64,0,1,float16,float16,0,0.00938666673998038
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,64,12,2,64,0,1,float16,fp8,0,0.009898666913310686
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,64,12,2,64,0,1,fp8,fp8,0,0.013994666437307993
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,64,12,4,64,0,1,float16,float16,0,0.009557333464423815
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,64,12,4,64,0,1,float16,fp8,0,0.009898666913310686
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,64,12,4,64,0,1,fp8,fp8,0,0.014335999886194864
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,64,12,12,64,0,1,float16,float16,0,0.008874666566650072
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,64,12,12,64,0,1,float16,fp8,0,0.009045333291093508
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,64,12,12,64,0,1,fp8,fp8,0,0.01228800043463707
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,64,12,1,64,0,1,float16,float16,0,0.008874666566650072
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,64,12,1,64,0,1,float16,fp8,0,0.009045333291093508
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,64,12,1,64,0,1,fp8,fp8,0,0.012458667159080505
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,64,12,2,64,0,1,float16,float16,0,0.008703999842206636
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,64,12,2,64,0,1,float16,fp8,0,0.009045333291093508
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,64,12,2,64,0,1,fp8,fp8,0,0.012458667159080505
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,64,12,4,64,0,1,float16,float16,0,0.009045333291093508
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,64,12,4,64,0,1,float16,fp8,0,0.009216000015536943
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,64,12,4,64,0,1,fp8,fp8,0,0.012629333883523941
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,64,12,12,64,0,1,float16,float16,0,0.008362666393319765
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,64,12,12,64,0,1,float16,fp8,0,0.008874666566650072
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,64,12,12,64,0,1,fp8,fp8,0,0.011946666985750198
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,64,12,1,64,0,1,float16,float16,0,0.008703999842206636
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,64,12,1,64,0,1,fp8,fp8,0,0.012458667159080505
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,64,12,1,64,0,1,float16,fp8,0,0.009045333291093508
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,64,12,2,64,0,1,float16,float16,0,0.0085333331177632
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,64,12,2,64,0,1,float16,fp8,0,0.008874666566650072
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,64,12,2,64,0,1,fp8,fp8,0,0.04113066693147024
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,64,12,4,64,0,1,float16,float16,0,0.008703999842206636
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,64,12,4,64,0,1,float16,fp8,0,0.008874666566650072
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,64,12,4,64,0,1,fp8,fp8,0,0.012458667159080505
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,64,12,12,64,0,1,float16,float16,0,0.0085333331177632
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,64,12,12,64,0,1,float16,fp8,0,0.008703999842206636
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,64,12,12,64,0,1,fp8,fp8,0,0.011946666985750198
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,64,12,1,64,0,1,float16,float16,0,0.0085333331177632
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,64,12,1,64,0,1,float16,fp8,0,0.008874666566650072
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,64,12,1,64,0,1,fp8,fp8,0,0.012117333710193634
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,64,12,2,64,0,1,float16,float16,0,0.0085333331177632
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,64,12,2,64,0,1,float16,fp8,0,0.008874666566650072
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,64,12,2,64,0,1,fp8,fp8,0,0.01228800043463707
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,64,12,4,64,0,1,float16,float16,0,0.037717332442601524
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,64,12,4,64,0,1,float16,fp8,0,0.008703999842206636
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,64,12,4,64,0,1,fp8,fp8,0,0.01228800043463707
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,32,12,1,64,0,1,float16,float16,0,0.04539733131726583
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,32,12,1,64,0,1,float16,fp8,0,0.04539733131726583
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,32,12,1,64,0,1,fp8,fp8,0,0.1346560021241506
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,32,12,2,64,0,1,float16,float16,0,0.045226668318112694
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,32,12,2,64,0,1,float16,fp8,0,0.045567999283472695
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,32,12,2,64,0,1,fp8,fp8,0,0.1346560021241506
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,32,12,4,64,0,1,float16,float16,0,0.04659200211366018
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,32,12,4,64,0,1,float16,fp8,0,0.045909335215886436
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,32,12,4,64,0,1,fp8,fp8,0,0.1358506679534912
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,32,12,12,64,0,1,float16,float16,0,0.0288426677385966
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,32,12,12,64,0,1,float16,fp8,0,0.0288426677385966
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,32,12,12,64,0,1,fp8,fp8,0,0.07628799974918365
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,32,12,1,64,0,1,float16,float16,0,0.027818667391935985
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,32,12,1,64,0,1,float16,fp8,0,0.027647999425729115
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,32,12,1,64,0,1,fp8,fp8,0,0.07645866771539052
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,32,12,2,64,0,1,float16,float16,0,0.027818667391935985
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,32,12,2,64,0,1,float16,fp8,0,0.027818667391935985
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,32,12,2,64,0,1,fp8,fp8,0,0.0766293356815974
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,32,12,4,64,0,1,float16,float16,0,0.02867199977238973
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,32,12,4,64,0,1,float16,fp8,0,0.027989332874615986
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,32,12,4,64,0,1,fp8,fp8,0,0.07714133461316426
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,32,12,12,64,0,1,float16,float16,0,0.01911466692884763
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,32,12,12,64,0,1,float16,fp8,0,0.01877333347996076
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,32,12,12,64,0,1,fp8,fp8,0,0.045738667249679565
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,32,12,1,64,0,1,float16,float16,0,0.01826133330663045
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,32,12,1,64,0,1,float16,fp8,0,0.01826133330663045
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,32,12,1,64,0,1,fp8,fp8,0,0.045738667249679565
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,32,12,2,64,0,1,float16,float16,0,0.018432000031073887
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,32,12,2,64,0,1,float16,fp8,0,0.018602666755517323
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,32,12,2,64,0,1,fp8,fp8,0,0.045567999283472695
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,32,12,4,64,0,1,float16,float16,0,0.01877333347996076
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,32,12,4,64,0,1,float16,fp8,0,0.01911466692884763
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,32,12,4,64,0,1,fp8,fp8,0,0.045567999283472695
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,32,12,12,64,0,1,float16,float16,0,0.012800000607967377
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,32,12,12,64,0,1,float16,fp8,0,0.012800000607967377
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,32,12,12,64,0,1,fp8,fp8,0,0.0290133332212766
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,32,12,1,64,0,1,float16,float16,0,0.01228800043463707
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,32,12,1,64,0,1,float16,fp8,0,0.012458667159080505
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,32,12,1,64,0,1,fp8,fp8,0,0.0288426677385966
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,32,12,2,64,0,1,float16,float16,0,0.012458667159080505
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,32,12,2,64,0,1,float16,fp8,0,0.012629333883523941
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,32,12,2,64,0,1,fp8,fp8,0,0.0288426677385966
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,32,12,4,64,0,1,float16,float16,0,0.012800000607967377
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,32,12,4,64,0,1,float16,fp8,0,0.012800000607967377
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,32,12,4,64,0,1,fp8,fp8,0,0.029525332152843475
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,32,12,12,64,0,1,float16,float16,0,0.00972800018886725
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,32,12,12,64,0,1,float16,fp8,0,0.009898666913310686
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,32,12,12,64,0,1,fp8,fp8,0,0.017407999684413273
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,32,12,1,64,0,1,float16,float16,0,0.009557333464423815
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,32,12,1,64,0,1,float16,fp8,0,0.00972800018886725
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,32,12,1,64,0,1,fp8,fp8,0,0.01757866640885671
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,32,12,2,64,0,1,float16,float16,0,0.00972800018886725
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,32,12,2,64,0,1,float16,fp8,0,0.010069333637754122
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,32,12,2,64,0,1,fp8,fp8,0,0.01757866640885671
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,32,12,4,64,0,1,float16,float16,0,0.009557333464423815
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,32,12,4,64,0,1,float16,fp8,0,0.009898666913310686
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,32,12,4,64,0,1,fp8,fp8,0,0.01757866640885671
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,32,12,12,64,0,1,float16,float16,0,0.008362666393319765
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,32,12,12,64,0,1,float16,fp8,0,0.009045333291093508
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,32,12,12,64,0,1,fp8,fp8,0,0.013482666263977686
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,32,12,1,64,0,1,float16,float16,0,0.008703999842206636
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,32,12,1,64,0,1,float16,fp8,0,0.009893333539366722
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,32,12,1,64,0,1,fp8,fp8,0,0.013482666263977686
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,32,12,2,64,0,1,float16,float16,0,0.0085333331177632
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,32,12,2,64,0,1,float16,fp8,0,0.0085333331177632
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,32,12,2,64,0,1,fp8,fp8,0,0.013653332988421122
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,32,12,4,64,0,1,float16,float16,0,0.008703999842206636
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,32,12,4,64,0,1,float16,fp8,0,0.008874666566650072
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,32,12,4,64,0,1,fp8,fp8,0,0.013823999712864557
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,32,12,12,64,0,1,float16,float16,0,0.008703999842206636
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,32,12,12,64,0,1,float16,fp8,0,0.008192000289758047
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,32,12,12,64,0,1,fp8,fp8,0,0.012117333710193634
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,32,12,1,64,0,1,float16,float16,0,0.008021333565314611
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,32,12,1,64,0,1,float16,fp8,0,0.008192000289758047
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,32,12,1,64,0,1,fp8,fp8,0,0.01228800043463707
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,32,12,2,64,0,1,float16,float16,0,0.008021333565314611
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,32,12,2,64,0,1,float16,fp8,0,0.008874666566650072
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,32,12,2,64,0,1,fp8,fp8,0,0.01228800043463707
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,32,12,4,64,0,1,float16,float16,0,0.008192000289758047
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,32,12,4,64,0,1,float16,fp8,0,0.008874666566650072
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,32,12,4,64,0,1,fp8,fp8,0,0.012117333710193634
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,32,12,12,64,0,1,float16,float16,0,0.0085333331177632
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,32,12,12,64,0,1,float16,fp8,0,0.008362666393319765
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,32,12,12,64,0,1,fp8,fp8,0,0.011946666985750198
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,32,12,1,64,0,1,float16,float16,0,0.0085333331177632
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,32,12,1,64,0,1,float16,fp8,0,0.008874666566650072
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,32,12,1,64,0,1,fp8,fp8,0,0.011946666985750198
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,32,12,2,64,0,1,float16,float16,0,0.008362666393319765
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,32,12,2,64,0,1,float16,fp8,0,0.008192000289758047
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,32,12,2,64,0,1,fp8,fp8,0,0.011946666985750198
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,32,12,4,64,0,1,float16,float16,0,0.0085333331177632
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,32,12,4,64,0,1,float16,fp8,0,0.008362666393319765
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,32,12,4,64,0,1,fp8,fp8,0,0.012117333710193634
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,32,12,12,64,0,1,float16,float16,0,0.008703999842206636
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,32,12,12,64,0,1,float16,fp8,0,0.008192000289758047
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,32,12,12,64,0,1,fp8,fp8,0,0.011605333536863327
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,32,12,1,64,0,1,float16,float16,0,0.007850666840871176
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,32,12,1,64,0,1,float16,fp8,0,0.008192000289758047
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,32,12,1,64,0,1,fp8,fp8,0,0.011946666985750198
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,32,12,2,64,0,1,float16,fp8,0,0.008703999842206636
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,32,12,2,64,0,1,float16,float16,0,0.008021333565314611
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,32,12,2,64,0,1,fp8,fp8,0,0.01228800043463707
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,32,12,4,64,0,1,float16,float16,0,0.008703999842206636
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,32,12,4,64,0,1,float16,fp8,0,0.0085333331177632
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,32,12,4,64,0,1,fp8,fp8,0,0.012117333710193634
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,16,12,1,64,0,1,float16,float16,0,0.03618133316437403
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,16,12,1,64,0,1,float16,fp8,0,0.03618133316437403
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,16,12,1,64,0,1,fp8,fp8,0,0.1264639993508657
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,16,12,2,64,0,1,float16,float16,0,0.0363520011305809
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,16,12,2,64,0,1,float16,fp8,0,0.03618133316437403
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,16,12,2,64,0,1,fp8,fp8,0,0.1262933313846588
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,16,12,4,64,0,1,float16,float16,0,0.03703466554482778
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,16,12,4,64,0,1,float16,fp8,0,0.037205333511034645
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,16,12,4,64,0,1,fp8,fp8,0,0.12680533528327942
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,16,12,12,64,0,1,float16,float16,0,0.022698665658632915
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,16,12,12,64,0,1,float16,fp8,0,0.022698665658632915
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,16,12,12,64,0,1,fp8,fp8,0,0.07150933146476746
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,16,12,1,64,0,1,float16,float16,0,0.022869333624839783
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,16,12,1,64,0,1,float16,fp8,0,0.022869333624839783
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,16,12,1,64,0,1,fp8,fp8,0,0.0718506673971812
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,16,12,2,64,0,1,float16,float16,0,0.023039999107519787
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,16,12,2,64,0,1,float16,fp8,0,0.023039999107519787
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,16,12,2,64,0,1,fp8,fp8,0,0.07133866846561432
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,16,12,4,64,0,1,float16,float16,0,0.023552000522613525
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,16,12,4,64,0,1,float16,fp8,0,0.02372266600529353
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,16,12,4,64,0,1,fp8,fp8,0,0.07202133536338806
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,16,12,12,64,0,1,float16,float16,0,0.013994666437307993
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,16,12,12,64,0,1,float16,fp8,0,0.013823999712864557
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,16,12,12,64,0,1,fp8,fp8,0,0.040618665516376495
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,16,12,1,64,0,1,float16,float16,0,0.013823999712864557
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,16,12,1,64,0,1,float16,fp8,0,0.013823999712864557
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,16,12,1,64,0,1,fp8,fp8,0,0.04095999896526337
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,16,12,2,64,0,1,float16,float16,0,0.013994666437307993
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,16,12,2,64,0,1,float16,fp8,0,0.014165333161751429
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,16,12,2,64,0,1,fp8,fp8,0,0.04164266586303711
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,16,12,4,64,0,1,float16,float16,0,0.014165333161751429
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,16,12,4,64,0,1,float16,fp8,0,0.014165333161751429
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,16,12,4,64,0,1,fp8,fp8,0,0.04164266586303711
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,16,12,12,64,0,1,float16,fp8,0,0.010751999914646149
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,16,12,12,64,0,1,float16,float16,0,0.010581333190202713
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,16,12,12,64,0,1,fp8,fp8,0,0.027477333943049114
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,16,12,1,64,0,1,float16,float16,0,0.010751999914646149
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,16,12,1,64,0,1,float16,fp8,0,0.01109333336353302
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,16,12,1,64,0,1,fp8,fp8,0,0.027647999425729115
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,16,12,2,64,0,1,float16,float16,0,0.010751999914646149
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,16,12,2,64,0,1,float16,fp8,0,0.01109333336353302
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,16,12,2,64,0,1,fp8,fp8,0,0.027989332874615986
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,16,12,4,64,0,1,float16,float16,0,0.010751999914646149
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,16,12,4,64,0,1,float16,fp8,0,0.011264000087976456
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,16,12,4,64,0,1,fp8,fp8,0,0.028160000840822857
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,16,12,12,64,0,1,float16,float16,0,0.008192000289758047
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,16,12,12,64,0,1,float16,fp8,0,0.0085333331177632
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,16,12,12,64,0,1,fp8,fp8,0,0.01621333385507266
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,16,12,1,64,0,1,float16,float16,0,0.008362666393319765
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,16,12,1,64,0,1,float16,fp8,0,0.0085333331177632
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,16,12,1,64,0,1,fp8,fp8,0,0.016554666062196095
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,16,12,2,64,0,1,float16,float16,0,0.0085333331177632
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,16,12,2,64,0,1,float16,fp8,0,0.008874666566650072
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,16,12,2,64,0,1,fp8,fp8,0,0.01672533278663953
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,16,12,4,64,0,1,float16,float16,0,0.0085333331177632
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,16,12,4,64,0,1,float16,fp8,0,0.008703999842206636
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,16,12,4,64,0,1,fp8,fp8,0,0.01672533278663953
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,16,12,12,64,0,1,float16,float16,0,0.009045333291093508
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,16,12,12,64,0,1,float16,fp8,0,0.009189333145817121
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,16,12,12,64,0,1,fp8,fp8,0,0.013141332815090815
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,16,12,1,64,0,1,float16,float16,0,0.008703999842206636
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,16,12,1,64,0,1,float16,fp8,0,0.008192000289758047
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,16,12,1,64,0,1,fp8,fp8,0,0.013141332815090815
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,16,12,2,64,0,1,float16,float16,0,0.008021333565314611
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,16,12,2,64,0,1,float16,fp8,0,0.008192000289758047
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,16,12,2,64,0,1,fp8,fp8,0,0.01331199953953425
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,16,12,4,64,0,1,float16,float16,0,0.008703999842206636
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,16,12,4,64,0,1,float16,fp8,0,0.008362666393319765
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,16,12,4,64,0,1,fp8,fp8,0,0.01331199953953425
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,16,12,12,64,0,1,float16,float16,0,0.008192000289758047
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,16,12,12,64,0,1,float16,fp8,0,0.008992000172535578
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,16,12,12,64,0,1,fp8,fp8,0,0.011776000261306763
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,16,12,1,64,0,1,float16,float16,0,0.008192000289758047
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,16,12,1,64,0,1,float16,fp8,0,0.009045333291093508
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,16,12,1,64,0,1,fp8,fp8,0,0.012117333710193634
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,16,12,2,64,0,1,float16,float16,0,0.0085333331177632
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,16,12,2,64,0,1,float16,fp8,0,0.008703999842206636
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,16,12,2,64,0,1,fp8,fp8,0,0.012117333710193634
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,16,12,4,64,0,1,float16,float16,0,0.008703999842206636
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,16,12,4,64,0,1,float16,fp8,0,0.008021333565314611
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,16,12,4,64,0,1,fp8,fp8,0,0.012117333710193634
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,16,12,12,64,0,1,float16,float16,0,0.008703999842206636
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,16,12,12,64,0,1,float16,fp8,0,0.008703999842206636
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,16,12,12,64,0,1,fp8,fp8,0,0.011946666985750198
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,16,12,1,64,0,1,float16,float16,0,0.008703999842206636
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,16,12,1,64,0,1,float16,fp8,0,0.008021333565314611
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,16,12,1,64,0,1,fp8,fp8,0,0.011946666985750198
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,16,12,2,64,0,1,float16,float16,0,0.007850666840871176
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,16,12,2,64,0,1,float16,fp8,0,0.008362666393319765
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,16,12,2,64,0,1,fp8,fp8,0,0.012117333710193634
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,16,12,4,64,0,1,float16,float16,0,0.0085333331177632
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,16,12,4,64,0,1,float16,fp8,0,0.008192000289758047
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,16,12,4,64,0,1,fp8,fp8,0,0.011946666985750198
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,16,12,12,64,0,1,float16,float16,0,0.008192000289758047
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,16,12,12,64,0,1,float16,fp8,0,0.0085333331177632
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,16,12,12,64,0,1,fp8,fp8,0,0.011605333536863327
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,16,12,1,64,0,1,float16,fp8,0,0.008874666566650072
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,16,12,1,64,0,1,float16,float16,0,0.0085333331177632
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,16,12,1,64,0,1,fp8,fp8,0,0.011776000261306763
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,16,12,2,64,0,1,float16,float16,0,0.007680000116427739
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,16,12,2,64,0,1,float16,fp8,0,0.009045333291093508
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,16,12,2,64,0,1,fp8,fp8,0,0.011776000261306763
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,16,12,4,64,0,1,float16,float16,0,0.008362666393319765
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,16,12,4,64,0,1,float16,fp8,0,0.008703999842206636
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,16,12,4,64,0,1,fp8,fp8,0,0.011946666985750198
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,16384,8,1,64,0,1,float16,float16,0,15.631871541341146
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,16384,8,1,64,0,1,float16,fp8,0,15.520938873291016
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,16384,8,2,64,0,1,float16,float16,0,15.74673080444336
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,16384,8,1,64,0,1,fp8,fp8,0,19.597994486490887
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,16384,8,2,64,0,1,float16,fp8,0,15.314090728759766
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,16384,8,4,64,0,1,float16,float16,0,15.572650909423828
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,16384,8,4,64,0,1,float16,fp8,0,15.294976552327475
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,16384,8,2,64,0,1,fp8,fp8,0,20.160853068033855
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,16384,8,8,64,0,1,float16,float16,0,7.522474924723308
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,16384,8,8,64,0,1,float16,fp8,0,8.390656153361002
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,16384,8,1,64,0,1,float16,float16,0,8.020138422648111
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,16384,8,1,64,0,1,float16,fp8,0,7.670442581176758
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,16384,8,8,64,0,1,fp8,fp8,0,10.197162628173828
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,16384,8,4,64,0,1,fp8,fp8,0,20.22314707438151
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,16384,8,1,64,0,1,fp8,fp8,0,9.917269388834635
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,16384,8,2,64,0,1,float16,float16,0,7.818752288818359
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,16384,8,2,64,0,1,float16,fp8,0,7.596031824747722
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,16384,8,2,64,0,1,fp8,fp8,0,10.109952290852865
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,16384,8,8,64,0,1,float16,float16,0,3.6136960983276367
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,16384,8,8,64,0,1,float16,fp8,0,3.3989973068237305
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,16384,8,4,64,0,1,float16,float16,0,7.89572270711263
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,16384,8,4,64,0,1,float16,fp8,0,7.456768035888672
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,16384,8,8,64,0,1,fp8,fp8,0,5.1594241460164385
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,16384,8,4,64,0,1,fp8,fp8,0,10.040661493937174
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,16384,8,1,64,0,1,float16,float16,0,3.4082132975260415
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,16384,8,1,64,0,1,float16,fp8,0,3.6070400873819985
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,16384,8,2,64,0,1,float16,float16,0,3.452586809794108
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,16384,8,2,64,0,1,float16,fp8,0,3.4348373413085938
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,16384,8,1,64,0,1,fp8,fp8,0,4.902912139892578
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,16384,8,2,64,0,1,fp8,fp8,0,4.996778806050618
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,16384,8,4,64,0,1,float16,float16,0,3.458218574523926
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,16384,8,4,64,0,1,float16,fp8,0,3.4251092274983725
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,16384,8,4,64,0,1,fp8,fp8,0,4.972544034322103
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,16384,8,8,64,0,1,float16,fp8,0,1.721343994140625
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,16384,8,8,64,0,1,float16,float16,0,1.7421654065450032
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,16384,8,1,64,0,1,float16,float16,0,1.831424077351888
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,16384,8,8,64,0,1,fp8,fp8,0,2.5878186225891113
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,16384,8,1,64,0,1,float16,fp8,0,1.8036053975423176
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,16384,8,1,64,0,1,fp8,fp8,0,2.5470293362935386
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,16384,8,2,64,0,1,float16,float16,0,1.9290453592936199
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,16384,8,2,64,0,1,float16,fp8,0,1.8810879389444988
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,16384,8,2,64,0,1,fp8,fp8,0,2.5198933283487954
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,16384,8,4,64,0,1,float16,float16,0,1.7179306348164876
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,16384,8,4,64,0,1,float16,fp8,0,1.8302292823791504
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,16384,8,4,64,0,1,fp8,fp8,0,2.5311573346455893
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,12288,8,1,64,0,1,float16,float16,0,9.159850438435873
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,12288,8,1,64,0,1,float16,fp8,0,8.953343709309896
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,12288,8,1,64,0,1,fp8,fp8,0,11.171669006347656
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,12288,8,2,64,0,1,float16,float16,0,8.665599822998047
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,12288,8,2,64,0,1,float16,fp8,0,9.057792027791342
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,12288,8,4,64,0,1,float16,float16,0,8.893098831176758
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,12288,8,4,64,0,1,float16,fp8,0,8.647167841593424
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,12288,8,2,64,0,1,fp8,fp8,0,11.652095794677734
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,12288,8,8,64,0,1,float16,float16,0,4.31547737121582
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,12288,8,8,64,0,1,float16,fp8,0,4.316842714945476
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,12288,8,4,64,0,1,fp8,fp8,0,11.730091094970703
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,12288,8,8,64,0,1,fp8,fp8,0,5.872469584147136
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,12288,8,1,64,0,1,float16,float16,0,4.135082562764485
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,12288,8,1,64,0,1,float16,fp8,0,3.666090647379557
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,12288,8,1,64,0,1,fp8,fp8,0,5.568853378295898
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,12288,8,2,64,0,1,float16,float16,0,4.238165219624837
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,12288,8,2,64,0,1,float16,fp8,0,3.720362663269043
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,12288,8,4,64,0,1,float16,float16,0,4.150272051493327
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,12288,8,2,64,0,1,fp8,fp8,0,5.646165211995442
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,12288,8,4,64,0,1,float16,fp8,0,4.097536087036133
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,12288,8,8,64,0,1,float16,float16,0,1.9631786346435547
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,12288,8,8,64,0,1,float16,fp8,0,1.90720001856486
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,12288,8,4,64,0,1,fp8,fp8,0,5.755733489990234
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,12288,8,8,64,0,1,fp8,fp8,0,2.93887996673584
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,12288,8,1,64,0,1,float16,float16,0,1.962154706319173
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,12288,8,1,64,0,1,float16,fp8,0,1.9655680656433105
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,12288,8,2,64,0,1,float16,float16,0,1.8877439498901367
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,12288,8,1,64,0,1,fp8,fp8,0,2.7636054356892905
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,12288,8,2,64,0,1,float16,fp8,0,1.8884266217549641
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,12288,8,4,64,0,1,float16,float16,0,1.864192008972168
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,12288,8,2,64,0,1,fp8,fp8,0,2.7448320388793945
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,12288,8,4,64,0,1,float16,fp8,0,1.9003732999165852
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,12288,8,4,64,0,1,fp8,fp8,0,2.7910827000935874
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,12288,8,8,64,0,1,float16,float16,0,1.0333866278330486
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,12288,8,8,64,0,1,float16,fp8,0,1.0859519640604656
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,12288,8,8,64,0,1,fp8,fp8,0,1.5144960085550945
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,12288,8,1,64,0,1,float16,float16,0,1.0693973700205486
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,12288,8,1,64,0,1,float16,fp8,0,1.0763946374257405
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,12288,8,1,64,0,1,fp8,fp8,0,1.4825812975565593
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,12288,8,2,64,0,1,float16,float16,0,1.0356053511301677
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,12288,8,2,64,0,1,float16,fp8,0,1.0820266405741374
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,12288,8,2,64,0,1,fp8,fp8,0,1.504085381825765
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,12288,8,4,64,0,1,float16,float16,0,1.1101866563161213
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,12288,8,4,64,0,1,float16,fp8,0,1.1260586579640706
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,12288,8,4,64,0,1,fp8,fp8,0,1.4972586631774902
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,10240,8,1,64,0,1,float16,float16,0,5.623807907104492
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,10240,8,1,64,0,1,float16,fp8,0,6.280874888102214
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,10240,8,2,64,0,1,float16,float16,0,6.325589497884114
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,10240,8,1,64,0,1,fp8,fp8,0,7.970303853352864
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,10240,8,2,64,0,1,float16,fp8,0,5.956778844197591
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,10240,8,4,64,0,1,float16,float16,0,6.051328023274739
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,10240,8,2,64,0,1,fp8,fp8,0,7.988053639729817
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,10240,8,4,64,0,1,float16,fp8,0,5.550591786702474
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,10240,8,8,64,0,1,float16,float16,0,3.0247252782185874
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,10240,8,8,64,0,1,float16,fp8,0,2.9356374740600586
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,10240,8,8,64,0,1,fp8,fp8,0,4.1716054280598955
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,10240,8,4,64,0,1,fp8,fp8,0,8.185002644856771
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,10240,8,1,64,0,1,float16,float16,0,2.711893399556478
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,10240,8,1,64,0,1,float16,fp8,0,2.634922663370768
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,10240,8,2,64,0,1,float16,float16,0,2.635093371073405
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,10240,8,1,64,0,1,fp8,fp8,0,3.708928108215332
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,10240,8,2,64,0,1,float16,fp8,0,2.6356053352355957
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,10240,8,2,64,0,1,fp8,fp8,0,3.8642346064249673
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,10240,8,4,64,0,1,float16,float16,0,2.8049065272013345
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,10240,8,4,64,0,1,float16,fp8,0,2.6267306009928384
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,10240,8,8,64,0,1,float16,float16,0,1.3649919827779133
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,10240,8,4,64,0,1,fp8,fp8,0,3.970047950744629
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,10240,8,8,64,0,1,float16,fp8,0,1.3370025952657063
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,10240,8,8,64,0,1,fp8,fp8,0,2.0834986368815103
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,10240,8,1,64,0,1,float16,float16,0,1.363968054453532
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,10240,8,1,64,0,1,float16,fp8,0,1.36789337793986
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,10240,8,2,64,0,1,float16,float16,0,1.3375147183736165
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,10240,8,1,64,0,1,fp8,fp8,0,1.9611306190490723
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,10240,8,2,64,0,1,float16,fp8,0,1.345706621805827
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,10240,8,4,64,0,1,float16,float16,0,1.3272746404012044
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,10240,8,2,64,0,1,fp8,fp8,0,1.9512319564819336
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,10240,8,4,64,0,1,float16,fp8,0,1.3219839731852214
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,10240,8,4,64,0,1,fp8,fp8,0,1.944917360941569
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,10240,8,8,64,0,1,float16,float16,0,0.7511040369669596
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,10240,8,8,64,0,1,float16,fp8,0,0.741546630859375
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,10240,8,8,64,0,1,fp8,fp8,0,1.0864640076955159
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,10240,8,1,64,0,1,float16,float16,0,0.7570772965749105
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,10240,8,1,64,0,1,float16,fp8,0,0.7654399871826172
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,10240,8,2,64,0,1,float16,float16,0,0.7837013403574625
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,10240,8,1,64,0,1,fp8,fp8,0,1.0944853623708088
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,10240,8,2,64,0,1,float16,fp8,0,0.7594666481018066
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,10240,8,2,64,0,1,fp8,fp8,0,1.101482629776001
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,10240,8,4,64,0,1,float16,float16,0,0.7236266930898031
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,10240,8,4,64,0,1,float16,fp8,0,0.7439359823862711
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,10240,8,4,64,0,1,fp8,fp8,0,1.0845866998036702
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,8192,8,1,64,0,1,float16,float16,0,8.516266504923502
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,8192,8,1,64,0,1,float16,fp8,0,7.9651838938395185
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,8192,8,2,64,0,1,float16,float16,0,8.220330556233725
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,8192,8,1,64,0,1,fp8,fp8,0,10.32908821105957
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,8192,8,2,64,0,1,float16,fp8,0,8.54033088684082
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,8192,8,4,64,0,1,float16,float16,0,8.580095926920572
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,8192,8,2,64,0,1,fp8,fp8,0,10.898432413736979
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,8192,8,4,64,0,1,float16,fp8,0,8.185685475667318
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,8192,8,8,64,0,1,float16,float16,0,4.074837366739909
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,8192,8,8,64,0,1,float16,fp8,0,4.040704091389974
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,8192,8,4,64,0,1,fp8,fp8,0,10.946219126383463
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,8192,8,1,64,0,1,float16,float16,0,3.7814613978068032
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,8192,8,1,64,0,1,float16,fp8,0,3.5370667775472007
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,8192,8,8,64,0,1,fp8,fp8,0,5.538474400838216
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,8192,8,1,64,0,1,fp8,fp8,0,5.011797269185384
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,8192,8,2,64,0,1,float16,float16,0,3.6959571838378906
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,8192,8,2,64,0,1,float16,fp8,0,3.652949333190918
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,8192,8,4,64,0,1,float16,float16,0,3.8159360885620117
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,8192,8,2,64,0,1,fp8,fp8,0,5.119829177856445
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,8192,8,4,64,0,1,float16,fp8,0,3.839146614074707
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,8192,8,8,64,0,1,float16,fp8,0,1.876138687133789
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,8192,8,4,64,0,1,fp8,fp8,0,5.331797281901042
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,8192,8,8,64,0,1,float16,float16,0,1.9985067049662273
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,8192,8,8,64,0,1,fp8,fp8,0,2.710357348124186
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,8192,8,1,64,0,1,float16,float16,0,1.7078612645467122
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,8192,8,1,64,0,1,float16,fp8,0,1.7037653923034668
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,8192,8,1,64,0,1,fp8,fp8,0,2.43012269337972
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,8192,8,2,64,0,1,float16,float16,0,1.7198079427083333
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,8192,8,2,64,0,1,float16,fp8,0,1.734997272491455
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,8192,8,2,64,0,1,fp8,fp8,0,2.471936066945394
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,8192,8,4,64,0,1,float16,fp8,0,1.675264040629069
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,8192,8,4,64,0,1,float16,float16,0,1.7361920674641926
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,8192,8,4,64,0,1,fp8,fp8,0,2.5429333051045737
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,8192,8,8,64,0,1,float16,float16,0,0.8971947034200033
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,8192,8,8,64,0,1,float16,fp8,0,0.9086293379465739
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,8192,8,1,64,0,1,float16,float16,0,0.879957358042399
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,8192,8,8,64,0,1,fp8,fp8,0,1.36738125483195
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,8192,8,1,64,0,1,float16,fp8,0,0.8987306753794352
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,8192,8,1,64,0,1,fp8,fp8,0,1.2972373167673747
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,8192,8,2,64,0,1,float16,fp8,0,0.8944640159606934
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,8192,8,2,64,0,1,float16,float16,0,0.9091413021087646
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,8192,8,2,64,0,1,fp8,fp8,0,1.2844373385111492
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,8192,8,4,64,0,1,float16,float16,0,0.8901973565419515
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,8192,8,4,64,0,1,float16,fp8,0,0.8876372973124186
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,8192,8,4,64,0,1,fp8,fp8,0,1.2945066293080647
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,8192,8,8,64,0,1,float16,float16,0,0.5109759966532389
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,8192,8,8,64,0,1,float16,fp8,0,0.5051733255386353
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,8192,8,8,64,0,1,fp8,fp8,0,0.7459839979807535
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,8192,8,1,64,0,1,float16,float16,0,0.5140479803085327
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,8192,8,1,64,0,1,float16,fp8,0,0.5046613216400146
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,8192,8,1,64,0,1,fp8,fp8,0,0.7381333510080973
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,8192,8,2,64,0,1,float16,float16,0,0.5036373138427734
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,8192,8,2,64,0,1,float16,fp8,0,0.5077333450317383
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,8192,8,2,64,0,1,fp8,fp8,0,0.7410346666971842
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,8192,8,4,64,0,1,float16,float16,0,0.504149317741394
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,8192,8,4,64,0,1,float16,fp8,0,0.49937065442403156
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,8192,8,4,64,0,1,fp8,fp8,0,0.7492266496022543
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,6144,8,1,64,0,1,float16,float16,0,4.388693491617839
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,6144,8,1,64,0,1,float16,fp8,0,4.4526933034261065
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,6144,8,1,64,0,1,fp8,fp8,0,5.897045135498047
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,6144,8,2,64,0,1,float16,float16,0,4.712789217631022
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,6144,8,2,64,0,1,float16,fp8,0,4.656810760498047
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,6144,8,2,64,0,1,fp8,fp8,0,6.133589426676433
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,6144,8,4,64,0,1,float16,float16,0,4.590933481852214
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,6144,8,4,64,0,1,float16,fp8,0,4.750677426656087
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,6144,8,8,64,0,1,float16,float16,0,2.4214186668395996
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,6144,8,8,64,0,1,float16,fp8,0,2.3505919774373374
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,6144,8,8,64,0,1,fp8,fp8,0,3.1800320943196616
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,6144,8,4,64,0,1,fp8,fp8,0,6.389589309692383
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,6144,8,1,64,0,1,float16,float16,0,2.0072107315063477
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,6144,8,1,64,0,1,float16,fp8,0,1.9322880109151204
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,6144,8,1,64,0,1,fp8,fp8,0,2.8061014811197915
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,6144,8,2,64,0,1,float16,fp8,0,1.9831466674804688
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,6144,8,2,64,0,1,float16,float16,0,2.0505599975585938
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,6144,8,2,64,0,1,fp8,fp8,0,2.8397226333618164
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,6144,8,4,64,0,1,float16,float16,0,2.167125384012858
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,6144,8,4,64,0,1,float16,fp8,0,2.0587520599365234
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,6144,8,4,64,0,1,fp8,fp8,0,2.9779628117879233
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,6144,8,8,64,0,1,float16,float16,0,1.077077309290568
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,6144,8,8,64,0,1,float16,fp8,0,1.0255359808603923
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,6144,8,8,64,0,1,fp8,fp8,0,1.6080212593078613
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,6144,8,1,64,0,1,float16,fp8,0,1.0033493041992188
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,6144,8,1,64,0,1,float16,float16,0,1.0347519715627034
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,6144,8,2,64,0,1,float16,float16,0,0.9958399931589762
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,6144,8,1,64,0,1,fp8,fp8,0,1.421994686126709
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,6144,8,2,64,0,1,float16,fp8,0,1.0113706588745117
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,6144,8,2,64,0,1,fp8,fp8,0,1.4187520345052083
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,6144,8,4,64,0,1,float16,float16,0,0.9842346509297689
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,6144,8,4,64,0,1,float16,fp8,0,0.9886720180511475
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,6144,8,4,64,0,1,fp8,fp8,0,1.4523733456929524
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,6144,8,8,64,0,1,float16,fp8,0,0.5423786640167236
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,6144,8,1,64,0,1,float16,float16,0,0.5343573490778605
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,6144,8,8,64,0,1,float16,float16,0,0.553984006245931
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,6144,8,1,64,0,1,float16,fp8,0,0.5232640107472738
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,6144,8,1,64,0,1,fp8,fp8,0,0.772266705830892
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,6144,8,8,64,0,1,fp8,fp8,0,0.7823359966278076
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,6144,8,2,64,0,1,float16,float16,0,0.5302613178888956
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,6144,8,2,64,0,1,float16,fp8,0,0.5287253459294637
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,6144,8,2,64,0,1,fp8,fp8,0,0.7673172950744629
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,6144,8,4,64,0,1,float16,float16,0,0.5346986850102743
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,6144,8,8,64,0,1,float16,float16,0,0.32204800844192505
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,6144,8,4,64,0,1,fp8,fp8,0,0.7621973355611166
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,6144,8,4,64,0,1,float16,fp8,0,0.5350399812062582
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,6144,8,8,64,0,1,float16,fp8,0,0.32392533620198566
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,6144,8,8,64,0,1,fp8,fp8,0,0.4437333345413208
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,6144,8,1,64,0,1,float16,float16,0,0.3189760049184163
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,6144,8,1,64,0,1,float16,fp8,0,0.32290132840474445
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,6144,8,1,64,0,1,fp8,fp8,0,0.45158398151397705
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,6144,8,2,64,0,1,float16,float16,0,0.3251199920972188
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,6144,8,2,64,0,1,float16,fp8,0,0.32102400064468384
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,6144,8,2,64,0,1,fp8,fp8,0,0.43878400325775146
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,6144,8,4,64,0,1,float16,float16,0,0.3222186764081319
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,6144,8,4,64,0,1,float16,fp8,0,0.3264853358268738
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,6144,8,4,64,0,1,fp8,fp8,0,0.44390400250752765
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,4096,8,1,64,0,1,float16,float16,0,4.326570510864258
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,4096,8,1,64,0,1,float16,fp8,0,4.319402694702148
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,4096,8,1,64,0,1,fp8,fp8,0,5.5229441324869795
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,4096,8,2,64,0,1,float16,float16,0,4.5631148020426435
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,4096,8,2,64,0,1,float16,fp8,0,4.612778663635254
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,4096,8,4,64,0,1,float16,float16,0,4.870997428894043
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,4096,8,4,64,0,1,float16,fp8,0,4.6561279296875
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,4096,8,2,64,0,1,fp8,fp8,0,6.04142951965332
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,4096,8,8,64,0,1,float16,float16,0,2.573311964670817
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,4096,8,4,64,0,1,fp8,fp8,0,6.353237152099609
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,4096,8,8,64,0,1,float16,fp8,0,2.4135680198669434
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,4096,8,8,64,0,1,fp8,fp8,0,3.1225172678629556
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,4096,8,1,64,0,1,float16,float16,0,1.9573760032653809
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,4096,8,1,64,0,1,float16,fp8,0,1.9225600560506184
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,4096,8,1,64,0,1,fp8,fp8,0,2.627413272857666
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,4096,8,2,64,0,1,float16,float16,0,2.033664067586263
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,4096,8,2,64,0,1,float16,fp8,0,2.0147199630737305
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,4096,8,2,64,0,1,fp8,fp8,0,2.7315200169881186
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,4096,8,4,64,0,1,float16,float16,0,2.1628586451212564
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,4096,8,4,64,0,1,float16,fp8,0,2.090837319691976
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,4096,8,4,64,0,1,fp8,fp8,0,2.8881918589274087
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,4096,8,8,64,0,1,float16,float16,0,1.1712853113810222
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,4096,8,8,64,0,1,float16,fp8,0,1.1149653593699138
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,4096,8,8,64,0,1,fp8,fp8,0,1.553749402364095
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,4096,8,1,64,0,1,float16,float16,0,0.9402026335398356
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,4096,8,1,64,0,1,float16,fp8,0,0.9183573722839355
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,4096,8,1,64,0,1,fp8,fp8,0,1.283413330713908
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,4096,8,2,64,0,1,float16,float16,0,0.9014613628387451
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,4096,8,2,64,0,1,float16,fp8,0,0.9350826740264893
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,4096,8,2,64,0,1,fp8,fp8,0,1.2888747056325276
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,4096,8,4,64,0,1,float16,float16,0,0.9359359741210938
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,4096,8,4,64,0,1,fp8,fp8,0,1.4105599721272786
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,4096,8,8,64,0,1,float16,float16,0,0.4904959996541341
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,4096,8,4,64,0,1,float16,fp8,0,0.9279146989186605
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,4096,8,8,64,0,1,float16,fp8,0,0.4901546637217204
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,4096,8,8,64,0,1,fp8,fp8,0,0.756223996480306
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,4096,8,1,64,0,1,float16,float16,0,0.47496533393859863
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,4096,8,1,64,0,1,float16,fp8,0,0.4715520143508911
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,4096,8,1,64,0,1,fp8,fp8,0,0.6734506289164225
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,4096,8,2,64,0,1,float16,float16,0,0.48571733633677167
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,4096,8,2,64,0,1,float16,fp8,0,0.4729173183441162
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,4096,8,4,64,0,1,float16,float16,0,0.47940266132354736
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,4096,8,4,64,0,1,float16,fp8,0,0.4792319933573405
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,4096,8,2,64,0,1,fp8,fp8,0,0.6848853429158529
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,4096,8,8,64,0,1,float16,fp8,0,0.274944007396698
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,4096,8,8,64,0,1,float16,float16,0,0.2728959918022156
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,4096,8,4,64,0,1,fp8,fp8,0,0.6913706461588541
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,4096,8,1,64,0,1,float16,float16,0,0.276309331258138
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,4096,8,8,64,0,1,fp8,fp8,0,0.39765334129333496
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,4096,8,1,64,0,1,float16,fp8,0,0.27562665939331055
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,4096,8,1,64,0,1,fp8,fp8,0,0.38860801855723065
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,4096,8,2,64,0,1,float16,float16,0,0.276309331258138
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,4096,8,2,64,0,1,float16,fp8,0,0.2730666597684224
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,4096,8,2,64,0,1,fp8,fp8,0,0.38894931475321454
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,4096,8,4,64,0,1,float16,float16,0,0.27323732773462933
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,4096,8,4,64,0,1,float16,fp8,0,0.27101866404215497
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,4096,8,4,64,0,1,fp8,fp8,0,0.3911679983139038
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,4096,8,8,64,0,1,float16,float16,0,0.18278400103251138
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,4096,8,8,64,0,1,float16,fp8,0,0.17919999361038208
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,4096,8,1,64,0,1,float16,float16,0,0.18500266472498575
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,4096,8,8,64,0,1,fp8,fp8,0,0.21640533208847046
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,4096,8,1,64,0,1,float16,fp8,0,0.18773333231608072
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,4096,8,1,64,0,1,fp8,fp8,0,0.21964800357818604
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,4096,8,2,64,0,1,float16,float16,0,0.18500266472498575
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,4096,8,2,64,0,1,float16,fp8,0,0.18227199713389078
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,4096,8,2,64,0,1,fp8,fp8,0,0.21589332818984985
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,4096,8,4,64,0,1,float16,float16,0,0.17988266547520956
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,4096,8,4,64,0,1,float16,fp8,0,0.18449066082636514
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,4096,8,4,64,0,1,fp8,fp8,0,0.21691733598709106
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,3072,8,1,64,0,1,float16,float16,0,2.4221013387044272
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,3072,8,1,64,0,1,float16,fp8,0,2.443434715270996
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,3072,8,1,64,0,1,fp8,fp8,0,3.118933359781901
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,3072,8,2,64,0,1,float16,float16,0,2.6956799825032554
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,3072,8,2,64,0,1,float16,fp8,0,2.6088107426961265
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,3072,8,2,64,0,1,fp8,fp8,0,3.446613311767578
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,3072,8,4,64,0,1,float16,float16,0,2.8963839213053384
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,3072,8,4,64,0,1,float16,fp8,0,2.8216320673624673
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,3072,8,8,64,0,1,float16,float16,0,1.578154722849528
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,3072,8,4,64,0,1,fp8,fp8,0,3.695616086324056
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,3072,8,8,64,0,1,float16,fp8,0,1.4701226552327473
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,3072,8,1,64,0,1,float16,float16,0,1.0897066593170166
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,3072,8,8,64,0,1,fp8,fp8,0,1.9095892906188965
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,3072,8,1,64,0,1,float16,fp8,0,1.068885326385498
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,3072,8,1,64,0,1,fp8,fp8,0,1.5213227272033691
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,3072,8,2,64,0,1,float16,fp8,0,1.151146650314331
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,3072,8,2,64,0,1,float16,float16,0,1.1666773160298665
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,3072,8,2,64,0,1,fp8,fp8,0,1.5846400260925293
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,3072,8,4,64,0,1,float16,float16,0,1.2835840384165447
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,3072,8,4,64,0,1,float16,fp8,0,1.221461296081543
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,3072,8,4,64,0,1,fp8,fp8,0,1.7174186706542969
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,3072,8,8,64,0,1,float16,float16,0,0.6418773333231608
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,3072,8,8,64,0,1,float16,fp8,0,0.5858986775080363
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,3072,8,8,64,0,1,fp8,fp8,0,0.9548799991607666
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,3072,8,1,64,0,1,float16,fp8,0,0.5531306664148966
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,3072,8,1,64,0,1,float16,float16,0,0.5676373243331909
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,3072,8,1,64,0,1,fp8,fp8,0,0.7633919715881348
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,3072,8,2,64,0,1,float16,float16,0,0.5517653226852417
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,3072,8,2,64,0,1,float16,fp8,0,0.5558613141377767
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,3072,8,2,64,0,1,fp8,fp8,0,0.7627092997233073
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,3072,8,4,64,0,1,float16,float16,0,0.5469866593678793
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,3072,8,4,64,0,1,float16,fp8,0,0.5635413328806559
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,3072,8,4,64,0,1,fp8,fp8,0,0.8023040294647217
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,3072,8,8,64,0,1,float16,float16,0,0.3041279911994934
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,3072,8,8,64,0,1,float16,fp8,0,0.30054400364557904
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,3072,8,8,64,0,1,fp8,fp8,0,0.43093331654866535
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,3072,8,1,64,0,1,float16,float16,0,0.2882560094197591
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,3072,8,1,64,0,1,float16,fp8,0,0.28228267033894855
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,3072,8,1,64,0,1,fp8,fp8,0,0.4068693319956462
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,3072,8,2,64,0,1,float16,float16,0,0.2916693290074666
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,3072,8,2,64,0,1,float16,fp8,0,0.2845013340314229
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,3072,8,4,64,0,1,float16,float16,0,0.291157325108846
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,3072,8,4,64,0,1,float16,fp8,0,0.29047467311223346
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,3072,8,2,64,0,1,fp8,fp8,0,0.4130133390426636
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,3072,8,4,64,0,1,fp8,fp8,0,0.42018131415049237
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,3072,8,8,64,0,1,float16,float16,0,0.1713493267695109
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,3072,8,8,64,0,1,float16,fp8,0,0.17254400253295898
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,3072,8,8,64,0,1,fp8,fp8,0,0.24576000372568765
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,3072,8,1,64,0,1,float16,float16,0,0.1764693260192871
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,3072,8,1,64,0,1,float16,fp8,0,0.17834667364756265
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,3072,8,1,64,0,1,fp8,fp8,0,0.23944532871246338
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,3072,8,2,64,0,1,float16,fp8,0,0.17595734198888144
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,3072,8,2,64,0,1,float16,float16,0,0.176639993985494
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,3072,8,2,64,0,1,fp8,fp8,0,0.2367146611213684
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,3072,8,4,64,0,1,float16,float16,0,0.17612799008687338
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,3072,8,4,64,0,1,float16,fp8,0,0.18210132916768393
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,3072,8,4,64,0,1,fp8,fp8,0,0.23944532871246338
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,3072,8,8,64,0,1,float16,float16,0,0.12714667121569315
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,3072,8,8,64,0,1,float16,fp8,0,0.11417599519093831
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,3072,8,8,64,0,1,fp8,fp8,0,0.15786666671435037
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,3072,8,1,64,0,1,float16,fp8,0,0.1114453375339508
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,3072,8,1,64,0,1,float16,float16,0,0.116565336783727
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,3072,8,1,64,0,1,fp8,fp8,0,0.15752533078193665
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,3072,8,2,64,0,1,float16,float16,0,0.11349333326021831
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,3072,8,2,64,0,1,float16,fp8,0,0.11741866668065389
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,3072,8,2,64,0,1,fp8,fp8,0,0.16025599837303162
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,3072,8,4,64,0,1,float16,float16,0,0.1153706709543864
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,3072,8,4,64,0,1,float16,fp8,0,0.1109333336353302
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,3072,8,4,64,0,1,fp8,fp8,0,0.15769599874814352
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,2048,8,1,64,0,1,float16,fp8,0,2.5279146830240884
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,2048,8,1,64,0,1,float16,float16,0,2.581162611643473
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,2048,8,1,64,0,1,fp8,fp8,0,3.1870292027791343
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,2048,8,2,64,0,1,float16,float16,0,2.956117312113444
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,2048,8,2,64,0,1,float16,fp8,0,2.8658345540364585
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,2048,8,2,64,0,1,fp8,fp8,0,3.5978240966796875
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,2048,8,4,64,0,1,float16,float16,0,3.2783358891805015
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,2048,8,4,64,0,1,float16,fp8,0,3.0885547002156577
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,2048,8,8,64,0,1,float16,float16,0,1.7662293116251628
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,2048,8,4,64,0,1,fp8,fp8,0,3.9236265818277993
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,2048,8,1,64,0,1,float16,float16,0,1.1398826440175374
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,2048,8,8,64,0,1,float16,fp8,0,1.6682666142781575
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,2048,8,8,64,0,1,fp8,fp8,0,2.032128016153971
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,2048,8,1,64,0,1,float16,fp8,0,1.1277653376261394
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,2048,8,1,64,0,1,fp8,fp8,0,1.507157325744629
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,2048,8,2,64,0,1,float16,float16,0,1.221461296081543
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,2048,8,2,64,0,1,fp8,fp8,0,1.6172374089558919
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,2048,8,2,64,0,1,float16,fp8,0,1.2436479727427165
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,2048,8,4,64,0,1,float16,fp8,0,1.387349287668864
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,2048,8,4,64,0,1,float16,float16,0,1.4376959800720215
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,2048,8,8,64,0,1,float16,float16,0,0.7720959981282552
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,2048,8,4,64,0,1,fp8,fp8,0,1.767082691192627
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,2048,8,8,64,0,1,float16,fp8,0,0.7210666338602701
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,2048,8,8,64,0,1,fp8,fp8,0,0.97979736328125
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,2048,8,1,64,0,1,float16,float16,0,0.521727999051412
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,2048,8,1,64,0,1,float16,fp8,0,0.5307733217875162
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,2048,8,2,64,0,1,float16,float16,0,0.5396480162938436
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,2048,8,1,64,0,1,fp8,fp8,0,0.7166293462117513
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,2048,8,2,64,0,1,float16,fp8,0,0.5160959959030151
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,2048,8,2,64,0,1,fp8,fp8,0,0.7306239604949951
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,2048,8,4,64,0,1,float16,float16,0,0.5659306844075521
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,2048,8,4,64,0,1,float16,fp8,0,0.5282133420308431
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,2048,8,4,64,0,1,fp8,fp8,0,0.8354132970174154
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,2048,8,8,64,0,1,float16,float16,0,0.2853546738624573
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,2048,8,8,64,0,1,float16,fp8,0,0.2769920031229655
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,2048,8,1,64,0,1,float16,float16,0,0.25975465774536133
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,2048,8,8,64,0,1,fp8,fp8,0,0.4493653376897176
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,2048,8,1,64,0,1,float16,fp8,0,0.26077866554260254
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,2048,8,1,64,0,1,fp8,fp8,0,0.37034666538238525
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,2048,8,2,64,0,1,float16,float16,0,0.26129066944122314
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,2048,8,2,64,0,1,float16,fp8,0,0.25975465774536133
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,2048,8,2,64,0,1,fp8,fp8,0,0.37836798032124835
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,2048,8,4,64,0,1,float16,float16,0,0.2688000003496806
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,2048,8,4,64,0,1,float16,fp8,0,0.2696533401807149
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,2048,8,8,64,0,1,float16,float16,0,0.15377066532770792
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,2048,8,4,64,0,1,fp8,fp8,0,0.38075733184814453
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,2048,8,8,64,0,1,float16,fp8,0,0.14882133404413858
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,2048,8,8,64,0,1,fp8,fp8,0,0.21998933951059976
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,2048,8,1,64,0,1,float16,float16,0,0.15121066570281982
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,2048,8,1,64,0,1,float16,fp8,0,0.15496533115704855
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,2048,8,2,64,0,1,float16,float16,0,0.1525759994983673
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,2048,8,1,64,0,1,fp8,fp8,0,0.21196800470352173
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,2048,8,2,64,0,1,float16,fp8,0,0.1551359991232554
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,2048,8,2,64,0,1,fp8,fp8,0,0.21555199225743613
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,2048,8,4,64,0,1,float16,float16,0,0.14984533190727234
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,2048,8,4,64,0,1,float16,fp8,0,0.14677332838376364
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,2048,8,4,64,0,1,fp8,fp8,0,0.21538132429122925
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,2048,8,8,64,0,1,float16,fp8,0,0.09676800171534221
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,2048,8,1,64,0,1,float16,float16,0,0.10410666465759277
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,2048,8,8,64,0,1,fp8,fp8,0,0.12390399972597758
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,2048,8,8,64,0,1,float16,float16,0,0.09779199957847595
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,2048,8,1,64,0,1,float16,fp8,0,0.10205866893132527
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,2048,8,1,64,0,1,fp8,fp8,0,0.1225386659304301
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,2048,8,2,64,0,1,float16,float16,0,0.09693866968154907
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,2048,8,2,64,0,1,float16,fp8,0,0.09574400385220845
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,2048,8,2,64,0,1,fp8,fp8,0,0.12270933389663696
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,2048,8,4,64,0,1,float16,float16,0,0.09898666540781657
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,2048,8,4,64,0,1,float16,fp8,0,0.09966933727264404
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,2048,8,8,64,0,1,float16,float16,0,0.058880001306533813
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,2048,8,8,64,0,1,float16,fp8,0,0.05905066430568695
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,2048,8,8,64,0,1,fp8,fp8,0,0.0865280032157898
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,2048,8,4,64,0,1,fp8,fp8,0,0.1230506698290507
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,2048,8,1,64,0,1,float16,float16,0,0.058880001306533813
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,2048,8,1,64,0,1,float16,fp8,0,0.05905066430568695
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,2048,8,1,64,0,1,fp8,fp8,0,0.08686932921409607
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,2048,8,2,64,0,1,float16,float16,0,0.05819733440876007
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,2048,8,2,64,0,1,float16,fp8,0,0.05922133227189382
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,2048,8,2,64,0,1,fp8,fp8,0,0.0865280032157898
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,2048,8,4,64,0,1,float16,float16,0,0.05836800237496694
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,2048,8,4,64,0,1,float16,fp8,0,0.05853866537412008
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,2048,8,4,64,0,1,fp8,fp8,0,0.08635733524958293
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,1536,8,1,64,0,1,float16,float16,0,1.5243946711222331
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,1536,8,1,64,0,1,float16,fp8,0,1.5105706850687664
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,1536,8,2,64,0,1,float16,fp8,0,1.7037653923034668
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,1536,8,1,64,0,1,fp8,fp8,0,1.859242598215739
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,1536,8,2,64,0,1,float16,float16,0,1.7715199788411458
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,1536,8,2,64,0,1,fp8,fp8,0,2.1393067042032876
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,1536,8,4,64,0,1,float16,float16,0,2.0171093940734863
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,1536,8,4,64,0,1,float16,fp8,0,1.8991786638895671
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,1536,8,8,64,0,1,float16,float16,0,1.1112106641133626
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,1536,8,8,64,0,1,float16,fp8,0,1.0308266480763753
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,1536,8,4,64,0,1,fp8,fp8,0,2.3270400365193686
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,1536,8,1,64,0,1,float16,float16,0,0.6587733427683512
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,1536,8,8,64,0,1,fp8,fp8,0,1.2663466930389404
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,1536,8,1,64,0,1,float16,fp8,0,0.6328320105870565
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,1536,8,2,64,0,1,float16,float16,0,0.7120213508605957
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,1536,8,1,64,0,1,fp8,fp8,0,0.8949759801228842
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,1536,8,2,64,0,1,float16,fp8,0,0.6898346741994222
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,1536,8,2,64,0,1,fp8,fp8,0,0.9731413523356119
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,1536,8,4,64,0,1,float16,float16,0,0.8528213500976562
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,1536,8,4,64,0,1,float16,fp8,0,0.8188587029774984
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,1536,8,8,64,0,1,float16,float16,0,0.4241066773732503
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,1536,8,4,64,0,1,fp8,fp8,0,1.076053301493327
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,1536,8,8,64,0,1,float16,fp8,0,0.3684693177541097
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,1536,8,8,64,0,1,fp8,fp8,0,0.6261759996414185
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,1536,8,1,64,0,1,float16,float16,0,0.3174399932225545
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,1536,8,1,64,0,1,float16,fp8,0,0.31539199749628705
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,1536,8,1,64,0,1,fp8,fp8,0,0.42444801330566406
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,1536,8,2,64,0,1,float16,float16,0,0.32682667175928753
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,1536,8,2,64,0,1,float16,fp8,0,0.31385600566864014
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,1536,8,2,64,0,1,fp8,fp8,0,0.43673598766326904
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,1536,8,4,64,0,1,float16,float16,0,0.3217066725095113
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,1536,8,4,64,0,1,float16,fp8,0,0.32494932413101196
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,1536,8,4,64,0,1,fp8,fp8,0,0.48264535268147785
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,1536,8,8,64,0,1,float16,fp8,0,0.17407999436060587
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,1536,8,8,64,0,1,fp8,fp8,0,0.24593067169189453
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,1536,8,8,64,0,1,float16,float16,0,0.17459199825922647
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,1536,8,1,64,0,1,float16,float16,0,0.1609386702378591
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,1536,8,1,64,0,1,fp8,fp8,0,0.22835199038187662
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,1536,8,1,64,0,1,float16,fp8,0,0.1621333360671997
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,1536,8,2,64,0,1,float16,float16,0,0.16435199975967407
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,1536,8,2,64,0,1,float16,fp8,0,0.16332800189654031
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,1536,8,2,64,0,1,fp8,fp8,0,0.22886399428049722
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,1536,8,4,64,0,1,float16,float16,0,0.16639999548594156
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,1536,8,4,64,0,1,float16,fp8,0,0.16059733430544534
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,1536,8,8,64,0,1,float16,float16,0,0.09864532947540283
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,1536,8,4,64,0,1,fp8,fp8,0,0.2326186696688334
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,1536,8,8,64,0,1,float16,fp8,0,0.09915733337402344
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,1536,8,8,64,0,1,fp8,fp8,0,0.13755733768145242
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,1536,8,1,64,0,1,float16,float16,0,0.10222933689753215
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,1536,8,1,64,0,1,float16,fp8,0,0.09762133161226909
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,1536,8,2,64,0,1,float16,float16,0,0.0988159974416097
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,1536,8,2,64,0,1,float16,fp8,0,0.10001066327095032
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,1536,8,1,64,0,1,fp8,fp8,0,0.13260799646377563
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,1536,8,2,64,0,1,fp8,fp8,0,0.13380266229311624
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,1536,8,4,64,0,1,float16,float16,0,0.09830400347709656
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,1536,8,4,64,0,1,float16,fp8,0,0.10035199920336406
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,1536,8,4,64,0,1,fp8,fp8,0,0.13294933239618936
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,1536,8,8,64,0,1,float16,float16,0,0.06178133189678192
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,1536,8,8,64,0,1,float16,fp8,0,0.062122667829195656
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,1536,8,8,64,0,1,fp8,fp8,0,0.0846506655216217
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,1536,8,1,64,0,1,float16,fp8,0,0.06331733365853627
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,1536,8,1,64,0,1,fp8,fp8,0,0.08567466338475545
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,1536,8,1,64,0,1,float16,float16,0,0.0631466656923294
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,1536,8,2,64,0,1,float16,float16,0,0.06195199986298879
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,1536,8,2,64,0,1,float16,fp8,0,0.062122667829195656
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,1536,8,2,64,0,1,fp8,fp8,0,0.08533333738644917
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,1536,8,4,64,0,1,float16,float16,0,0.06229333579540253
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,1536,8,4,64,0,1,float16,fp8,0,0.061610668897628784
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,1536,8,4,64,0,1,fp8,fp8,0,0.0846506655216217
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,1536,8,8,64,0,1,float16,float16,0,0.04642133414745331
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,1536,8,8,64,0,1,float16,fp8,0,0.04693333307902018
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,1536,8,8,64,0,1,fp8,fp8,0,0.06673066814740498
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,1536,8,1,64,0,1,float16,float16,0,0.04778666794300079
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,1536,8,1,64,0,1,float16,fp8,0,0.04795733094215393
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,1536,8,2,64,0,1,float16,float16,0,0.0481279989083608
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,1536,8,1,64,0,1,fp8,fp8,0,0.06553600231806438
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,1536,8,2,64,0,1,float16,fp8,0,0.04778666794300079
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,1536,8,4,64,0,1,float16,float16,0,0.04778666794300079
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,1536,8,2,64,0,1,fp8,fp8,0,0.06690133114655812
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,1536,8,4,64,0,1,float16,fp8,0,0.04761599997679392
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,1536,8,4,64,0,1,fp8,fp8,0,0.067071999112765
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,1024,8,1,64,0,1,float16,float16,0,1.600170612335205
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,1024,8,1,64,0,1,float16,fp8,0,1.5856639544169109
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,1024,8,1,64,0,1,fp8,fp8,0,1.7780052820841472
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,1024,8,2,64,0,1,float16,float16,0,1.7070080439249675
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,1024,8,2,64,0,1,float16,fp8,0,1.7051307360331218
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,1024,8,2,64,0,1,fp8,fp8,0,1.8490026791890461
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,1024,8,4,64,0,1,float16,fp8,0,1.8780159950256348
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,1024,8,4,64,0,1,float16,float16,0,1.9831466674804688
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,1024,8,8,64,0,1,float16,float16,0,1.2368213335673015
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,1024,8,8,64,0,1,float16,fp8,0,1.1653119723002117
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,1024,8,4,64,0,1,fp8,fp8,0,2.0645546913146973
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,1024,8,1,64,0,1,float16,float16,0,0.7579306761423746
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,1024,8,1,64,0,1,float16,fp8,0,0.7219200134277344
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,1024,8,8,64,0,1,fp8,fp8,0,1.2467199961344402
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,1024,8,1,64,0,1,fp8,fp8,0,0.8999253114064535
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,1024,8,2,64,0,1,float16,float16,0,0.8405333360036215
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,1024,8,2,64,0,1,float16,fp8,0,0.8128853638966879
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,1024,8,2,64,0,1,fp8,fp8,0,0.9593173662821451
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,1024,8,4,64,0,1,float16,float16,0,0.9391787052154541
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,1024,8,8,64,0,1,float16,float16,0,0.599722663561503
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,1024,8,4,64,0,1,float16,fp8,0,0.9118719895680746
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,1024,8,4,64,0,1,fp8,fp8,0,1.0466986497243245
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,1024,8,8,64,0,1,float16,fp8,0,0.5587626695632935
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,1024,8,8,64,0,1,fp8,fp8,0,0.6437546809514364
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,1024,8,1,64,0,1,float16,float16,0,0.32631466786066693
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,1024,8,1,64,0,1,float16,fp8,0,0.3293866713841756
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,1024,8,2,64,0,1,float16,float16,0,0.3130026658376058
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,1024,8,1,64,0,1,fp8,fp8,0,0.4225706656773885
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,1024,8,2,64,0,1,float16,fp8,0,0.33740798632303876
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,1024,8,2,64,0,1,fp8,fp8,0,0.4495360056559245
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,1024,8,4,64,0,1,float16,float16,0,0.3449173370997111
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,1024,8,4,64,0,1,float16,fp8,0,0.33638401826222736
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,1024,8,4,64,0,1,fp8,fp8,0,0.5335040092468262
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,1024,8,8,64,0,1,float16,float16,0,0.16861865917841592
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,1024,8,8,64,0,1,float16,fp8,0,0.16315733393033346
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,1024,8,1,64,0,1,float16,float16,0,0.16025599837303162
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,1024,8,8,64,0,1,fp8,fp8,0,0.28996266921361286
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,1024,8,1,64,0,1,float16,fp8,0,0.16025599837303162
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,1024,8,1,64,0,1,fp8,fp8,0,0.2177706758181254
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,1024,8,2,64,0,1,float16,fp8,0,0.1634986698627472
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,1024,8,2,64,0,1,float16,float16,0,0.16588800152142844
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,1024,8,2,64,0,1,fp8,fp8,0,0.21896533171335855
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,1024,8,4,64,0,1,float16,fp8,0,0.16571733355522156
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,1024,8,4,64,0,1,float16,float16,0,0.16110933820406595
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,1024,8,4,64,0,1,fp8,fp8,0,0.22272000710169473
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,1024,8,8,64,0,1,float16,float16,0,0.09591466188430786
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,1024,8,8,64,0,1,float16,fp8,0,0.09523199995358785
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,1024,8,8,64,0,1,fp8,fp8,0,0.12714667121569315
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,1024,8,1,64,0,1,float16,fp8,0,0.09506133198738098
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,1024,8,1,64,0,1,float16,float16,0,0.09642666578292847
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,1024,8,1,64,0,1,fp8,fp8,0,0.12458667159080505
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,1024,8,2,64,0,1,float16,float16,0,0.0936959981918335
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,1024,8,2,64,0,1,float16,fp8,0,0.09574400385220845
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,1024,8,2,64,0,1,fp8,fp8,0,0.12441600362459819
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,1024,8,4,64,0,1,float16,float16,0,0.09574400385220845
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,1024,8,4,64,0,1,float16,fp8,0,0.09489066402117412
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,1024,8,4,64,0,1,fp8,fp8,0,0.12612266341845194
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,1024,8,8,64,0,1,float16,float16,0,0.0580266664425532
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,1024,8,8,64,0,1,float16,fp8,0,0.05751466751098633
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,1024,8,8,64,0,1,fp8,fp8,0,0.06980266670385997
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,1024,8,1,64,0,1,float16,float16,0,0.05751466751098633
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,1024,8,1,64,0,1,float16,fp8,0,0.05649066468079885
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,1024,8,1,64,0,1,fp8,fp8,0,0.06929066777229309
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,1024,8,2,64,0,1,float16,float16,0,0.05580799778302511
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,1024,8,2,64,0,1,float16,fp8,0,0.05614933371543884
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,1024,8,4,64,0,1,float16,float16,0,0.05751466751098633
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,1024,8,2,64,0,1,fp8,fp8,0,0.06980266670385997
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,1024,8,4,64,0,1,float16,fp8,0,0.056832000613212585
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,1024,8,4,64,0,1,fp8,fp8,0,0.07014399766921997
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,1024,8,8,64,0,1,float16,float16,0,0.03566933423280716
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,1024,8,8,64,0,1,float16,fp8,0,0.03532800078392029
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,1024,8,8,64,0,1,fp8,fp8,0,0.04949333270390829
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,1024,8,1,64,0,1,float16,float16,0,0.034645333886146545
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,1024,8,1,64,0,1,float16,fp8,0,0.034815999368826546
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,1024,8,1,64,0,1,fp8,fp8,0,0.04898133377234141
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,1024,8,2,64,0,1,float16,float16,0,0.034815999368826546
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,1024,8,2,64,0,1,float16,fp8,0,0.03498666733503342
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,1024,8,2,64,0,1,fp8,fp8,0,0.04898133377234141
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,1024,8,4,64,0,1,float16,float16,0,0.03515733281771342
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,1024,8,4,64,0,1,float16,fp8,0,0.034815999368826546
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,1024,8,4,64,0,1,fp8,fp8,0,0.04898133377234141
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,1024,8,8,64,0,1,float16,float16,0,0.0264533335963885
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,1024,8,8,64,0,1,float16,fp8,0,0.02679466704527537
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,1024,8,1,64,0,1,float16,float16,0,0.0264533335963885
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,1024,8,8,64,0,1,fp8,fp8,0,0.04164266586303711
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,1024,8,1,64,0,1,float16,fp8,0,0.0264533335963885
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,1024,8,1,64,0,1,fp8,fp8,0,0.04130133241415024
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,1024,8,2,64,0,1,float16,float16,0,0.0264533335963885
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,1024,8,2,64,0,1,float16,fp8,0,0.0264533335963885
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,1024,8,2,64,0,1,fp8,fp8,0,0.04147200038035711
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,1024,8,4,64,0,1,float16,float16,0,0.0266239990790685
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,1024,8,4,64,0,1,float16,fp8,0,0.02679466704527537
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,1024,8,4,64,0,1,fp8,fp8,0,0.04164266586303711
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,512,8,1,64,0,1,float16,float16,0,1.1922773520151775
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,512,8,1,64,0,1,float16,fp8,0,1.1752106348673503
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,512,8,1,64,0,1,fp8,fp8,0,1.2194133599599202
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,512,8,2,64,0,1,float16,float16,0,1.3436586062113445
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,512,8,2,64,0,1,float16,fp8,0,1.303381363550822
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,512,8,2,64,0,1,fp8,fp8,0,1.3110613028208415
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,512,8,4,64,0,1,float16,float16,0,1.6510292689005535
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,512,8,4,64,0,1,float16,fp8,0,1.550165335337321
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,512,8,4,64,0,1,fp8,fp8,0,1.5259307225545247
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,512,8,8,64,0,1,float16,float16,0,1.1438079675038655
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,512,8,8,64,0,1,float16,fp8,0,1.0693973700205486
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,512,8,8,64,0,1,fp8,fp8,0,0.9743359883626302
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,512,8,1,64,0,1,float16,float16,0,0.5478399991989136
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,512,8,1,64,0,1,fp8,fp8,0,0.6242986520131429
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,512,8,2,64,0,1,float16,float16,0,0.6531413396199545
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,512,8,1,64,0,1,float16,fp8,0,0.5282133420308431
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,512,8,2,64,0,1,float16,fp8,0,0.6302719910939535
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,512,8,2,64,0,1,fp8,fp8,0,0.6736213366190592
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,512,8,4,64,0,1,float16,float16,0,0.7910400231679281
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,512,8,4,64,0,1,float16,fp8,0,0.749397357304891
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,512,8,4,64,0,1,fp8,fp8,0,0.7770453294118246
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,512,8,8,64,0,1,float16,float16,0,0.5355519851048788
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,512,8,8,64,0,1,float16,fp8,0,0.4790613253911336
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,512,8,8,64,0,1,fp8,fp8,0,0.5050026575724283
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,512,8,1,64,0,1,float16,float16,0,0.21760000785191855
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,512,8,1,64,0,1,float16,fp8,0,0.22254933913548788
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,512,8,1,64,0,1,fp8,fp8,0,0.27101866404215497
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,512,8,2,64,0,1,float16,float16,0,0.21452800432840982
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,512,8,2,64,0,1,float16,fp8,0,0.2213546633720398
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,512,8,2,64,0,1,fp8,fp8,0,0.30719999472300213
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,512,8,4,64,0,1,float16,float16,0,0.24183466037114462
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,512,8,4,64,0,1,float16,fp8,0,0.22749867041905722
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,512,8,4,64,0,1,fp8,fp8,0,0.38929065068562824
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,512,8,8,64,0,1,float16,float16,0,0.11400533715883891
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,512,8,8,64,0,1,float16,fp8,0,0.11076266566912334
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,512,8,8,64,0,1,fp8,fp8,0,0.2136746644973755
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,512,8,1,64,0,1,float16,float16,0,0.10905599594116211
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,512,8,1,64,0,1,float16,fp8,0,0.10871466994285583
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,512,8,1,64,0,1,fp8,fp8,0,0.13960533340771994
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,512,8,2,64,0,1,float16,float16,0,0.10973866780598958
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,512,8,2,64,0,1,float16,fp8,0,0.11059199770291646
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,512,8,2,64,0,1,fp8,fp8,0,0.14216533303260803
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,512,8,4,64,0,1,float16,float16,0,0.11178666353225708
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,512,8,4,64,0,1,float16,fp8,0,0.11264000336329143
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,512,8,4,64,0,1,fp8,fp8,0,0.14353066682815552
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,512,8,8,64,0,1,float16,float16,0,0.06570666531721751
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,512,8,8,64,0,1,float16,fp8,0,0.06553600231806438
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,512,8,8,64,0,1,fp8,fp8,0,0.07970133423805237
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,512,8,1,64,0,1,float16,float16,0,0.06109866499900818
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,512,8,1,64,0,1,float16,fp8,0,0.06178133189678192
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,512,8,1,64,0,1,fp8,fp8,0,0.08021333316961925
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,512,8,2,64,0,1,float16,float16,0,0.06348800162474315
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,512,8,2,64,0,1,float16,fp8,0,0.06331733365853627
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,512,8,2,64,0,1,fp8,fp8,0,0.08055466910203297
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,512,8,4,64,0,1,float16,float16,0,0.06451199948787689
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,512,8,4,64,0,1,float16,fp8,0,0.06400000055631001
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,512,8,4,64,0,1,fp8,fp8,0,0.08123733103275299
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,512,8,8,64,0,1,float16,fp8,0,0.03839999934037527
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,512,8,8,64,0,1,float16,float16,0,0.03857066730658213
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,512,8,8,64,0,1,fp8,fp8,0,0.04386133452256521
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,512,8,1,64,0,1,float16,float16,0,0.03703466554482778
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,512,8,1,64,0,1,float16,fp8,0,0.037717332442601524
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,512,8,1,64,0,1,fp8,fp8,0,0.04369066655635834
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,512,8,2,64,0,1,float16,float16,0,0.037717332442601524
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,512,8,2,64,0,1,float16,fp8,0,0.03754666695992152
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,512,8,2,64,0,1,fp8,fp8,0,0.04369066655635834
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,512,8,4,64,0,1,float16,float16,0,0.037717332442601524
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,512,8,4,64,0,1,float16,fp8,0,0.038912000755469
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,512,8,4,64,0,1,fp8,fp8,0,0.04369066655635834
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,512,8,8,64,0,1,float16,float16,0,0.02372266600529353
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,512,8,8,64,0,1,float16,fp8,0,0.02372266600529353
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,512,8,1,64,0,1,float16,float16,0,0.022698665658632915
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,512,8,8,64,0,1,fp8,fp8,0,0.0314026673634847
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,512,8,1,64,0,1,float16,fp8,0,0.023039999107519787
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,512,8,1,64,0,1,fp8,fp8,0,0.031061333914597828
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,512,8,2,64,0,1,float16,float16,0,0.023039999107519787
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,512,8,2,64,0,1,float16,fp8,0,0.023210667073726654
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,512,8,2,64,0,1,fp8,fp8,0,0.031231999397277832
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,512,8,4,64,0,1,float16,float16,0,0.023381332556406658
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,512,8,4,64,0,1,float16,fp8,0,0.023210667073726654
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,512,8,8,64,0,1,float16,float16,0,0.018090666582187016
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,512,8,8,64,0,1,float16,fp8,0,0.018090666582187016
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,512,8,4,64,0,1,fp8,fp8,0,0.031061333914597828
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,512,8,8,64,0,1,fp8,fp8,0,0.025941332181294758
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,512,8,1,64,0,1,float16,float16,0,0.01757866640885671
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,512,8,1,64,0,1,float16,fp8,0,0.01791999985774358
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,512,8,1,64,0,1,fp8,fp8,0,0.025941332181294758
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,512,8,2,64,0,1,float16,fp8,0,0.017749333133300144
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,512,8,2,64,0,1,float16,float16,0,0.017749333133300144
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,512,8,2,64,0,1,fp8,fp8,0,0.025941332181294758
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,512,8,4,64,0,1,float16,float16,0,0.01791999985774358
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,512,8,4,64,0,1,float16,fp8,0,0.01791999985774358
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,512,8,4,64,0,1,fp8,fp8,0,0.02611200014750163
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,512,8,8,64,0,1,float16,float16,0,0.016554666062196095
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,512,8,8,64,0,1,float16,fp8,0,0.016895999511082966
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,512,8,8,64,0,1,fp8,fp8,0,0.024405332903067272
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,512,8,1,64,0,1,float16,fp8,0,0.016554666062196095
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,512,8,1,64,0,1,float16,float16,0,0.016554666062196095
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,512,8,1,64,0,1,fp8,fp8,0,0.024234667420387268
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,512,8,2,64,0,1,float16,float16,0,0.016554666062196095
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,512,8,2,64,0,1,float16,fp8,0,0.01672533278663953
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,512,8,2,64,0,1,fp8,fp8,0,0.024405332903067272
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,512,8,4,64,0,1,float16,float16,0,0.01672533278663953
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,512,8,4,64,0,1,float16,fp8,0,0.016895999511082966
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,512,8,4,64,0,1,fp8,fp8,0,0.02457600086927414
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,256,8,1,64,0,1,float16,float16,0,0.4689919948577881
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,256,8,1,64,0,1,float16,fp8,0,0.4509013493855794
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,256,8,1,64,0,1,fp8,fp8,0,0.4894719918568929
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,256,8,2,64,0,1,float16,float16,0,0.5975040197372437
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,256,8,2,64,0,1,float16,fp8,0,0.5749760071436564
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,256,8,2,64,0,1,fp8,fp8,0,0.5364053249359131
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,256,8,4,64,0,1,float16,float16,0,0.7758506933848063
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,256,8,4,64,0,1,float16,fp8,0,0.7405227025349935
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,256,8,4,64,0,1,fp8,fp8,0,0.6401706536610922
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,256,8,8,64,0,1,float16,float16,0,0.5329920053482056
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,256,8,8,64,0,1,float16,fp8,0,0.47189335028330487
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,256,8,8,64,0,1,fp8,fp8,0,0.4478293259938558
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,256,8,1,64,0,1,float16,float16,0,0.14830933014551798
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,256,8,1,64,0,1,float16,fp8,0,0.14762666821479797
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,256,8,1,64,0,1,fp8,fp8,0,0.19029333194096884
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,256,8,2,64,0,1,float16,float16,0,0.15291733543078104
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,256,8,2,64,0,1,float16,fp8,0,0.15479466319084167
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,256,8,2,64,0,1,fp8,fp8,0,0.2387626568476359
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,256,8,4,64,0,1,float16,float16,0,0.19899733861287436
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,256,8,4,64,0,1,float16,fp8,0,0.17851734161376953
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,256,8,4,64,0,1,fp8,fp8,0,0.32153600454330444
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,256,8,8,64,0,1,float16,float16,0,0.08686932921409607
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,256,8,8,64,0,1,float16,fp8,0,0.08243200182914734
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,256,8,8,64,0,1,fp8,fp8,0,0.17476266622543335
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,256,8,1,64,0,1,float16,float16,0,0.0773119976123174
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,256,8,1,64,0,1,float16,fp8,0,0.07867733140786488
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,256,8,1,64,0,1,fp8,fp8,0,0.10120532910029094
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,256,8,2,64,0,1,float16,float16,0,0.08191999793052673
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,256,8,2,64,0,1,float16,fp8,0,0.08055466910203297
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,256,8,2,64,0,1,fp8,fp8,0,0.10018133123715718
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,256,8,4,64,0,1,float16,float16,0,0.08379733562469482
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,256,8,4,64,0,1,float16,fp8,0,0.08669867118199666
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,256,8,4,64,0,1,fp8,fp8,0,0.10154666503270467
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,256,8,8,64,0,1,float16,float16,0,0.04778666794300079
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,256,8,8,64,0,1,float16,fp8,0,0.0481279989083608
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,256,8,8,64,0,1,fp8,fp8,0,0.05836800237496694
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,256,8,1,64,0,1,float16,float16,0,0.04454400142033895
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,256,8,1,64,0,1,float16,fp8,0,0.04454400142033895
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,256,8,1,64,0,1,fp8,fp8,0,0.05836800237496694
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,256,8,2,64,0,1,float16,float16,0,0.04437333345413208
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,256,8,2,64,0,1,float16,fp8,0,0.04505600035190582
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,256,8,2,64,0,1,fp8,fp8,0,0.05853866537412008
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,256,8,4,64,0,1,float16,float16,0,0.045909335215886436
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,256,8,4,64,0,1,float16,fp8,0,0.04642133414745331
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,256,8,4,64,0,1,fp8,fp8,0,0.05973333120346069
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,256,8,8,64,0,1,float16,float16,0,0.0290133332212766
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,256,8,8,64,0,1,float16,fp8,0,0.0288426677385966
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,256,8,8,64,0,1,fp8,fp8,0,0.0341333324710528
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,256,8,1,64,0,1,float16,float16,0,0.027477333943049114
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,256,8,1,64,0,1,float16,fp8,0,0.027989332874615986
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,256,8,1,64,0,1,fp8,fp8,0,0.03362133353948593
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,256,8,2,64,0,1,float16,float16,0,0.027818667391935985
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,256,8,2,64,0,1,float16,fp8,0,0.028160000840822857
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,256,8,2,64,0,1,fp8,fp8,0,0.03328000009059906
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,256,8,4,64,0,1,float16,float16,0,0.028501334289709728
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,256,8,4,64,0,1,float16,fp8,0,0.02867199977238973
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,256,8,4,64,0,1,fp8,fp8,0,0.03362133353948593
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,256,8,8,64,0,1,float16,fp8,0,0.018090666582187016
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,256,8,8,64,0,1,float16,float16,0,0.018090666582187016
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,256,8,8,64,0,1,fp8,fp8,0,0.02218666672706604
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,256,8,1,64,0,1,float16,float16,0,0.016895999511082966
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,256,8,1,64,0,1,fp8,fp8,0,0.022015998760859173
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,256,8,1,64,0,1,float16,fp8,0,0.017407999684413273
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,256,8,2,64,0,1,float16,float16,0,0.017407999684413273
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,256,8,2,64,0,1,float16,fp8,0,0.017237332959969837
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,256,8,2,64,0,1,fp8,fp8,0,0.02218666672706604
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,256,8,4,64,0,1,float16,fp8,0,0.01757866640885671
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,256,8,4,64,0,1,float16,float16,0,0.017407999684413273
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,256,8,4,64,0,1,fp8,fp8,0,0.02218666672706604
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,256,8,8,64,0,1,float16,float16,0,0.013823999712864557
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,256,8,8,64,0,1,float16,fp8,0,0.013823999712864557
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,256,8,8,64,0,1,fp8,fp8,0,0.018090666582187016
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,256,8,1,64,0,1,float16,float16,0,0.013482666263977686
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,256,8,1,64,0,1,float16,fp8,0,0.013482666263977686
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,256,8,1,64,0,1,fp8,fp8,0,0.018090666582187016
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,256,8,2,64,0,1,float16,float16,0,0.013482666263977686
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,256,8,2,64,0,1,float16,fp8,0,0.013482666263977686
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,256,8,2,64,0,1,fp8,fp8,0,0.01826133330663045
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,256,8,4,64,0,1,float16,float16,0,0.013653332988421122
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,256,8,4,64,0,1,float16,fp8,0,0.013823999712864557
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,256,8,4,64,0,1,fp8,fp8,0,0.018432000031073887
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,256,8,8,64,0,1,float16,float16,0,0.012458667159080505
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,256,8,8,64,0,1,float16,fp8,0,0.012458667159080505
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,256,8,8,64,0,1,fp8,fp8,0,0.016554666062196095
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,256,8,1,64,0,1,float16,float16,0,0.01228800043463707
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,256,8,1,64,0,1,float16,fp8,0,0.01228800043463707
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,256,8,1,64,0,1,fp8,fp8,0,0.01672533278663953
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,256,8,2,64,0,1,float16,float16,0,0.01228800043463707
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,256,8,2,64,0,1,float16,fp8,0,0.012458667159080505
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,256,8,2,64,0,1,fp8,fp8,0,0.01672533278663953
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,256,8,4,64,0,1,float16,float16,0,0.012458667159080505
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,256,8,4,64,0,1,float16,fp8,0,0.012629333883523941
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,256,8,4,64,0,1,fp8,fp8,0,0.01672533278663953
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,256,8,8,64,0,1,float16,float16,0,0.011776000261306763
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,256,8,8,64,0,1,float16,fp8,0,0.012117333710193634
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,256,8,8,64,0,1,fp8,fp8,0,0.01621333385507266
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,256,8,1,64,0,1,float16,float16,0,0.011776000261306763
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,256,8,1,64,0,1,float16,fp8,0,0.012117333710193634
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,256,8,1,64,0,1,fp8,fp8,0,0.016384000579516094
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,256,8,2,64,0,1,float16,float16,0,0.011946666985750198
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,256,8,2,64,0,1,float16,fp8,0,0.012117333710193634
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,256,8,2,64,0,1,fp8,fp8,0,0.01672533278663953
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,256,8,4,64,0,1,float16,float16,0,0.011946666985750198
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,256,8,4,64,0,1,float16,fp8,0,0.012117333710193634
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,256,8,4,64,0,1,fp8,fp8,0,0.016384000579516094
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,128,8,1,64,0,1,float16,float16,0,0.1264639993508657
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,128,8,1,64,0,1,float16,fp8,0,0.1397760013739268
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,128,8,1,64,0,1,fp8,fp8,0,0.155648003021876
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,128,8,2,64,0,1,float16,float16,0,0.1389226714769999
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,128,8,2,64,0,1,float16,fp8,0,0.13687466581662497
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,128,8,2,64,0,1,fp8,fp8,0,0.20360533396402994
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,128,8,4,64,0,1,float16,float16,0,0.18619734048843384
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,128,8,4,64,0,1,float16,fp8,0,0.1469439963499705
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,128,8,8,64,0,1,float16,float16,0,0.07167999943097432
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,128,8,4,64,0,1,fp8,fp8,0,0.2879146734873454
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,128,8,8,64,0,1,float16,fp8,0,0.06758399804433186
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,128,8,8,64,0,1,fp8,fp8,0,0.16127999623616537
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,128,8,1,64,0,1,float16,float16,0,0.06144000093142191
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,128,8,1,64,0,1,float16,fp8,0,0.062463998794555664
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,128,8,1,64,0,1,fp8,fp8,0,0.08021333316961925
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,128,8,2,64,0,1,float16,float16,0,0.06297599772612254
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,128,8,2,64,0,1,float16,fp8,0,0.062463998794555664
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,128,8,2,64,0,1,fp8,fp8,0,0.08260266482830048
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,128,8,4,64,0,1,float16,fp8,0,0.06570666531721751
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,128,8,4,64,0,1,float16,float16,0,0.06502399841944377
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,128,8,4,64,0,1,fp8,fp8,0,0.08157866696516673
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,128,8,8,64,0,1,float16,float16,0,0.03942399968703588
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,128,8,8,64,0,1,float16,fp8,0,0.03857066730658213
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,128,8,8,64,0,1,fp8,fp8,0,0.04625066618124644
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,128,8,1,64,0,1,float16,float16,0,0.03583999971548716
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,128,8,1,64,0,1,float16,fp8,0,0.03669333209594091
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,128,8,1,64,0,1,fp8,fp8,0,0.04505600035190582
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,128,8,2,64,0,1,float16,fp8,0,0.036864000062147774
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,128,8,2,64,0,1,float16,float16,0,0.0365226666132609
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,128,8,2,64,0,1,fp8,fp8,0,0.045909335215886436
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,128,8,4,64,0,1,float16,float16,0,0.03754666695992152
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,128,8,4,64,0,1,float16,fp8,0,0.037717332442601524
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,128,8,4,64,0,1,fp8,fp8,0,0.045738667249679565
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,128,8,8,64,0,1,float16,float16,0,0.024405332903067272
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,128,8,8,64,0,1,float16,fp8,0,0.0240639994541804
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,128,8,8,64,0,1,fp8,fp8,0,0.027306665976842243
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,128,8,1,64,0,1,float16,float16,0,0.022698665658632915
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,128,8,1,64,0,1,float16,fp8,0,0.022869333624839783
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,128,8,1,64,0,1,fp8,fp8,0,0.0264533335963885
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,128,8,2,64,0,1,float16,float16,0,0.022869333624839783
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,128,8,2,64,0,1,float16,fp8,0,0.022869333624839783
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,128,8,2,64,0,1,fp8,fp8,0,0.0264533335963885
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,128,8,4,64,0,1,float16,float16,0,0.023381332556406658
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,128,8,4,64,0,1,float16,fp8,0,0.023893333971500397
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,128,8,4,64,0,1,fp8,fp8,0,0.026965332527955372
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,128,8,8,64,0,1,float16,float16,0,0.01570133368174235
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,128,8,8,64,0,1,float16,fp8,0,0.015360000232855478
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,128,8,8,64,0,1,fp8,fp8,0,0.01791999985774358
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,128,8,1,64,0,1,float16,float16,0,0.014335999886194864
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,128,8,1,64,0,1,float16,fp8,0,0.0145066666106383
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,128,8,2,64,0,1,float16,float16,0,0.014677333335081736
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,128,8,1,64,0,1,fp8,fp8,0,0.017749333133300144
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,128,8,2,64,0,1,float16,fp8,0,0.014677333335081736
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,128,8,2,64,0,1,fp8,fp8,0,0.01757866640885671
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,128,8,4,64,0,1,float16,float16,0,0.015018666783968607
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,128,8,4,64,0,1,float16,fp8,0,0.014848000059525171
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,128,8,4,64,0,1,fp8,fp8,0,0.017749333133300144
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,128,8,8,64,0,1,float16,float16,0,0.011605333536863327
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,128,8,8,64,0,1,float16,fp8,0,0.011946666985750198
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,128,8,8,64,0,1,fp8,fp8,0,0.0145066666106383
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,128,8,1,64,0,1,float16,float16,0,0.011264000087976456
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,128,8,1,64,0,1,float16,fp8,0,0.011264000087976456
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,128,8,1,64,0,1,fp8,fp8,0,0.014165333161751429
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,128,8,2,64,0,1,float16,float16,0,0.011434666812419891
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,128,8,2,64,0,1,float16,fp8,0,0.011264000087976456
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,128,8,2,64,0,1,fp8,fp8,0,0.014335999886194864
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,128,8,4,64,0,1,float16,float16,0,0.011434666812419891
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,128,8,4,64,0,1,float16,fp8,0,0.011605333536863327
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,128,8,4,64,0,1,fp8,fp8,0,0.014335999886194864
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,128,8,8,64,0,1,float16,float16,0,0.010410666465759277
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,128,8,8,64,0,1,float16,fp8,0,0.010581333190202713
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,128,8,8,64,0,1,fp8,fp8,0,0.012970666090647379
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,128,8,1,64,0,1,float16,float16,0,0.010239999741315842
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,128,8,1,64,0,1,float16,fp8,0,0.010239999741315842
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,128,8,1,64,0,1,fp8,fp8,0,0.012800000607967377
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,128,8,2,64,0,1,float16,float16,0,0.010069333637754122
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,128,8,2,64,0,1,float16,fp8,0,0.010410666465759277
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,128,8,2,64,0,1,fp8,fp8,0,0.013141332815090815
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,128,8,4,64,0,1,float16,float16,0,0.010239999741315842
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,128,8,4,64,0,1,float16,fp8,0,0.010581333190202713
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,128,8,4,64,0,1,fp8,fp8,0,0.012970666090647379
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,128,8,8,64,0,1,float16,float16,0,0.009557333464423815
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,128,8,8,64,0,1,float16,fp8,0,0.009898666913310686
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,128,8,8,64,0,1,fp8,fp8,0,0.012629333883523941
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,128,8,1,64,0,1,float16,float16,0,0.00972800018886725
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,128,8,1,64,0,1,float16,fp8,0,0.010069333637754122
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,128,8,1,64,0,1,fp8,fp8,0,0.012629333883523941
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,128,8,2,64,0,1,float16,float16,0,0.00972800018886725
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,128,8,2,64,0,1,float16,fp8,0,0.010069333637754122
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,128,8,2,64,0,1,fp8,fp8,0,0.012800000607967377
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,128,8,4,64,0,1,float16,float16,0,0.00972800018886725
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,128,8,4,64,0,1,float16,fp8,0,0.010069333637754122
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,128,8,8,64,0,1,float16,fp8,0,0.00972800018886725
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,128,8,8,64,0,1,float16,float16,0,0.009557333464423815
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,128,8,4,64,0,1,fp8,fp8,0,0.012800000607967377
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,128,8,8,64,0,1,fp8,fp8,0,0.01228800043463707
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,128,8,1,64,0,1,float16,float16,0,0.009557333464423815
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,128,8,1,64,0,1,float16,fp8,0,0.009898666913310686
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,128,8,1,64,0,1,fp8,fp8,0,0.012800000607967377
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,128,8,2,64,0,1,float16,float16,0,0.00972800018886725
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,128,8,2,64,0,1,float16,fp8,0,0.009898666913310686
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,128,8,2,64,0,1,fp8,fp8,0,0.012800000607967377
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,128,8,4,64,0,1,float16,float16,0,0.009557333464423815
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,128,8,4,64,0,1,float16,fp8,0,0.009898666913310686
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,128,8,4,64,0,1,fp8,fp8,0,0.012458667159080505
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,64,8,1,64,0,1,float16,float16,0,0.052906667192777
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,64,8,1,64,0,1,float16,fp8,0,0.05239466826121012
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,64,8,1,64,0,1,fp8,fp8,0,0.10905599594116211
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,64,8,2,64,0,1,float16,float16,0,0.05341866612434387
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,64,8,2,64,0,1,float16,fp8,0,0.05358933409055074
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,64,8,2,64,0,1,fp8,fp8,0,0.10990933577219646
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,64,8,4,64,0,1,float16,float16,0,0.05495466788609823
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,64,8,4,64,0,1,float16,fp8,0,0.05529599885145823
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,64,8,8,64,0,1,float16,float16,0,0.034304000437259674
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,64,8,4,64,0,1,fp8,fp8,0,0.11178666353225708
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,64,8,8,64,0,1,float16,fp8,0,0.03345066557327906
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,64,8,8,64,0,1,fp8,fp8,0,0.06331733365853627
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,64,8,1,64,0,1,float16,float16,0,0.030720000465710957
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,64,8,1,64,0,1,float16,fp8,0,0.03089066594839096
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,64,8,1,64,0,1,fp8,fp8,0,0.06144000093142191
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,64,8,2,64,0,1,float16,float16,0,0.0314026673634847
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,64,8,2,64,0,1,float16,fp8,0,0.031231999397277832
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,64,8,2,64,0,1,fp8,fp8,0,0.06263466676076253
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,64,8,4,64,0,1,float16,float16,0,0.05597866574923197
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,64,8,4,64,0,1,float16,fp8,0,0.031914666295051575
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,64,8,4,64,0,1,fp8,fp8,0,0.0631466656923294
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,64,8,8,64,0,1,float16,float16,0,0.021162666380405426
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,64,8,8,64,0,1,fp8,fp8,0,0.03703466554482778
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,64,8,8,64,0,1,float16,fp8,0,0.020992000897725422
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,64,8,1,64,0,1,float16,float16,0,0.019285333653291065
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,64,8,1,64,0,1,float16,fp8,0,0.019285333653291065
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,64,8,1,64,0,1,fp8,fp8,0,0.03566933423280716
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,64,8,2,64,0,1,float16,float16,0,0.019626667102177937
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,64,8,2,64,0,1,float16,fp8,0,0.019797333826621372
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,64,8,2,64,0,1,fp8,fp8,0,0.0365226666132609
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,64,8,4,64,0,1,float16,float16,0,0.020309332758188248
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,64,8,4,64,0,1,fp8,fp8,0,0.03703466554482778
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,64,8,4,64,0,1,float16,fp8,0,0.020309332758188248
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,64,8,8,64,0,1,float16,float16,0,0.013994666437307993
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,64,8,8,64,0,1,float16,fp8,0,0.013653332988421122
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,64,8,8,64,0,1,fp8,fp8,0,0.021503999829292297
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,64,8,1,64,0,1,float16,float16,0,0.012458667159080505
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,64,8,1,64,0,1,float16,fp8,0,0.012629333883523941
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,64,8,1,64,0,1,fp8,fp8,0,0.021333334346612293
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,64,8,2,64,0,1,float16,fp8,0,0.012800000607967377
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,64,8,2,64,0,1,float16,float16,0,0.013141332815090815
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,64,8,2,64,0,1,fp8,fp8,0,0.021333334346612293
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,64,8,4,64,0,1,float16,float16,0,0.013141332815090815
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,64,8,4,64,0,1,float16,fp8,0,0.013141332815090815
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,64,8,4,64,0,1,fp8,fp8,0,0.021674667795499165
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,64,8,8,64,0,1,float16,float16,0,0.010581333190202713
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,64,8,8,64,0,1,float16,fp8,0,0.010581333190202713
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,64,8,8,64,0,1,fp8,fp8,0,0.015360000232855478
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,64,8,1,64,0,1,float16,float16,0,0.010239999741315842
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,64,8,1,64,0,1,float16,fp8,0,0.010239999741315842
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,64,8,1,64,0,1,fp8,fp8,0,0.015189333508412043
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,64,8,2,64,0,1,float16,float16,0,0.010069333637754122
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,64,8,2,64,0,1,float16,fp8,0,0.010239999741315842
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,64,8,2,64,0,1,fp8,fp8,0,0.015189333508412043
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,64,8,4,64,0,1,float16,float16,0,0.010410666465759277
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,64,8,4,64,0,1,fp8,fp8,0,0.015530666957298914
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,64,8,4,64,0,1,float16,fp8,0,0.010410666465759277
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,64,8,8,64,0,1,float16,float16,0,0.009216000015536943
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,64,8,8,64,0,1,float16,fp8,0,0.009216000015536943
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,64,8,8,64,0,1,fp8,fp8,0,0.012800000607967377
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,64,8,1,64,0,1,float16,float16,0,0.008874666566650072
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,64,8,1,64,0,1,float16,fp8,0,0.00938666673998038
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,64,8,1,64,0,1,fp8,fp8,0,0.012970666090647379
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,64,8,2,64,0,1,float16,float16,0,0.009045333291093508
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,64,8,2,64,0,1,float16,fp8,0,0.00938666673998038
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,64,8,2,64,0,1,fp8,fp8,0,0.012970666090647379
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,64,8,4,64,0,1,float16,float16,0,0.009216000015536943
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,64,8,4,64,0,1,float16,fp8,0,0.009557333464423815
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,64,8,4,64,0,1,fp8,fp8,0,0.013141332815090815
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,64,8,8,64,0,1,float16,float16,0,0.009045333291093508
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,64,8,8,64,0,1,float16,fp8,0,0.008703999842206636
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,64,8,8,64,0,1,fp8,fp8,0,0.012117333710193634
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,64,8,1,64,0,1,float16,float16,0,0.008703999842206636
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,64,8,1,64,0,1,float16,fp8,0,0.009045333291093508
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,64,8,1,64,0,1,fp8,fp8,0,0.012458667159080505
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,64,8,2,64,0,1,float16,float16,0,0.008874666566650072
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,64,8,2,64,0,1,float16,fp8,0,0.009216000015536943
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,64,8,4,64,0,1,float16,float16,0,0.008874666566650072
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,64,8,2,64,0,1,fp8,fp8,0,0.012458667159080505
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,64,8,4,64,0,1,float16,fp8,0,0.009045333291093508
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,64,8,4,64,0,1,fp8,fp8,0,0.01228800043463707
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,64,8,8,64,0,1,float16,float16,0,0.009045333291093508
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,64,8,8,64,0,1,float16,fp8,0,0.009216000015536943
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,64,8,8,64,0,1,fp8,fp8,0,0.011946666985750198
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,64,8,1,64,0,1,float16,float16,0,0.008703999842206636
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,64,8,1,64,0,1,float16,fp8,0,0.008874666566650072
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,64,8,2,64,0,1,float16,float16,0,0.008874666566650072
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,64,8,1,64,0,1,fp8,fp8,0,0.01228800043463707
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,64,8,2,64,0,1,float16,fp8,0,0.00938666673998038
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,64,8,2,64,0,1,fp8,fp8,0,0.01228800043463707
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,64,8,4,64,0,1,float16,float16,0,0.00972800018886725
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,64,8,4,64,0,1,float16,fp8,0,0.008874666566650072
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,64,8,4,64,0,1,fp8,fp8,0,0.01228800043463707
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,64,8,8,64,0,1,float16,float16,0,0.008362666393319765
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,64,8,8,64,0,1,float16,fp8,0,0.0085333331177632
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,64,8,8,64,0,1,fp8,fp8,0,0.011776000261306763
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,64,8,1,64,0,1,float16,float16,0,0.0085333331177632
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,64,8,1,64,0,1,float16,fp8,0,0.008874666566650072
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,64,8,1,64,0,1,fp8,fp8,0,0.012117333710193634
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,64,8,2,64,0,1,float16,float16,0,0.008362666393319765
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,64,8,2,64,0,1,float16,fp8,0,0.009045333291093508
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,64,8,4,64,0,1,float16,float16,0,0.00972800018886725
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,64,8,2,64,0,1,fp8,fp8,0,0.012117333710193634
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,64,8,4,64,0,1,float16,fp8,0,0.00938666673998038
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,64,8,4,64,0,1,fp8,fp8,0,0.012117333710193634
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,32,8,1,64,0,1,float16,fp8,0,0.034645333886146545
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,32,8,1,64,0,1,float16,float16,0,0.034474665919939675
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,32,8,1,64,0,1,fp8,fp8,0,0.09591466188430786
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,32,8,2,64,0,1,float16,float16,0,0.034304000437259674
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,32,8,2,64,0,1,float16,fp8,0,0.034815999368826546
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,32,8,2,64,0,1,fp8,fp8,0,0.09591466188430786
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,32,8,4,64,0,1,float16,float16,0,0.03532800078392029
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,32,8,4,64,0,1,float16,fp8,0,0.03532800078392029
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,32,8,8,64,0,1,float16,float16,0,0.02184533327817917
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,32,8,4,64,0,1,fp8,fp8,0,0.09762133161226909
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,32,8,8,64,0,1,float16,fp8,0,0.02184533327817917
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,32,8,8,64,0,1,fp8,fp8,0,0.05580799778302511
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,32,8,1,64,0,1,float16,float16,0,0.021162666380405426
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,32,8,1,64,0,1,float16,fp8,0,0.020992000897725422
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,32,8,1,64,0,1,fp8,fp8,0,0.05529599885145823
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,32,8,2,64,0,1,float16,float16,0,0.021333334346612293
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,32,8,2,64,0,1,float16,fp8,0,0.021162666380405426
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,32,8,4,64,0,1,float16,float16,0,0.02184533327817917
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,32,8,2,64,0,1,fp8,fp8,0,0.05597866574923197
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,32,8,4,64,0,1,float16,fp8,0,0.021674667795499165
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,32,8,4,64,0,1,fp8,fp8,0,0.05597866574923197
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,32,8,8,64,0,1,float16,float16,0,0.014335999886194864
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,32,8,8,64,0,1,float16,fp8,0,0.014165333161751429
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,32,8,8,64,0,1,fp8,fp8,0,0.032255999743938446
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,32,8,1,64,0,1,float16,float16,0,0.013653332988421122
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,32,8,1,64,0,1,float16,fp8,0,0.013653332988421122
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,32,8,1,64,0,1,fp8,fp8,0,0.03242666771014532
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,32,8,2,64,0,1,float16,float16,0,0.013823999712864557
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,32,8,2,64,0,1,float16,fp8,0,0.013482666263977686
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,32,8,2,64,0,1,fp8,fp8,0,0.032255999743938446
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,32,8,4,64,0,1,float16,float16,0,0.014165333161751429
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,32,8,4,64,0,1,float16,fp8,0,0.013823999712864557
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,32,8,8,64,0,1,float16,float16,0,0.010410666465759277
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,32,8,4,64,0,1,fp8,fp8,0,0.03259733319282532
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,32,8,8,64,0,1,float16,fp8,0,0.010751999914646149
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,32,8,8,64,0,1,fp8,fp8,0,0.019797333826621372
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,32,8,1,64,0,1,float16,float16,0,0.010069333637754122
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,32,8,1,64,0,1,float16,fp8,0,0.010069333637754122
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,32,8,1,64,0,1,fp8,fp8,0,0.0194560003777345
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,32,8,2,64,0,1,float16,float16,0,0.010069333637754122
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,32,8,2,64,0,1,float16,fp8,0,0.010239999741315842
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,32,8,4,64,0,1,float16,float16,0,0.010410666465759277
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,32,8,2,64,0,1,fp8,fp8,0,0.019797333826621372
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,32,8,4,64,0,1,float16,fp8,0,0.010581333190202713
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,32,8,4,64,0,1,fp8,fp8,0,0.020138667275508244
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,32,8,8,64,0,1,float16,float16,0,0.009045333291093508
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,32,8,8,64,0,1,float16,fp8,0,0.009216000015536943
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,32,8,8,64,0,1,fp8,fp8,0,0.0145066666106383
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,32,8,1,64,0,1,float16,float16,0,0.008874666566650072
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,32,8,1,64,0,1,float16,fp8,0,0.009045333291093508
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,32,8,1,64,0,1,fp8,fp8,0,0.0145066666106383
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,32,8,2,64,0,1,float16,float16,0,0.009045333291093508
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,32,8,2,64,0,1,float16,fp8,0,0.009216000015536943
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,32,8,2,64,0,1,fp8,fp8,0,0.014677333335081736
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,32,8,4,64,0,1,float16,fp8,0,0.009216000015536943
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,32,8,4,64,0,1,float16,float16,0,0.009045333291093508
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,32,8,4,64,0,1,fp8,fp8,0,0.014677333335081736
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,32,8,8,64,0,1,float16,float16,0,0.0085333331177632
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,32,8,8,64,0,1,float16,fp8,0,0.008874666566650072
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,32,8,1,64,0,1,float16,float16,0,0.008703999842206636
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,32,8,8,64,0,1,fp8,fp8,0,0.012458667159080505
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,32,8,1,64,0,1,float16,fp8,0,0.008362666393319765
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,32,8,1,64,0,1,fp8,fp8,0,0.012629333883523941
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,32,8,2,64,0,1,float16,float16,0,0.0085333331177632
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,32,8,2,64,0,1,float16,fp8,0,0.008362666393319765
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,32,8,2,64,0,1,fp8,fp8,0,0.012800000607967377
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,32,8,4,64,0,1,float16,float16,0,0.008703999842206636
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,32,8,4,64,0,1,float16,fp8,0,0.008874666566650072
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,32,8,4,64,0,1,fp8,fp8,0,0.012800000607967377
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,32,8,8,64,0,1,float16,float16,0,0.007680000116427739
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,32,8,8,64,0,1,float16,fp8,0,0.008362666393319765
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,32,8,8,64,0,1,fp8,fp8,0,0.011946666985750198
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,32,8,1,64,0,1,float16,float16,0,0.008021333565314611
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,32,8,1,64,0,1,float16,fp8,0,0.00919999989370505
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,32,8,1,64,0,1,fp8,fp8,0,0.012117333710193634
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,32,8,2,64,0,1,float16,float16,0,0.009045333291093508
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,32,8,2,64,0,1,float16,fp8,0,0.009216000015536943
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,32,8,2,64,0,1,fp8,fp8,0,0.012117333710193634
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,32,8,4,64,0,1,float16,float16,0,0.008362666393319765
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,32,8,4,64,0,1,fp8,fp8,0,0.012117333710193634
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,32,8,4,64,0,1,float16,fp8,0,0.009216000015536943
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,32,8,8,64,0,1,float16,float16,0,0.0085333331177632
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,32,8,8,64,0,1,float16,fp8,0,0.008874666566650072
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,32,8,8,64,0,1,fp8,fp8,0,0.011946666985750198
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,32,8,1,64,0,1,float16,float16,0,0.008021333565314611
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,32,8,1,64,0,1,float16,fp8,0,0.008021333565314611
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,32,8,1,64,0,1,fp8,fp8,0,0.012117333710193634
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,32,8,2,64,0,1,float16,float16,0,0.0085333331177632
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,32,8,2,64,0,1,float16,fp8,0,0.0085333331177632
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,32,8,2,64,0,1,fp8,fp8,0,0.011946666985750198
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,32,8,4,64,0,1,float16,float16,0,0.008874666566650072
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,32,8,4,64,0,1,float16,fp8,0,0.008874666566650072
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,32,8,4,64,0,1,fp8,fp8,0,0.01228800043463707
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,32,8,8,64,0,1,float16,float16,0,0.008362666393319765
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,32,8,8,64,0,1,float16,fp8,0,0.008703999842206636
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,32,8,8,64,0,1,fp8,fp8,0,0.011776000261306763
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,32,8,1,64,0,1,float16,float16,0,0.008703999842206636
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,32,8,1,64,0,1,float16,fp8,0,0.009045333291093508
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,32,8,1,64,0,1,fp8,fp8,0,0.012117333710193634
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,32,8,2,64,0,1,float16,float16,0,0.008874666566650072
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,32,8,2,64,0,1,float16,fp8,0,0.008362666393319765
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,32,8,2,64,0,1,fp8,fp8,0,0.011946666985750198
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,32,8,4,64,0,1,float16,fp8,0,0.008362666393319765
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,32,8,4,64,0,1,float16,float16,0,0.0086666668454806
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,32,8,4,64,0,1,fp8,fp8,0,0.011946666985750198
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,16,8,1,64,0,1,float16,float16,0,0.027477333943049114
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,16,8,1,64,0,1,float16,fp8,0,0.027647999425729115
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,16,8,1,64,0,1,fp8,fp8,0,0.08959999680519104
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,16,8,2,64,0,1,float16,float16,0,0.027818667391935985
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,16,8,2,64,0,1,float16,fp8,0,0.027818667391935985
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,16,8,2,64,0,1,fp8,fp8,0,0.08959999680519104
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,16,8,4,64,0,1,float16,float16,0,0.028160000840822857
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,16,8,4,64,0,1,float16,fp8,0,0.028160000840822857
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,16,8,8,64,0,1,float16,float16,0,0.0170666662355264
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,16,8,4,64,0,1,fp8,fp8,0,0.08942932883898418
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,16,8,8,64,0,1,float16,fp8,0,0.016554666062196095
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,16,8,8,64,0,1,fp8,fp8,0,0.051370665431022644
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,16,8,1,64,0,1,float16,float16,0,0.01672533278663953
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,16,8,1,64,0,1,float16,fp8,0,0.01672533278663953
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,16,8,1,64,0,1,fp8,fp8,0,0.05171200136343638
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,16,8,2,64,0,1,float16,float16,0,0.016895999511082966
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,16,8,2,64,0,1,float16,fp8,0,0.016895999511082966
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,16,8,2,64,0,1,fp8,fp8,0,0.051541333397229515
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,16,8,4,64,0,1,float16,float16,0,0.017237332959969837
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,16,8,4,64,0,1,float16,fp8,0,0.0170666662355264
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,16,8,4,64,0,1,fp8,fp8,0,0.051541333397229515
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,16,8,8,64,0,1,float16,float16,0,0.011605333536863327
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,16,8,8,64,0,1,float16,fp8,0,0.011776000261306763
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,16,8,1,64,0,1,float16,float16,0,0.011605333536863327
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,16,8,8,64,0,1,fp8,fp8,0,0.030207999050617218
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,16,8,1,64,0,1,float16,fp8,0,0.011946666985750198
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,16,8,1,64,0,1,fp8,fp8,0,0.030378667016824085
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,16,8,2,64,0,1,float16,float16,0,0.011605333536863327
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,16,8,2,64,0,1,float16,fp8,0,0.011605333536863327
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,16,8,2,64,0,1,fp8,fp8,0,0.03054933249950409
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,16,8,4,64,0,1,float16,float16,0,0.011776000261306763
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,16,8,4,64,0,1,float16,fp8,0,0.011946666985750198
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,16,8,4,64,0,1,fp8,fp8,0,0.030720000465710957
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,16,8,8,64,0,1,float16,float16,0,0.008703999842206636
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,16,8,8,64,0,1,float16,fp8,0,0.008874666566650072
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,16,8,8,64,0,1,fp8,fp8,0,0.01877333347996076
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,16,8,1,64,0,1,float16,float16,0,0.008703999842206636
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,16,8,1,64,0,1,float16,fp8,0,0.008874666566650072
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,16,8,1,64,0,1,fp8,fp8,0,0.01877333347996076
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,16,8,2,64,0,1,float16,float16,0,0.008874666566650072
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,16,8,2,64,0,1,float16,fp8,0,0.009045333291093508
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,16,8,2,64,0,1,fp8,fp8,0,0.018602666755517323
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,16,8,4,64,0,1,float16,float16,0,0.009045333291093508
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,16,8,4,64,0,1,float16,fp8,0,0.009216000015536943
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,16,8,4,64,0,1,fp8,fp8,0,0.01877333347996076
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,16,8,8,64,0,1,float16,float16,0,0.008021333565314611
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,16,8,8,64,0,1,float16,fp8,0,0.0085333331177632
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,16,8,8,64,0,1,fp8,fp8,0,0.013994666437307993
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,16,8,1,64,0,1,float16,float16,0,0.0085333331177632
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,16,8,1,64,0,1,float16,fp8,0,0.009045333291093508
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,16,8,1,64,0,1,fp8,fp8,0,0.013994666437307993
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,16,8,2,64,0,1,float16,float16,0,0.008192000289758047
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,16,8,2,64,0,1,float16,fp8,0,0.008362666393319765
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,16,8,2,64,0,1,fp8,fp8,0,0.013823999712864557
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,16,8,4,64,0,1,float16,float16,0,0.0085333331177632
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,16,8,4,64,0,1,float16,fp8,0,0.008874666566650072
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,16,8,4,64,0,1,fp8,fp8,0,0.013994666437307993
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,16,8,8,64,0,1,float16,float16,0,0.0085333331177632
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,16,8,8,64,0,1,float16,fp8,0,0.009045333291093508
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,16,8,8,64,0,1,fp8,fp8,0,0.01228800043463707
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,16,8,1,64,0,1,float16,float16,0,0.007850666840871176
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,16,8,1,64,0,1,float16,fp8,0,0.008362666393319765
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,16,8,1,64,0,1,fp8,fp8,0,0.012458667159080505
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,16,8,2,64,0,1,float16,float16,0,0.009045333291093508
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,16,8,2,64,0,1,float16,fp8,0,0.008703999842206636
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,16,8,2,64,0,1,fp8,fp8,0,0.012629333883523941
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,16,8,4,64,0,1,float16,float16,0,0.008874666566650072
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,16,8,4,64,0,1,float16,fp8,0,0.008192000289758047
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,16,8,4,64,0,1,fp8,fp8,0,0.012458667159080505
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,16,8,8,64,0,1,float16,float16,0,0.008192000289758047
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,16,8,8,64,0,1,float16,fp8,0,0.008874666566650072
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,16,8,8,64,0,1,fp8,fp8,0,0.011776000261306763
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,16,8,1,64,0,1,float16,fp8,0,0.008703999842206636
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,16,8,1,64,0,1,float16,float16,0,0.008837333569924036
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,16,8,1,64,0,1,fp8,fp8,0,0.012117333710193634
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,16,8,2,64,0,1,float16,float16,0,0.007850666840871176
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,16,8,2,64,0,1,float16,fp8,0,0.0085333331177632
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,16,8,2,64,0,1,fp8,fp8,0,0.011946666985750198
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,16,8,4,64,0,1,float16,fp8,0,0.009205333267649015
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,16,8,4,64,0,1,float16,float16,0,0.008703999842206636
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,16,8,8,64,0,1,float16,fp8,0,0.009183999771873156
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,16,8,4,64,0,1,fp8,fp8,0,0.011946666985750198
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,16,8,8,64,0,1,float16,float16,0,0.008703999842206636
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,16,8,8,64,0,1,fp8,fp8,0,0.011946666985750198
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,16,8,1,64,0,1,float16,float16,0,0.008362666393319765
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,16,8,1,64,0,1,float16,fp8,0,0.007850666840871176
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,16,8,1,64,0,1,fp8,fp8,0,0.012117333710193634
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,16,8,2,64,0,1,float16,float16,0,0.0085333331177632
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,16,8,2,64,0,1,float16,fp8,0,0.008874666566650072
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,16,8,2,64,0,1,fp8,fp8,0,0.011946666985750198
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,16,8,4,64,0,1,float16,float16,0,0.008821333448092142
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,16,8,4,64,0,1,float16,fp8,0,0.008021333565314611
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,16,8,4,64,0,1,fp8,fp8,0,0.011776000261306763
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,16,8,8,64,0,1,float16,float16,0,0.008192000289758047
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,16,8,8,64,0,1,float16,fp8,0,0.008858666444818178
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,16,8,8,64,0,1,fp8,fp8,0,0.011434666812419891
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,16,8,1,64,0,1,float16,float16,0,0.008703999842206636
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,16,8,1,64,0,1,float16,fp8,0,0.008703999842206636
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,16,8,1,64,0,1,fp8,fp8,0,0.011776000261306763
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,16,8,2,64,0,1,float16,float16,0,0.008362666393319765
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,16,8,2,64,0,1,float16,fp8,0,0.007680000116427739
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,16,8,2,64,0,1,fp8,fp8,0,0.011605333536863327
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,16,8,4,64,0,1,float16,float16,0,0.0085333331177632
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,16,8,4,64,0,1,float16,fp8,0,0.008362666393319765
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,16,8,4,64,0,1,fp8,fp8,0,0.011946666985750198
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,16384,4,1,64,0,1,float16,float16,0,7.81056022644043
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,16384,4,1,64,0,1,float16,fp8,0,7.591594696044922
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,16384,4,2,64,0,1,float16,float16,0,7.9230295817057295
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,16384,4,4,64,0,1,float16,float16,0,3.6171092987060547
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,16384,4,4,64,0,1,float16,fp8,0,3.6026026407877603
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,16384,4,2,64,0,1,float16,fp8,0,7.7743784586588545
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,16384,4,1,64,0,1,fp8,fp8,0,9.90395736694336
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,16384,4,2,64,0,1,fp8,fp8,0,10.357930501302084
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,16384,4,1,64,0,1,float16,float16,0,3.401386578877767
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,16384,4,4,64,0,1,fp8,fp8,0,5.099007924397786
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,16384,4,1,64,0,1,float16,fp8,0,3.5554987589518228
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,16384,4,2,64,0,1,float16,float16,0,3.532970746358236
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,16384,4,2,64,0,1,float16,fp8,0,3.4203306833902993
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,16384,4,1,64,0,1,fp8,fp8,0,4.859221458435059
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,16384,4,2,64,0,1,fp8,fp8,0,4.987392107645671
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,16384,4,4,64,0,1,float16,float16,0,1.7358506520589192
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,16384,4,4,64,0,1,float16,fp8,0,1.7218559583028157
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,16384,4,4,64,0,1,fp8,fp8,0,2.5535146395365396
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,16384,4,1,64,0,1,float16,float16,0,1.7658880551656086
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,16384,4,1,64,0,1,float16,fp8,0,1.7288533846537273
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,16384,4,1,64,0,1,fp8,fp8,0,2.491221268971761
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,16384,4,2,64,0,1,float16,float16,0,1.7206613222757976
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,16384,4,2,64,0,1,float16,fp8,0,1.7145172754923503
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,16384,4,2,64,0,1,fp8,fp8,0,2.5076053937276206
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,16384,4,4,64,0,1,float16,float16,0,0.9775786399841309
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,16384,4,4,64,0,1,float16,fp8,0,1.0142719745635986
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,16384,4,1,64,0,1,float16,float16,0,0.9787733554840088
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,16384,4,4,64,0,1,fp8,fp8,0,1.4475946426391602
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,16384,4,1,64,0,1,float16,fp8,0,0.9753599961598715
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,16384,4,1,64,0,1,fp8,fp8,0,1.43394136428833
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,16384,4,2,64,0,1,float16,float16,0,0.9777493476867676
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,16384,4,2,64,0,1,float16,fp8,0,0.976213296254476
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,16384,4,2,64,0,1,fp8,fp8,0,1.431722640991211
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,12288,4,1,64,0,1,float16,float16,0,4.009471893310547
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,12288,4,1,64,0,1,float16,fp8,0,4.227242787679036
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,12288,4,2,64,0,1,float16,float16,0,4.154709180196126
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,12288,4,1,64,0,1,fp8,fp8,0,5.6516265869140625
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,12288,4,2,64,0,1,float16,fp8,0,4.198570569356282
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,12288,4,4,64,0,1,float16,float16,0,1.9752960205078125
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,12288,4,2,64,0,1,fp8,fp8,0,5.837141036987305
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,12288,4,4,64,0,1,float16,fp8,0,1.9194879531860352
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,12288,4,4,64,0,1,fp8,fp8,0,2.9426345825195312
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,12288,4,1,64,0,1,float16,float16,0,1.9457707405090332
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,12288,4,1,64,0,1,float16,fp8,0,1.9510614077250164
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,12288,4,2,64,0,1,float16,float16,0,1.882794698079427
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,12288,4,1,64,0,1,fp8,fp8,0,2.742272059122721
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,12288,4,2,64,0,1,float16,fp8,0,1.9384320576985676
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,12288,4,2,64,0,1,fp8,fp8,0,2.7296425501505532
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,12288,4,4,64,0,1,float16,float16,0,1.0316800276438396
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,12288,4,4,64,0,1,float16,fp8,0,1.0132479667663574
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,12288,4,4,64,0,1,fp8,fp8,0,1.4909440676371257
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,12288,4,1,64,0,1,float16,float16,0,1.0202453136444092
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,12288,4,1,64,0,1,float16,fp8,0,1.014954646428426
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,12288,4,2,64,0,1,float16,float16,0,1.0245119730631511
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,12288,4,1,64,0,1,fp8,fp8,0,1.504085381825765
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,12288,4,2,64,0,1,float16,fp8,0,1.002837340037028
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,12288,4,2,64,0,1,fp8,fp8,0,1.4873600006103516
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,12288,4,4,64,0,1,float16,float16,0,0.617301344871521
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,12288,4,4,64,0,1,float16,fp8,0,0.5978453159332275
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,12288,4,4,64,0,1,fp8,fp8,0,0.8869547049204508
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,12288,4,1,64,0,1,float16,float16,0,0.6045013268788656
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,12288,4,1,64,0,1,float16,fp8,0,0.6162773370742798
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,12288,4,1,64,0,1,fp8,fp8,0,0.8802986939748129
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,12288,4,2,64,0,1,float16,float16,0,0.5990399916966757
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,12288,4,2,64,0,1,float16,fp8,0,0.6070613463719686
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,12288,4,2,64,0,1,fp8,fp8,0,0.8734719753265381
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,10240,4,1,64,0,1,float16,float16,0,2.689706802368164
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,10240,4,1,64,0,1,float16,fp8,0,2.7356160481770835
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,10240,4,2,64,0,1,float16,float16,0,2.8339198430379233
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,10240,4,1,64,0,1,fp8,fp8,0,3.8548479080200195
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,10240,4,2,64,0,1,float16,fp8,0,2.7944959004720054
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,10240,4,2,64,0,1,fp8,fp8,0,4.0087893803914385
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,10240,4,4,64,0,1,float16,float16,0,1.3666987419128418
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,10240,4,4,64,0,1,float16,fp8,0,1.3361493746439617
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,10240,4,4,64,0,1,fp8,fp8,0,2.0696746508280435
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,10240,4,1,64,0,1,float16,float16,0,1.3892265955607097
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,10240,4,1,64,0,1,float16,fp8,0,1.4061226844787598
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,10240,4,2,64,0,1,float16,float16,0,1.3905919392903645
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,10240,4,1,64,0,1,fp8,fp8,0,1.9276800155639648
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,10240,4,2,64,0,1,float16,fp8,0,1.3388800621032715
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,10240,4,2,64,0,1,fp8,fp8,0,1.93723726272583
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,10240,4,4,64,0,1,float16,float16,0,0.7325013478597006
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,10240,4,4,64,0,1,float16,fp8,0,0.7376213073730469
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,10240,4,4,64,0,1,fp8,fp8,0,1.0765653451283772
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,10240,4,1,64,0,1,float16,float16,0,0.7453013261159261
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,10240,4,1,64,0,1,float16,fp8,0,0.7553706963857015
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,10240,4,1,64,0,1,fp8,fp8,0,1.0533546606699626
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,10240,4,2,64,0,1,float16,float16,0,0.7273813088734945
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,10240,4,2,64,0,1,float16,fp8,0,0.7417173385620117
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,10240,4,2,64,0,1,fp8,fp8,0,1.0521600246429443
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,10240,4,4,64,0,1,float16,float16,0,0.45550934473673504
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,10240,4,4,64,0,1,float16,fp8,0,0.4601173400878906
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,10240,4,4,64,0,1,fp8,fp8,0,0.5870933135350546
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,10240,4,1,64,0,1,float16,float16,0,0.4628479878107707
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,10240,4,1,64,0,1,float16,fp8,0,0.4626773198445638
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,10240,4,1,64,0,1,fp8,fp8,0,0.583679993947347
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,10240,4,2,64,0,1,float16,float16,0,0.4601173400878906
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,10240,4,2,64,0,1,float16,fp8,0,0.4601173400878906
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,10240,4,2,64,0,1,fp8,fp8,0,0.583679993947347
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,8192,4,1,64,0,1,float16,float16,0,3.7515945434570312
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,8192,4,1,64,0,1,float16,fp8,0,3.815765380859375
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,8192,4,2,64,0,1,float16,float16,0,3.9224319458007812
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,8192,4,1,64,0,1,fp8,fp8,0,5.184512138366699
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,8192,4,2,64,0,1,float16,fp8,0,3.900416056315104
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,8192,4,4,64,0,1,float16,float16,0,1.9684693018595378
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,8192,4,2,64,0,1,fp8,fp8,0,5.46884282430013
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,8192,4,4,64,0,1,float16,fp8,0,1.896447976430257
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,8192,4,4,64,0,1,fp8,fp8,0,2.7013120651245117
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,8192,4,1,64,0,1,float16,float16,0,1.7921706835428874
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,8192,4,1,64,0,1,float16,fp8,0,1.7505280176798503
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,8192,4,1,64,0,1,fp8,fp8,0,2.4403626124064126
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,8192,4,2,64,0,1,float16,float16,0,1.7122987111409504
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,8192,4,2,64,0,1,float16,fp8,0,1.7216854095458984
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,8192,4,4,64,0,1,float16,float16,0,0.9060693581899008
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,8192,4,2,64,0,1,fp8,fp8,0,2.5301334063212075
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,8192,4,4,64,0,1,float16,fp8,0,0.9047040144602457
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,8192,4,4,64,0,1,fp8,fp8,0,1.366528034210205
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,8192,4,1,64,0,1,float16,float16,0,0.8997546831766764
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,8192,4,1,64,0,1,float16,fp8,0,0.9216000239054362
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,8192,4,1,64,0,1,fp8,fp8,0,1.2738560040791829
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,8192,4,2,64,0,1,float16,float16,0,0.8970239957173666
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,8192,4,2,64,0,1,float16,fp8,0,0.8917333285013834
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,8192,4,2,64,0,1,fp8,fp8,0,1.279317299524943
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,8192,4,4,64,0,1,float16,float16,0,0.5019306739171346
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,8192,4,4,64,0,1,float16,fp8,0,0.504149317741394
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,8192,4,4,64,0,1,fp8,fp8,0,0.7284053166707357
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,8192,4,1,64,0,1,float16,float16,0,0.5145599842071533
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,8192,4,1,64,0,1,float16,fp8,0,0.5067093372344971
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,8192,4,1,64,0,1,fp8,fp8,0,0.7156053384145101
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,8192,4,2,64,0,1,float16,float16,0,0.4991999864578247
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,8192,4,2,64,0,1,float16,fp8,0,0.5087573528289795
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,8192,4,2,64,0,1,fp8,fp8,0,0.7179946899414062
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,8192,4,4,64,0,1,float16,float16,0,0.32358400026957196
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,8192,4,4,64,0,1,float16,fp8,0,0.32631466786066693
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,8192,4,4,64,0,1,fp8,fp8,0,0.4193280140558879
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,8192,4,1,64,0,1,float16,float16,0,0.3319466710090637
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,8192,4,1,64,0,1,float16,fp8,0,0.33160533507664997
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,8192,4,1,64,0,1,fp8,fp8,0,0.4227413336435954
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,8192,4,2,64,0,1,float16,float16,0,0.3295573393503825
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,8192,4,2,64,0,1,float16,fp8,0,0.33177600304285687
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,8192,4,2,64,0,1,fp8,fp8,0,0.41915734608968097
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,6144,4,1,64,0,1,float16,float16,0,2.0512426694234214
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,6144,4,1,64,0,1,float16,fp8,0,2.088618596394857
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,6144,4,2,64,0,1,float16,float16,0,2.2026240030924478
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,6144,4,1,64,0,1,fp8,fp8,0,2.881535847981771
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,6144,4,2,64,0,1,float16,fp8,0,2.1353813807169595
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,6144,4,2,64,0,1,fp8,fp8,0,3.0962346394856772
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,6144,4,4,64,0,1,float16,float16,0,1.0801493326822917
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,6144,4,4,64,0,1,float16,fp8,0,1.0373120307922363
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,6144,4,4,64,0,1,fp8,fp8,0,1.605120023091634
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,6144,4,1,64,0,1,float16,float16,0,1.0543786684672039
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,6144,4,1,64,0,1,float16,fp8,0,1.0292906761169434
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,6144,4,1,64,0,1,fp8,fp8,0,1.4054400126139324
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,6144,4,2,64,0,1,float16,float16,0,1.0091520150502522
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,6144,4,2,64,0,1,float16,fp8,0,1.02348796526591
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,6144,4,4,64,0,1,float16,float16,0,0.5529599984486898
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,6144,4,2,64,0,1,fp8,fp8,0,1.4424746831258137
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,6144,4,4,64,0,1,float16,fp8,0,0.5440853436787924
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,6144,4,4,64,0,1,fp8,fp8,0,0.7799466451009115
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,6144,4,1,64,0,1,float16,float16,0,0.5331626733144125
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,6144,4,1,64,0,1,fp8,fp8,0,0.7507627010345459
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,6144,4,1,64,0,1,float16,fp8,0,0.5341866811116537
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,6144,4,2,64,0,1,float16,float16,0,0.5394773483276367
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,6144,4,2,64,0,1,float16,fp8,0,0.5350399812062582
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,6144,4,2,64,0,1,fp8,fp8,0,0.764245351155599
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,6144,4,4,64,0,1,float16,float16,0,0.31641600529352826
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,6144,4,4,64,0,1,float16,fp8,0,0.31692800919214886
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,6144,4,4,64,0,1,fp8,fp8,0,0.4312746524810791
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,6144,4,1,64,0,1,float16,float16,0,0.3217066725095113
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,6144,4,1,64,0,1,float16,fp8,0,0.31641600529352826
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,6144,4,1,64,0,1,fp8,fp8,0,0.43059198061625165
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,6144,4,2,64,0,1,float16,float16,0,0.32460800806681317
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,6144,4,2,64,0,1,float16,fp8,0,0.3227306604385376
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,6144,4,2,64,0,1,fp8,fp8,0,0.429909348487854
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,6144,4,4,64,0,1,float16,float16,0,0.2126506765683492
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,6144,4,4,64,0,1,float16,fp8,0,0.20974934101104736
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,6144,4,4,64,0,1,fp8,fp8,0,0.2868906656901042
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,6144,4,1,64,0,1,float16,float16,0,0.2135039965311686
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,6144,4,1,64,0,1,float16,fp8,0,0.21384533246358237
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,6144,4,1,64,0,1,fp8,fp8,0,0.2862079938252767
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,6144,4,2,64,0,1,float16,float16,0,0.212991992632548
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,6144,4,2,64,0,1,float16,fp8,0,0.21316266059875488
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,6144,4,2,64,0,1,fp8,fp8,0,0.2879146734873454
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,4096,4,1,64,0,1,float16,float16,0,2.081280072530111
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,4096,4,1,64,0,1,float16,fp8,0,2.0346879959106445
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,4096,4,1,64,0,1,fp8,fp8,0,2.7421013514200845
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,4096,4,2,64,0,1,float16,float16,0,2.3719253540039062
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,4096,4,2,64,0,1,float16,fp8,0,2.302293300628662
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,4096,4,2,64,0,1,fp8,fp8,0,3.07097593943278
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,4096,4,4,64,0,1,float16,float16,0,1.1840853691101074
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,4096,4,4,64,0,1,float16,fp8,0,1.143125295639038
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,4096,4,1,64,0,1,float16,float16,0,0.9412266413370768
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,4096,4,4,64,0,1,fp8,fp8,0,1.5773013432820637
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,4096,4,1,64,0,1,float16,fp8,0,0.928938627243042
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,4096,4,1,64,0,1,fp8,fp8,0,1.299455960591634
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,4096,4,2,64,0,1,float16,float16,0,0.9685333569844564
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,4096,4,4,64,0,1,float16,float16,0,0.5108053286870321
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,4096,4,2,64,0,1,float16,fp8,0,0.9427626927693685
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,4096,4,2,64,0,1,fp8,fp8,0,1.3934933344523113
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,4096,4,4,64,0,1,float16,fp8,0,0.490666667620341
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,4096,4,4,64,0,1,fp8,fp8,0,0.7418879667917887
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,4096,4,1,64,0,1,float16,float16,0,0.4800853331883748
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,4096,4,1,64,0,1,float16,fp8,0,0.4916906754175822
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,4096,4,1,64,0,1,fp8,fp8,0,0.6775466601053873
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,4096,4,2,64,0,1,float16,float16,0,0.5005653301874796
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,4096,4,2,64,0,1,float16,fp8,0,0.490666667620341
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,4096,4,2,64,0,1,fp8,fp8,0,0.6758399804433187
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,4096,4,4,64,0,1,float16,float16,0,0.27187200387318927
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,4096,4,4,64,0,1,float16,fp8,0,0.27426133553187054
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,4096,4,4,64,0,1,fp8,fp8,0,0.38980265458424884
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,4096,4,1,64,0,1,float16,float16,0,0.2752853234608968
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,4096,4,1,64,0,1,float16,fp8,0,0.2730666597684224
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,4096,4,1,64,0,1,fp8,fp8,0,0.377344012260437
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,4096,4,2,64,0,1,float16,float16,0,0.2730666597684224
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,4096,4,2,64,0,1,float16,fp8,0,0.2677759925524394
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,4096,4,2,64,0,1,fp8,fp8,0,0.37717334429423016
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,4096,4,4,64,0,1,float16,float16,0,0.18141865730285645
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,4096,4,4,64,0,1,float16,fp8,0,0.17356799046198526
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,4096,4,1,64,0,1,float16,float16,0,0.1812480092048645
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,4096,4,4,64,0,1,fp8,fp8,0,0.212991992632548
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,4096,4,1,64,0,1,float16,fp8,0,0.18244266510009766
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,4096,4,1,64,0,1,fp8,fp8,0,0.2111146648724874
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,4096,4,2,64,0,1,float16,float16,0,0.18056533734003702
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,4096,4,2,64,0,1,float16,fp8,0,0.1802240014076233
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,4096,4,2,64,0,1,fp8,fp8,0,0.20855466524759927
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,4096,4,4,64,0,1,float16,float16,0,0.10803199807802837
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,4096,4,4,64,0,1,float16,fp8,0,0.10734933614730835
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,4096,4,4,64,0,1,fp8,fp8,0,0.15615999698638916
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,4096,4,1,64,0,1,float16,float16,0,0.11400533715883891
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,4096,4,1,64,0,1,float16,fp8,0,0.11400533715883891
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,4096,4,1,64,0,1,fp8,fp8,0,0.1544533371925354
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,4096,4,2,64,0,1,float16,float16,0,0.1114453375339508
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,4096,4,2,64,0,1,float16,fp8,0,0.11434666315714519
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,4096,4,2,64,0,1,fp8,fp8,0,0.155648003021876
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,3072,4,1,64,0,1,float16,float16,0,1.2014933427174885
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,3072,4,1,64,0,1,float16,fp8,0,1.173845370610555
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,3072,4,1,64,0,1,fp8,fp8,0,1.622869332631429
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,3072,4,2,64,0,1,float16,float16,0,1.3339306513468425
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,3072,4,2,64,0,1,float16,fp8,0,1.272320032119751
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,3072,4,2,64,0,1,fp8,fp8,0,1.8225493431091309
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,3072,4,4,64,0,1,float16,float16,0,0.6415359973907471
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,3072,4,4,64,0,1,float16,fp8,0,0.5884586572647095
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,3072,4,4,64,0,1,fp8,fp8,0,0.9634133179982504
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,3072,4,1,64,0,1,float16,float16,0,0.5799253384272257
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,3072,4,1,64,0,1,float16,fp8,0,0.5760000149408976
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,3072,4,1,64,0,1,fp8,fp8,0,0.7666347026824951
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,3072,4,2,64,0,1,float16,float16,0,0.5592746734619141
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,3072,4,2,64,0,1,float16,fp8,0,0.5572266578674316
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,3072,4,2,64,0,1,fp8,fp8,0,0.7893333435058594
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,3072,4,4,64,0,1,float16,float16,0,0.2959360082944234
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,3072,4,4,64,0,1,float16,fp8,0,0.2990079919497172
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,3072,4,4,64,0,1,fp8,fp8,0,0.4215466578801473
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,3072,4,1,64,0,1,float16,float16,0,0.29815467198689777
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,3072,4,1,64,0,1,float16,fp8,0,0.2908160090446472
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,3072,4,1,64,0,1,fp8,fp8,0,0.4087466796239217
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,3072,4,2,64,0,1,float16,float16,0,0.2867199977238973
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,3072,4,2,64,0,1,float16,fp8,0,0.29371732473373413
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,3072,4,2,64,0,1,fp8,fp8,0,0.40806400775909424
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,3072,4,4,64,0,1,float16,float16,0,0.16913066307703653
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,3072,4,4,64,0,1,float16,fp8,0,0.17681066195170084
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,3072,4,4,64,0,1,fp8,fp8,0,0.24132267634073892
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,3072,4,1,64,0,1,float16,float16,0,0.1800533334414164
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,3072,4,1,64,0,1,float16,fp8,0,0.1781760056813558
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,3072,4,1,64,0,1,fp8,fp8,0,0.2321066657702128
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,3072,4,2,64,0,1,float16,float16,0,0.17988266547520956
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,3072,4,2,64,0,1,float16,fp8,0,0.17203199863433838
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,3072,4,2,64,0,1,fp8,fp8,0,0.23091200987497965
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,3072,4,4,64,0,1,float16,float16,0,0.10871466994285583
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,3072,4,4,64,0,1,float16,fp8,0,0.10922666390736897
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,3072,4,4,64,0,1,fp8,fp8,0,0.1551359991232554
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,3072,4,1,64,0,1,float16,float16,0,0.1109333336353302
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,3072,4,1,64,0,1,float16,fp8,0,0.11332266529401143
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,3072,4,1,64,0,1,fp8,fp8,0,0.1532586713631948
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,3072,4,2,64,0,1,float16,float16,0,0.1053013304869334
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,3072,4,2,64,0,1,float16,fp8,0,0.1053013304869334
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,3072,4,2,64,0,1,fp8,fp8,0,0.15411200126012167
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,3072,4,4,64,0,1,float16,float16,0,0.08157866696516673
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,3072,4,4,64,0,1,float16,fp8,0,0.08038400113582611
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,3072,4,4,64,0,1,fp8,fp8,0,0.116565336783727
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,3072,4,1,64,0,1,float16,float16,0,0.08243200182914734
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,3072,4,1,64,0,1,float16,fp8,0,0.0817493349313736
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,3072,4,1,64,0,1,fp8,fp8,0,0.11520000298817952
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,3072,4,2,64,0,1,float16,float16,0,0.08106666803359985
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,3072,4,2,64,0,1,float16,fp8,0,0.0795306662718455
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,3072,4,2,64,0,1,fp8,fp8,0,0.11588266491889954
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,2048,4,1,64,0,1,float16,float16,0,1.2895572980244954
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,2048,4,1,64,0,1,float16,fp8,0,1.2272640069325764
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,2048,4,1,64,0,1,fp8,fp8,0,1.6197973887125652
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,2048,4,2,64,0,1,float16,float16,0,1.5076692899068196
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,2048,4,2,64,0,1,float16,fp8,0,1.4305280049641926
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,2048,4,4,64,0,1,float16,float16,0,0.8046933015187582
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,2048,4,2,64,0,1,fp8,fp8,0,1.8669226964314778
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,2048,4,4,64,0,1,float16,fp8,0,0.7456426620483398
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,2048,4,1,64,0,1,float16,float16,0,0.5307733217875162
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,2048,4,4,64,0,1,fp8,fp8,0,1.0038613478342693
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,2048,4,1,64,0,1,float16,fp8,0,0.5333333412806193
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,2048,4,1,64,0,1,fp8,fp8,0,0.7343786557515463
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,2048,4,2,64,0,1,float16,float16,0,0.5502293507258097
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,2048,4,4,64,0,1,float16,float16,0,0.2892799973487854
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,2048,4,2,64,0,1,float16,fp8,0,0.5536426703135172
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,2048,4,2,64,0,1,fp8,fp8,0,0.8511146704355875
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,2048,4,4,64,0,1,float16,fp8,0,0.27852799495061237
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,2048,4,4,64,0,1,fp8,fp8,0,0.43537068367004395
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,2048,4,1,64,0,1,float16,float16,0,0.2653866608937581
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,2048,4,1,64,0,1,float16,fp8,0,0.26606933275858563
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,2048,4,1,64,0,1,fp8,fp8,0,0.3694933255513509
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,2048,4,2,64,0,1,float16,float16,0,0.2786986629168193
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,2048,4,2,64,0,1,float16,fp8,0,0.2744320034980774
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,2048,4,4,64,0,1,float16,float16,0,0.15172266960144043
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,2048,4,2,64,0,1,fp8,fp8,0,0.3800746599833171
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,2048,4,4,64,0,1,float16,fp8,0,0.1544533371925354
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,2048,4,4,64,0,1,fp8,fp8,0,0.22016000747680664
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,2048,4,1,64,0,1,float16,float16,0,0.15974400440851846
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,2048,4,1,64,0,1,float16,fp8,0,0.1520639955997467
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,2048,4,1,64,0,1,fp8,fp8,0,0.2152106761932373
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,2048,4,2,64,0,1,float16,float16,0,0.15871999661127725
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,2048,4,2,64,0,1,float16,fp8,0,0.15428266922632852
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,2048,4,4,64,0,1,float16,float16,0,0.09779199957847595
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,2048,4,2,64,0,1,fp8,fp8,0,0.21486934026082358
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,2048,4,4,64,0,1,float16,fp8,0,0.09540266791979472
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,2048,4,4,64,0,1,fp8,fp8,0,0.12185600399971008
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,2048,4,1,64,0,1,float16,float16,0,0.0981333355108897
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,2048,4,1,64,0,1,float16,fp8,0,0.10069333513577779
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,2048,4,1,64,0,1,fp8,fp8,0,0.1220266620318095
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,2048,4,2,64,0,1,float16,float16,0,0.09796266754468282
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,2048,4,2,64,0,1,float16,fp8,0,0.10086400310198466
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,2048,4,2,64,0,1,fp8,fp8,0,0.12014933427174886
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,2048,4,4,64,0,1,float16,float16,0,0.05444266895453135
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,2048,4,4,64,0,1,float16,fp8,0,0.054101333022117615
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,2048,4,4,64,0,1,fp8,fp8,0,0.08260266482830048
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,2048,4,1,64,0,1,float16,float16,0,0.054272000988324486
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,2048,4,1,64,0,1,fp8,fp8,0,0.08226133386294048
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,2048,4,1,64,0,1,float16,fp8,0,0.054101333022117615
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,2048,4,2,64,0,1,float16,float16,0,0.054272000988324486
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,2048,4,2,64,0,1,float16,fp8,0,0.05495466788609823
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,2048,4,2,64,0,1,fp8,fp8,0,0.08226133386294048
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,2048,4,4,64,0,1,float16,float16,0,0.045738667249679565
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,2048,4,4,64,0,1,float16,fp8,0,0.04625066618124644
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,2048,4,4,64,0,1,fp8,fp8,0,0.07441066702206929
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,2048,4,1,64,0,1,float16,float16,0,0.04607999821503957
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,2048,4,1,64,0,1,fp8,fp8,0,0.07492266595363617
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,2048,4,2,64,0,1,float16,float16,0,0.045738667249679565
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,2048,4,1,64,0,1,float16,fp8,0,0.04642133414745331
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,2048,4,2,64,0,1,float16,fp8,0,0.045909335215886436
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,2048,4,2,64,0,1,fp8,fp8,0,0.07526400188604991
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,1536,4,1,64,0,1,float16,float16,0,0.721407969792684
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,1536,4,1,64,0,1,float16,fp8,0,0.7193600336710612
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,1536,4,1,64,0,1,fp8,fp8,0,0.9550506273905436
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,1536,4,4,64,0,1,float16,float16,0,0.4092586835225423
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,1536,4,2,64,0,1,float16,float16,0,0.8654507001241049
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,1536,4,2,64,0,1,float16,fp8,0,0.8116906483968099
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,1536,4,2,64,0,1,fp8,fp8,0,1.1064319610595703
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,1536,4,4,64,0,1,float16,fp8,0,0.37666134039560956
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,1536,4,4,64,0,1,fp8,fp8,0,0.626858671506246
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,1536,4,1,64,0,1,float16,float16,0,0.3331413269042969
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,1536,4,1,64,0,1,float16,fp8,0,0.3242666721343994
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,1536,4,1,64,0,1,fp8,fp8,0,0.4307626485824585
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,1536,4,2,64,0,1,float16,fp8,0,0.33023999134699505
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,1536,4,2,64,0,1,float16,float16,0,0.33587201436360675
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,1536,4,2,64,0,1,fp8,fp8,0,0.46830932299296063
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,1536,4,4,64,0,1,float16,float16,0,0.17459199825922647
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,1536,4,4,64,0,1,float16,fp8,0,0.16844799121220908
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,1536,4,4,64,0,1,fp8,fp8,0,0.24302933613459268
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,1536,4,1,64,0,1,float16,float16,0,0.16537599762280783
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,1536,4,1,64,0,1,float16,fp8,0,0.1641813317934672
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,1536,4,1,64,0,1,fp8,fp8,0,0.22510933876037598
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,1536,4,2,64,0,1,float16,float16,0,0.16861865917841592
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,1536,4,2,64,0,1,float16,fp8,0,0.16827734311421713
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,1536,4,2,64,0,1,fp8,fp8,0,0.22869332631429037
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,1536,4,4,64,0,1,float16,float16,0,0.10018133123715718
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,1536,4,4,64,0,1,float16,fp8,0,0.10154666503270467
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,1536,4,1,64,0,1,float16,float16,0,0.10086400310198466
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,1536,4,4,64,0,1,fp8,fp8,0,0.1346560021241506
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,1536,4,1,64,0,1,float16,fp8,0,0.09915733337402344
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,1536,4,1,64,0,1,fp8,fp8,0,0.13141333063443503
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,1536,4,2,64,0,1,float16,float16,0,0.10240000486373901
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,1536,4,2,64,0,1,float16,fp8,0,0.09898666540781657
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,1536,4,2,64,0,1,fp8,fp8,0,0.13004799683888754
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,1536,4,4,64,0,1,float16,float16,0,0.06144000093142191
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,1536,4,4,64,0,1,float16,fp8,0,0.06195199986298879
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,1536,4,4,64,0,1,fp8,fp8,0,0.08567466338475545
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,1536,4,1,64,0,1,float16,float16,0,0.06092800199985504
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,1536,4,1,64,0,1,float16,fp8,0,0.060415998101234436
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,1536,4,1,64,0,1,fp8,fp8,0,0.08362666765848796
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,1536,4,2,64,0,1,float16,float16,0,0.06092800199985504
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,1536,4,2,64,0,1,float16,fp8,0,0.06126933296521505
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,1536,4,2,64,0,1,fp8,fp8,0,0.08430932958920796
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,1536,4,4,64,0,1,float16,float16,0,0.04454400142033895
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,1536,4,4,64,0,1,float16,fp8,0,0.04471466441949209
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,1536,4,4,64,0,1,fp8,fp8,0,0.0628053347269694
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,1536,4,1,64,0,1,float16,float16,0,0.04488533238569895
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,1536,4,1,64,0,1,float16,fp8,0,0.04539733131726583
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,1536,4,1,64,0,1,fp8,fp8,0,0.06382933259010315
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,1536,4,2,64,0,1,float16,float16,0,0.04488533238569895
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,1536,4,2,64,0,1,float16,fp8,0,0.04505600035190582
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,1536,4,2,64,0,1,fp8,fp8,0,0.06297599772612254
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,1536,4,4,64,0,1,float16,float16,0,0.03566933423280716
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,1536,4,4,64,0,1,float16,fp8,0,0.03601066768169403
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,1536,4,4,64,0,1,fp8,fp8,0,0.05751466751098633
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,1536,4,1,64,0,1,float16,float16,0,0.03618133316437403
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,1536,4,1,64,0,1,float16,fp8,0,0.03618133316437403
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,1536,4,1,64,0,1,fp8,fp8,0,0.05734399954477946
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,1536,4,2,64,0,1,float16,float16,0,0.03601066768169403
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,1536,4,2,64,0,1,float16,fp8,0,0.03618133316437403
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,1536,4,2,64,0,1,fp8,fp8,0,0.0580266664425532
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,1024,4,1,64,0,1,float16,float16,0,0.8529919783274332
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,1024,4,1,64,0,1,float16,fp8,0,0.8335359891255697
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,1024,4,1,64,0,1,fp8,fp8,0,0.9441280364990234
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,1024,4,2,64,0,1,float16,float16,0,0.9494187037150065
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,1024,4,4,64,0,1,float16,float16,0,0.5918720165888468
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,1024,4,2,64,0,1,float16,fp8,0,0.9055573145548502
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,1024,4,4,64,0,1,float16,fp8,0,0.5529599984486898
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,1024,4,2,64,0,1,fp8,fp8,0,1.0419200261433919
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,1024,4,4,64,0,1,fp8,fp8,0,0.6480213403701782
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,1024,4,1,64,0,1,float16,float16,0,0.3324586749076843
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,1024,4,1,64,0,1,float16,fp8,0,0.3341653347015381
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,1024,4,1,64,0,1,fp8,fp8,0,0.4541440010070801
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,1024,4,2,64,0,1,float16,float16,0,0.3490133285522461
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,1024,4,2,64,0,1,fp8,fp8,0,0.5225813388824463
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,1024,4,2,64,0,1,float16,fp8,0,0.33058132727940875
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,1024,4,4,64,0,1,float16,float16,0,0.1704960068066915
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,1024,4,4,64,0,1,float16,fp8,0,0.16759467124938965
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,1024,4,4,64,0,1,fp8,fp8,0,0.2916693290074666
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,1024,4,1,64,0,1,float16,float16,0,0.16639999548594156
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,1024,4,1,64,0,1,float16,fp8,0,0.16059733430544534
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,1024,4,1,64,0,1,fp8,fp8,0,0.21794132391611734
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,1024,4,2,64,0,1,float16,float16,0,0.1621333360671997
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,1024,4,2,64,0,1,fp8,fp8,0,0.21913599967956543
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,1024,4,2,64,0,1,float16,fp8,0,0.16401066382726034
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,1024,4,4,64,0,1,float16,float16,0,0.09659733374913533
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,1024,4,4,64,0,1,float16,fp8,0,0.09659733374913533
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,1024,4,4,64,0,1,fp8,fp8,0,0.1250986655553182
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,1024,4,1,64,0,1,float16,float16,0,0.09574400385220845
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,1024,4,1,64,0,1,float16,fp8,0,0.09642666578292847
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,1024,4,1,64,0,1,fp8,fp8,0,0.12595199545224509
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,1024,4,2,64,0,1,float16,float16,0,0.09676800171534221
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,1024,4,2,64,0,1,float16,fp8,0,0.09489066402117412
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,1024,4,2,64,0,1,fp8,fp8,0,0.12390399972597758
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,1024,4,4,64,0,1,float16,float16,0,0.0580266664425532
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,1024,4,4,64,0,1,float16,fp8,0,0.056832000613212585
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,1024,4,4,64,0,1,fp8,fp8,0,0.06980266670385997
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,1024,4,1,64,0,1,float16,float16,0,0.05614933371543884
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,1024,4,1,64,0,1,float16,fp8,0,0.05597866574923197
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,1024,4,1,64,0,1,fp8,fp8,0,0.06946133573849995
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,1024,4,2,64,0,1,float16,float16,0,0.05597866574923197
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,1024,4,2,64,0,1,float16,fp8,0,0.056661332647005715
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,1024,4,2,64,0,1,fp8,fp8,0,0.06894933183987935
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,1024,4,4,64,0,1,float16,float16,0,0.03549866626660029
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,1024,4,4,64,0,1,float16,fp8,0,0.03532800078392029
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,1024,4,4,64,0,1,fp8,fp8,0,0.049322664737701416
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,1024,4,1,64,0,1,float16,float16,0,0.034474665919939675
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,1024,4,1,64,0,1,float16,fp8,0,0.034815999368826546
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,1024,4,2,64,0,1,float16,float16,0,0.034815999368826546
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,1024,4,1,64,0,1,fp8,fp8,0,0.04898133377234141
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,1024,4,2,64,0,1,float16,fp8,0,0.034815999368826546
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,1024,4,2,64,0,1,fp8,fp8,0,0.049322664737701416
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,1024,4,4,64,0,1,float16,float16,0,0.0266239990790685
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,1024,4,4,64,0,1,float16,fp8,0,0.02679466704527537
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,1024,4,4,64,0,1,fp8,fp8,0,0.04181333382924398
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,1024,4,1,64,0,1,float16,float16,0,0.02611200014750163
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,1024,4,1,64,0,1,float16,fp8,0,0.0264533335963885
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,1024,4,1,64,0,1,fp8,fp8,0,0.04147200038035711
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,1024,4,2,64,0,1,float16,float16,0,0.0266239990790685
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,1024,4,2,64,0,1,float16,fp8,0,0.0266239990790685
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,1024,4,2,64,0,1,fp8,fp8,0,0.041984001795450844
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,1024,4,4,64,0,1,float16,float16,0,0.025087999800841015
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,1024,4,4,64,0,1,float16,fp8,0,0.025258667767047882
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,1024,4,4,64,0,1,fp8,fp8,0,0.039594667653242745
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,1024,4,1,64,0,1,float16,float16,0,0.02491733431816101
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,1024,4,1,64,0,1,fp8,fp8,0,0.03942399968703588
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,1024,4,1,64,0,1,float16,fp8,0,0.02491733431816101
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,1024,4,2,64,0,1,float16,float16,0,0.025087999800841015
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,1024,4,2,64,0,1,float16,fp8,0,0.025258667767047882
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,1024,4,2,64,0,1,fp8,fp8,0,0.040106666584809623
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,512,4,1,64,0,1,float16,float16,0,0.646997332572937
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,512,4,1,64,0,1,float16,fp8,0,0.6290773153305054
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,512,4,1,64,0,1,fp8,fp8,0,0.67413330078125
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,512,4,2,64,0,1,float16,float16,0,0.794111967086792
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,512,4,2,64,0,1,float16,fp8,0,0.7504213651021322
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,512,4,2,64,0,1,fp8,fp8,0,0.7639040152231852
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,512,4,4,64,0,1,float16,float16,0,0.5358933210372925
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,512,4,4,64,0,1,float16,fp8,0,0.48657067616780597
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,512,4,1,64,0,1,float16,float16,0,0.21691733598709106
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,512,4,4,64,0,1,fp8,fp8,0,0.504319985707601
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,512,4,1,64,0,1,float16,fp8,0,0.21555199225743613
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,512,4,1,64,0,1,fp8,fp8,0,0.3391146659851074
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,512,4,2,64,0,1,float16,float16,0,0.2404693365097046
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,512,4,2,64,0,1,float16,fp8,0,0.22408533096313477
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,512,4,2,64,0,1,fp8,fp8,0,0.3839999834696452
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,512,4,4,64,0,1,float16,float16,0,0.1160533328851064
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,512,4,4,64,0,1,float16,fp8,0,0.1109333336353302
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,512,4,4,64,0,1,fp8,fp8,0,0.21384533246358237
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,512,4,1,64,0,1,float16,float16,0,0.10973866780598958
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,512,4,1,64,0,1,float16,fp8,0,0.11178666353225708
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,512,4,1,64,0,1,fp8,fp8,0,0.14045866330464682
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,512,4,2,64,0,1,float16,float16,0,0.11212799946467082
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,512,4,2,64,0,1,float16,fp8,0,0.11229866743087769
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,512,4,2,64,0,1,fp8,fp8,0,0.1423360009988149
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,512,4,4,64,0,1,float16,float16,0,0.06587733328342438
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,512,4,4,64,0,1,float16,fp8,0,0.06451199948787689
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,512,4,4,64,0,1,fp8,fp8,0,0.08106666803359985
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,512,4,1,64,0,1,float16,float16,0,0.060415998101234436
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,512,4,1,64,0,1,float16,fp8,0,0.06297599772612254
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,512,4,1,64,0,1,fp8,fp8,0,0.08004266520341237
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,512,4,2,64,0,1,float16,float16,0,0.06348800162474315
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,512,4,2,64,0,1,float16,fp8,0,0.06144000093142191
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,512,4,2,64,0,1,fp8,fp8,0,0.08072533210118611
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,512,4,4,64,0,1,float16,float16,0,0.03788800040880839
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,512,4,4,64,0,1,float16,fp8,0,0.03874133278926214
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,512,4,4,64,0,1,fp8,fp8,0,0.04334933559099833
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,512,4,1,64,0,1,float16,float16,0,0.03737599899371465
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,512,4,1,64,0,1,float16,fp8,0,0.037717332442601524
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,512,4,1,64,0,1,fp8,fp8,0,0.04334933559099833
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,512,4,2,64,0,1,float16,float16,0,0.037205333511034645
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,512,4,2,64,0,1,float16,fp8,0,0.037717332442601524
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,512,4,2,64,0,1,fp8,fp8,0,0.043007999658584595
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,512,4,4,64,0,1,float16,float16,0,0.023893333971500397
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,512,4,4,64,0,1,float16,fp8,0,0.023552000522613525
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,512,4,4,64,0,1,fp8,fp8,0,0.0314026673634847
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,512,4,1,64,0,1,float16,float16,0,0.023039999107519787
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,512,4,1,64,0,1,float16,fp8,0,0.023210667073726654
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,512,4,1,64,0,1,fp8,fp8,0,0.031061333914597828
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,512,4,2,64,0,1,float16,float16,0,0.023210667073726654
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,512,4,2,64,0,1,float16,fp8,0,0.023210667073726654
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,512,4,2,64,0,1,fp8,fp8,0,0.031061333914597828
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,512,4,4,64,0,1,float16,float16,0,0.018090666582187016
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,512,4,4,64,0,1,float16,fp8,0,0.018090666582187016
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,512,4,4,64,0,1,fp8,fp8,0,0.025941332181294758
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,512,4,1,64,0,1,float16,float16,0,0.017749333133300144
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,512,4,1,64,0,1,float16,fp8,0,0.01791999985774358
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,512,4,1,64,0,1,fp8,fp8,0,0.025770666698614757
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,512,4,2,64,0,1,float16,float16,0,0.017749333133300144
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,512,4,2,64,0,1,float16,fp8,0,0.018090666582187016
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,512,4,2,64,0,1,fp8,fp8,0,0.025941332181294758
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,512,4,4,64,0,1,float16,float16,0,0.016554666062196095
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,512,4,4,64,0,1,float16,fp8,0,0.016895999511082966
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,512,4,4,64,0,1,fp8,fp8,0,0.024234667420387268
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,512,4,1,64,0,1,float16,float16,0,0.016554666062196095
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,512,4,1,64,0,1,float16,fp8,0,0.016554666062196095
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,512,4,1,64,0,1,fp8,fp8,0,0.024234667420387268
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,512,4,2,64,0,1,float16,float16,0,0.016554666062196095
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,512,4,2,64,0,1,float16,fp8,0,0.01672533278663953
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,512,4,2,64,0,1,fp8,fp8,0,0.024405332903067272
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,512,4,4,64,0,1,float16,float16,0,0.016042667130629223
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,512,4,4,64,0,1,float16,fp8,0,0.01621333385507266
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,512,4,4,64,0,1,fp8,fp8,0,0.0240639994541804
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,512,4,1,64,0,1,float16,float16,0,0.016042667130629223
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,512,4,1,64,0,1,fp8,fp8,0,0.023893333971500397
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,512,4,1,64,0,1,float16,fp8,0,0.037717332442601524
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,512,4,2,64,0,1,float16,float16,0,0.016042667130629223
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,512,4,2,64,0,1,float16,fp8,0,0.016384000579516094
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,512,4,2,64,0,1,fp8,fp8,0,0.0240639994541804
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,256,4,1,64,0,1,float16,float16,0,0.1508693297704061
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,256,4,1,64,0,1,float16,fp8,0,0.15121066570281982
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,256,4,1,64,0,1,fp8,fp8,0,0.24081067244211832
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,256,4,2,64,0,1,float16,float16,0,0.1960960030555725
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,256,4,2,64,0,1,float16,fp8,0,0.17100799083709717
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,256,4,2,64,0,1,fp8,fp8,0,0.31163734197616577
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,256,4,4,64,0,1,float16,float16,0,0.09062400460243225
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,256,4,4,64,0,1,float16,fp8,0,0.08601599931716919
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,256,4,4,64,0,1,fp8,fp8,0,0.17425066232681274
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,256,4,1,64,0,1,float16,float16,0,0.08277333279450734
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,256,4,1,64,0,1,float16,fp8,0,0.08021333316961925
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,256,4,1,64,0,1,fp8,fp8,0,0.10035199920336406
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,256,4,2,64,0,1,float16,float16,0,0.08482133348782857
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,256,4,2,64,0,1,float16,fp8,0,0.0885759989420573
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,256,4,4,64,0,1,float16,float16,0,0.04744533201058706
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,256,4,2,64,0,1,fp8,fp8,0,0.10086400310198466
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,256,4,4,64,0,1,float16,fp8,0,0.04693333307902018
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,256,4,4,64,0,1,fp8,fp8,0,0.059903999169667564
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,256,4,1,64,0,1,float16,float16,0,0.04437333345413208
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,256,4,1,64,0,1,float16,fp8,0,0.045226668318112694
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,256,4,2,64,0,1,float16,float16,0,0.045738667249679565
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,256,4,1,64,0,1,fp8,fp8,0,0.05870933334032694
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,256,4,2,64,0,1,float16,fp8,0,0.045909335215886436
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,256,4,2,64,0,1,fp8,fp8,0,0.05819733440876007
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,256,4,4,64,0,1,float16,float16,0,0.02867199977238973
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,256,4,4,64,0,1,float16,fp8,0,0.028501334289709728
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,256,4,4,64,0,1,fp8,fp8,0,0.03310933212439219
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,256,4,1,64,0,1,float16,float16,0,0.027477333943049114
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,256,4,1,64,0,1,float16,fp8,0,0.027647999425729115
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,256,4,1,64,0,1,fp8,fp8,0,0.03293866664171219
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,256,4,2,64,0,1,float16,float16,0,0.027989332874615986
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,256,4,2,64,0,1,float16,fp8,0,0.028160000840822857
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,256,4,2,64,0,1,fp8,fp8,0,0.03310933212439219
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,256,4,4,64,0,1,float16,float16,0,0.01826133330663045
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,256,4,4,64,0,1,float16,fp8,0,0.018090666582187016
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,256,4,4,64,0,1,fp8,fp8,0,0.022357332209746044
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,256,4,1,64,0,1,float16,float16,0,0.016895999511082966
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,256,4,1,64,0,1,float16,fp8,0,0.0170666662355264
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,256,4,1,64,0,1,fp8,fp8,0,0.022015998760859173
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,256,4,2,64,0,1,float16,float16,0,0.01757866640885671
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,256,4,2,64,0,1,float16,fp8,0,0.01757866640885671
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,256,4,2,64,0,1,fp8,fp8,0,0.022015998760859173
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,256,4,4,64,0,1,float16,float16,0,0.013653332988421122
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,256,4,4,64,0,1,float16,fp8,0,0.013823999712864557
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,256,4,4,64,0,1,fp8,fp8,0,0.01826133330663045
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,256,4,1,64,0,1,float16,float16,0,0.01331199953953425
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,256,4,1,64,0,1,float16,fp8,0,0.013482666263977686
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,256,4,1,64,0,1,fp8,fp8,0,0.01791999985774358
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,256,4,2,64,0,1,float16,fp8,0,0.013823999712864557
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,256,4,2,64,0,1,float16,float16,0,0.013653332988421122
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,256,4,2,64,0,1,fp8,fp8,0,0.018090666582187016
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,256,4,4,64,0,1,float16,float16,0,0.01228800043463707
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,256,4,4,64,0,1,float16,fp8,0,0.012458667159080505
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,256,4,4,64,0,1,fp8,fp8,0,0.01672533278663953
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,256,4,1,64,0,1,float16,float16,0,0.01228800043463707
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,256,4,1,64,0,1,float16,fp8,0,0.012458667159080505
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,256,4,1,64,0,1,fp8,fp8,0,0.016554666062196095
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,256,4,2,64,0,1,float16,float16,0,0.01228800043463707
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,256,4,2,64,0,1,float16,fp8,0,0.012458667159080505
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,256,4,2,64,0,1,fp8,fp8,0,0.01672533278663953
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,256,4,4,64,0,1,float16,float16,0,0.011776000261306763
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,256,4,4,64,0,1,float16,fp8,0,0.011946666985750198
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,256,4,4,64,0,1,fp8,fp8,0,0.01621333385507266
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,256,4,1,64,0,1,float16,fp8,0,0.01228800043463707
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,256,4,1,64,0,1,float16,float16,0,0.011776000261306763
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,256,4,1,64,0,1,fp8,fp8,0,0.016384000579516094
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,256,4,2,64,0,1,float16,float16,0,0.011946666985750198
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,256,4,2,64,0,1,float16,fp8,0,0.01228800043463707
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,256,4,2,64,0,1,fp8,fp8,0,0.016384000579516094
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,256,4,4,64,0,1,float16,float16,0,0.011605333536863327
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,256,4,4,64,0,1,float16,fp8,0,0.011776000261306763
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,256,4,4,64,0,1,fp8,fp8,0,0.01621333385507266
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,256,4,1,64,0,1,float16,float16,0,0.011946666985750198
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,256,4,1,64,0,1,float16,fp8,0,0.012117333710193634
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,256,4,1,64,0,1,fp8,fp8,0,0.016384000579516094
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,256,4,2,64,0,1,float16,float16,0,0.011776000261306763
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,256,4,2,64,0,1,float16,fp8,0,0.011946666985750198
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,256,4,2,64,0,1,fp8,fp8,0,0.016384000579516094
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,128,4,1,64,0,1,float16,float16,0,0.06263466676076253
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,128,4,1,64,0,1,float16,fp8,0,0.0631466656923294
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,128,4,1,64,0,1,fp8,fp8,0,0.07987200220425923
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,128,4,2,64,0,1,float16,float16,0,0.06485333542029063
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,128,4,2,64,0,1,fp8,fp8,0,0.08157866696516673
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,128,4,2,64,0,1,float16,fp8,0,0.06604800124963124
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,128,4,4,64,0,1,float16,float16,0,0.03874133278926214
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,128,4,4,64,0,1,float16,fp8,0,0.03839999934037527
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,128,4,4,64,0,1,fp8,fp8,0,0.04607999821503957
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,128,4,1,64,0,1,float16,float16,0,0.0363520011305809
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,128,4,1,64,0,1,float16,fp8,0,0.03669333209594091
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,128,4,1,64,0,1,fp8,fp8,0,0.04505600035190582
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,128,4,2,64,0,1,float16,float16,0,0.03737599899371465
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,128,4,2,64,0,1,float16,fp8,0,0.037205333511034645
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,128,4,2,64,0,1,fp8,fp8,0,0.04505600035190582
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,128,4,4,64,0,1,float16,float16,0,0.023893333971500397
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,128,4,4,64,0,1,fp8,fp8,0,0.027306665976842243
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,128,4,4,64,0,1,float16,fp8,0,0.023552000522613525
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,128,4,1,64,0,1,float16,float16,0,0.02252800017595291
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,128,4,1,64,0,1,float16,fp8,0,0.022698665658632915
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,128,4,1,64,0,1,fp8,fp8,0,0.0264533335963885
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,128,4,2,64,0,1,float16,float16,0,0.022869333624839783
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,128,4,2,64,0,1,float16,fp8,0,0.022869333624839783
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,128,4,2,64,0,1,fp8,fp8,0,0.0264533335963885
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,128,4,4,64,0,1,float16,float16,0,0.015530666957298914
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,128,4,4,64,0,1,float16,fp8,0,0.015360000232855478
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,128,4,4,64,0,1,fp8,fp8,0,0.018090666582187016
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,128,4,1,64,0,1,float16,float16,0,0.0145066666106383
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,128,4,1,64,0,1,float16,fp8,0,0.014677333335081736
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,128,4,1,64,0,1,fp8,fp8,0,0.017407999684413273
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,128,4,2,64,0,1,float16,float16,0,0.014848000059525171
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,128,4,2,64,0,1,fp8,fp8,0,0.017749333133300144
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,128,4,2,64,0,1,float16,fp8,0,0.015018666783968607
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,128,4,4,64,0,1,float16,float16,0,0.011776000261306763
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,128,4,4,64,0,1,float16,fp8,0,0.011776000261306763
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,128,4,4,64,0,1,fp8,fp8,0,0.014335999886194864
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,128,4,1,64,0,1,float16,float16,0,0.011264000087976456
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,128,4,1,64,0,1,float16,fp8,0,0.011434666812419891
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,128,4,1,64,0,1,fp8,fp8,0,0.014165333161751429
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,128,4,2,64,0,1,float16,float16,0,0.011605333536863327
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,128,4,2,64,0,1,float16,fp8,0,0.011434666812419891
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,128,4,2,64,0,1,fp8,fp8,0,0.014335999886194864
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,128,4,4,64,0,1,float16,float16,0,0.010239999741315842
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,128,4,4,64,0,1,float16,fp8,0,0.010410666465759277
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,128,4,4,64,0,1,fp8,fp8,0,0.012970666090647379
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,128,4,1,64,0,1,float16,float16,0,0.010069333637754122
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,128,4,1,64,0,1,float16,fp8,0,0.010239999741315842
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,128,4,1,64,0,1,fp8,fp8,0,0.012970666090647379
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,128,4,2,64,0,1,float16,float16,0,0.010239999741315842
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,128,4,2,64,0,1,float16,fp8,0,0.010410666465759277
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,128,4,2,64,0,1,fp8,fp8,0,0.01331199953953425
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,128,4,4,64,0,1,float16,float16,0,0.009557333464423815
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,128,4,4,64,0,1,float16,fp8,0,0.009898666913310686
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,128,4,4,64,0,1,fp8,fp8,0,0.01228800043463707
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,128,4,1,64,0,1,float16,float16,0,0.009557333464423815
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,128,4,1,64,0,1,float16,fp8,0,0.009898666913310686
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,128,4,1,64,0,1,fp8,fp8,0,0.012629333883523941
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,128,4,2,64,0,1,float16,float16,0,0.00972800018886725
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,128,4,2,64,0,1,float16,fp8,0,0.010239999741315842
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,128,4,2,64,0,1,fp8,fp8,0,0.012970666090647379
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,128,4,4,64,0,1,float16,float16,0,0.00938666673998038
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,128,4,4,64,0,1,float16,fp8,0,0.00972800018886725
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,128,4,4,64,0,1,fp8,fp8,0,0.01228800043463707
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,128,4,1,64,0,1,float16,float16,0,0.00972800018886725
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,128,4,1,64,0,1,float16,fp8,0,0.009898666913310686
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,128,4,1,64,0,1,fp8,fp8,0,0.012629333883523941
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,128,4,2,64,0,1,float16,float16,0,0.00972800018886725
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,128,4,2,64,0,1,float16,fp8,0,0.009898666913310686
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,128,4,2,64,0,1,fp8,fp8,0,0.012800000607967377
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,128,4,4,64,0,1,float16,float16,0,0.009216000015536943
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,128,4,4,64,0,1,float16,fp8,0,0.009557333464423815
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,128,4,4,64,0,1,fp8,fp8,0,0.012458667159080505
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,128,4,1,64,0,1,float16,float16,0,0.009898666913310686
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,128,4,1,64,0,1,float16,fp8,0,0.009898666913310686
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,128,4,1,64,0,1,fp8,fp8,0,0.012800000607967377
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,128,4,2,64,0,1,float16,float16,0,0.009557333464423815
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,128,4,2,64,0,1,float16,fp8,0,0.009898666913310686
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,128,4,2,64,0,1,fp8,fp8,0,0.012629333883523941
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,64,4,1,64,0,1,float16,float16,0,0.031231999397277832
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,64,4,1,64,0,1,float16,fp8,0,0.031744000812371574
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,64,4,1,64,0,1,fp8,fp8,0,0.062463998794555664
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,64,4,2,64,0,1,float16,float16,0,0.032085334261258446
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,64,4,2,64,0,1,fp8,fp8,0,0.0628053347269694
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,64,4,2,64,0,1,float16,fp8,0,0.03242666771014532
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,64,4,4,64,0,1,float16,float16,0,0.020992000897725422
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,64,4,4,64,0,1,float16,fp8,0,0.020821332931518555
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,64,4,4,64,0,1,fp8,fp8,0,0.03669333209594091
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,64,4,1,64,0,1,float16,float16,0,0.019797333826621372
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,64,4,1,64,0,1,float16,fp8,0,0.019285333653291065
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,64,4,1,64,0,1,fp8,fp8,0,0.03618133316437403
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,64,4,2,64,0,1,float16,float16,0,0.019797333826621372
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,64,4,2,64,0,1,float16,fp8,0,0.019797333826621372
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,64,4,2,64,0,1,fp8,fp8,0,0.03583999971548716
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,64,4,4,64,0,1,float16,float16,0,0.013823999712864557
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,64,4,4,64,0,1,float16,fp8,0,0.01331199953953425
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,64,4,4,64,0,1,fp8,fp8,0,0.02218666672706604
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,64,4,1,64,0,1,float16,float16,0,0.012629333883523941
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,64,4,1,64,0,1,float16,fp8,0,0.012629333883523941
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,64,4,1,64,0,1,fp8,fp8,0,0.021503999829292297
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,64,4,2,64,0,1,float16,float16,0,0.013141332815090815
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,64,4,2,64,0,1,float16,fp8,0,0.012970666090647379
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,64,4,2,64,0,1,fp8,fp8,0,0.021503999829292297
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,64,4,4,64,0,1,float16,fp8,0,0.010581333190202713
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,64,4,4,64,0,1,float16,float16,0,0.010410666465759277
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,64,4,4,64,0,1,fp8,fp8,0,0.015360000232855478
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,64,4,1,64,0,1,float16,float16,0,0.009898666913310686
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,64,4,1,64,0,1,fp8,fp8,0,0.015018666783968607
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,64,4,1,64,0,1,float16,fp8,0,0.010069333637754122
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,64,4,2,64,0,1,float16,float16,0,0.010410666465759277
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,64,4,2,64,0,1,float16,fp8,0,0.010239999741315842
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,64,4,2,64,0,1,fp8,fp8,0,0.015360000232855478
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,64,4,4,64,0,1,float16,float16,0,0.009045333291093508
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,64,4,4,64,0,1,float16,fp8,0,0.00938666673998038
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,64,4,4,64,0,1,fp8,fp8,0,0.012970666090647379
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,64,4,1,64,0,1,float16,float16,0,0.009045333291093508
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,64,4,1,64,0,1,float16,fp8,0,0.009216000015536943
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,64,4,1,64,0,1,fp8,fp8,0,0.012970666090647379
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,64,4,2,64,0,1,float16,float16,0,0.009045333291093508
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,64,4,2,64,0,1,float16,fp8,0,0.00938666673998038
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,64,4,2,64,0,1,fp8,fp8,0,0.012970666090647379
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,64,4,4,64,0,1,float16,float16,0,0.0085333331177632
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,64,4,4,64,0,1,float16,fp8,0,0.009045333291093508
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,64,4,4,64,0,1,fp8,fp8,0,0.01228800043463707
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,64,4,1,64,0,1,float16,float16,0,0.008703999842206636
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,64,4,1,64,0,1,float16,fp8,0,0.009045333291093508
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,64,4,1,64,0,1,fp8,fp8,0,0.01228800043463707
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,64,4,2,64,0,1,float16,float16,0,0.008874666566650072
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,64,4,2,64,0,1,float16,fp8,0,0.009045333291093508
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,64,4,2,64,0,1,fp8,fp8,0,0.01228800043463707
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,64,4,4,64,0,1,float16,float16,0,0.008362666393319765
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,64,4,4,64,0,1,fp8,fp8,0,0.011946666985750198
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,64,4,4,64,0,1,float16,fp8,0,0.008703999842206636
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,64,4,1,64,0,1,float16,float16,0,0.0085333331177632
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,64,4,1,64,0,1,float16,fp8,0,0.008874666566650072
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,64,4,1,64,0,1,fp8,fp8,0,0.012458667159080505
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,64,4,2,64,0,1,float16,float16,0,0.008362666393319765
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,64,4,2,64,0,1,float16,fp8,0,0.008874666566650072
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,64,4,2,64,0,1,fp8,fp8,0,0.01228800043463707
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,64,4,4,64,0,1,float16,float16,0,0.008362666393319765
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,64,4,4,64,0,1,float16,fp8,0,0.008703999842206636
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,64,4,4,64,0,1,fp8,fp8,0,0.01228800043463707
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,64,4,1,64,0,1,float16,fp8,0,0.008703999842206636
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,64,4,1,64,0,1,float16,float16,0,0.008703999842206636
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,64,4,1,64,0,1,fp8,fp8,0,0.012117333710193634
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,64,4,2,64,0,1,float16,float16,0,0.0085333331177632
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,64,4,2,64,0,1,float16,fp8,0,0.008703999842206636
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,64,4,2,64,0,1,fp8,fp8,0,0.012117333710193634
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,64,4,4,64,0,1,float16,float16,0,0.0085333331177632
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,64,4,4,64,0,1,float16,fp8,0,0.008703999842206636
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,64,4,4,64,0,1,fp8,fp8,0,0.012117333710193634
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,64,4,1,64,0,1,float16,float16,0,0.0085333331177632
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,64,4,1,64,0,1,float16,fp8,0,0.008703999842206636
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,64,4,1,64,0,1,fp8,fp8,0,0.012117333710193634
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,64,4,2,64,0,1,float16,float16,0,0.008362666393319765
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,64,4,2,64,0,1,float16,fp8,0,0.008703999842206636
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,64,4,2,64,0,1,fp8,fp8,0,0.01228800043463707
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,32,4,1,64,0,1,float16,float16,0,0.021674667795499165
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,32,4,1,64,0,1,float16,fp8,0,0.021503999829292297
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,32,4,1,64,0,1,fp8,fp8,0,0.05614933371543884
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,32,4,2,64,0,1,float16,float16,0,0.021674667795499165
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,32,4,2,64,0,1,float16,fp8,0,0.021333334346612293
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,32,4,4,64,0,1,float16,float16,0,0.015018666783968607
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,32,4,2,64,0,1,fp8,fp8,0,0.05614933371543884
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,32,4,4,64,0,1,float16,fp8,0,0.014335999886194864
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,32,4,1,64,0,1,float16,float16,0,0.013994666437307993
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,32,4,4,64,0,1,fp8,fp8,0,0.032255999743938446
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,32,4,1,64,0,1,float16,fp8,0,0.013653332988421122
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,32,4,1,64,0,1,fp8,fp8,0,0.032085334261258446
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,32,4,2,64,0,1,float16,float16,0,0.014165333161751429
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,32,4,2,64,0,1,float16,fp8,0,0.014335999886194864
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,32,4,2,64,0,1,fp8,fp8,0,0.03259733319282532
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,32,4,4,64,0,1,float16,float16,0,0.010410666465759277
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,32,4,4,64,0,1,float16,fp8,0,0.010751999914646149
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,32,4,4,64,0,1,fp8,fp8,0,0.019797333826621372
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,32,4,1,64,0,1,float16,float16,0,0.010069333637754122
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,32,4,1,64,0,1,float16,fp8,0,0.010239999741315842
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,32,4,1,64,0,1,fp8,fp8,0,0.019626667102177937
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,32,4,2,64,0,1,float16,float16,0,0.010410666465759277
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,32,4,2,64,0,1,float16,fp8,0,0.010581333190202713
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,32,4,2,64,0,1,fp8,fp8,0,0.019968000551064808
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,32,4,4,64,0,1,float16,float16,0,0.009045333291093508
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,32,4,4,64,0,1,float16,fp8,0,0.009045333291093508
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,32,4,4,64,0,1,fp8,fp8,0,0.0145066666106383
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,32,4,1,64,0,1,float16,float16,0,0.008703999842206636
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,32,4,1,64,0,1,float16,fp8,0,0.009045333291093508
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,32,4,1,64,0,1,fp8,fp8,0,0.0145066666106383
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,32,4,2,64,0,1,float16,float16,0,0.008874666566650072
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,32,4,2,64,0,1,float16,fp8,0,0.009216000015536943
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,32,4,2,64,0,1,fp8,fp8,0,0.014848000059525171
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,32,4,4,64,0,1,float16,float16,0,0.008874666566650072
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,32,4,4,64,0,1,float16,fp8,0,0.008874666566650072
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,32,4,4,64,0,1,fp8,fp8,0,0.01228800043463707
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,32,4,1,64,0,1,float16,float16,0,0.0085333331177632
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,32,4,1,64,0,1,float16,fp8,0,0.0085333331177632
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,32,4,1,64,0,1,fp8,fp8,0,0.012458667159080505
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,32,4,2,64,0,1,float16,float16,0,0.008192000289758047
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,32,4,2,64,0,1,float16,fp8,0,0.008703999842206636
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,32,4,2,64,0,1,fp8,fp8,0,0.012800000607967377
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,32,4,4,64,0,1,float16,float16,0,0.008021333565314611
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,32,4,4,64,0,1,float16,fp8,0,0.008021333565314611
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,32,4,4,64,0,1,fp8,fp8,0,0.011776000261306763
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,32,4,1,64,0,1,float16,float16,0,0.008703999842206636
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,32,4,1,64,0,1,float16,fp8,0,0.008192000289758047
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,32,4,1,64,0,1,fp8,fp8,0,0.012458667159080505
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,32,4,2,64,0,1,float16,float16,0,0.008874666566650072
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,32,4,2,64,0,1,float16,fp8,0,0.0085333331177632
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,32,4,2,64,0,1,fp8,fp8,0,0.012117333710193634
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,32,4,4,64,0,1,float16,float16,0,0.011776000261306763
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,32,4,4,64,0,1,float16,fp8,0,0.0085333331177632
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,32,4,4,64,0,1,fp8,fp8,0,0.011776000261306763
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,32,4,1,64,0,1,float16,float16,0,0.0085333331177632
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,32,4,1,64,0,1,float16,fp8,0,0.008192000289758047
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,32,4,1,64,0,1,fp8,fp8,0,0.012117333710193634
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,32,4,2,64,0,1,float16,float16,0,0.0085333331177632
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,32,4,2,64,0,1,float16,fp8,0,0.011946666985750198
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,32,4,2,64,0,1,fp8,fp8,0,0.012117333710193634
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,32,4,4,64,0,1,float16,float16,0,0.008656000097592672
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,32,4,4,64,0,1,float16,fp8,0,0.0085333331177632
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,32,4,4,64,0,1,fp8,fp8,0,0.011776000261306763
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,32,4,1,64,0,1,float16,float16,0,0.007850666840871176
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,32,4,1,64,0,1,float16,fp8,0,0.008703999842206636
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,32,4,1,64,0,1,fp8,fp8,0,0.01228800043463707
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,32,4,2,64,0,1,float16,float16,0,0.007850666840871176
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,32,4,2,64,0,1,float16,fp8,0,0.008021333565314611
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,32,4,2,64,0,1,fp8,fp8,0,0.012117333710193634
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,32,4,4,64,0,1,float16,float16,0,0.008362666393319765
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,32,4,4,64,0,1,float16,fp8,0,0.008874666566650072
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,32,4,4,64,0,1,fp8,fp8,0,0.011946666985750198
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,32,4,1,64,0,1,float16,float16,0,0.009045333291093508
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,32,4,1,64,0,1,float16,fp8,0,0.008874666566650072
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,32,4,1,64,0,1,fp8,fp8,0,0.011946666985750198
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,32,4,2,64,0,1,float16,float16,0,0.008021333565314611
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,32,4,2,64,0,1,float16,fp8,0,0.0085333331177632
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,32,4,2,64,0,1,fp8,fp8,0,0.011946666985750198
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,16,4,1,64,0,1,float16,float16,0,0.017407999684413273
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,16,4,1,64,0,1,float16,fp8,0,0.017407999684413273
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,16,4,1,64,0,1,fp8,fp8,0,0.05239466826121012
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,16,4,2,64,0,1,float16,float16,0,0.017749333133300144
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,16,4,2,64,0,1,float16,fp8,0,0.01757866640885671
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,16,4,2,64,0,1,fp8,fp8,0,0.05222400029500326
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,16,4,4,64,0,1,float16,float16,0,0.011946666985750198
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,16,4,4,64,0,1,float16,fp8,0,0.011776000261306763
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,16,4,4,64,0,1,fp8,fp8,0,0.03054933249950409
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,16,4,1,64,0,1,float16,float16,0,0.011776000261306763
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,16,4,1,64,0,1,float16,fp8,0,0.011776000261306763
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,16,4,1,64,0,1,fp8,fp8,0,0.030720000465710957
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,16,4,2,64,0,1,float16,float16,0,0.012117333710193634
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,16,4,2,64,0,1,float16,fp8,0,0.011946666985750198
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,16,4,2,64,0,1,fp8,fp8,0,0.031061333914597828
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,16,4,4,64,0,1,float16,float16,0,0.008703999842206636
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,16,4,4,64,0,1,float16,fp8,0,0.008874666566650072
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,16,4,4,64,0,1,fp8,fp8,0,0.01877333347996076
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,16,4,1,64,0,1,float16,float16,0,0.008703999842206636
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,16,4,1,64,0,1,float16,fp8,0,0.009045333291093508
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,16,4,2,64,0,1,float16,float16,0,0.009045333291093508
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,16,4,1,64,0,1,fp8,fp8,0,0.018432000031073887
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,16,4,2,64,0,1,float16,fp8,0,0.009045333291093508
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,16,4,2,64,0,1,fp8,fp8,0,0.018602666755517323
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,16,4,4,64,0,1,float16,float16,0,0.0085333331177632
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,16,4,4,64,0,1,fp8,fp8,0,0.013823999712864557
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,16,4,4,64,0,1,float16,fp8,0,0.008362666393319765
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,16,4,1,64,0,1,float16,float16,0,0.008874666566650072
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,16,4,1,64,0,1,float16,fp8,0,0.008192000289758047
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,16,4,1,64,0,1,fp8,fp8,0,0.013823999712864557
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,16,4,2,64,0,1,float16,float16,0,0.00901333304742972
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,16,4,2,64,0,1,float16,fp8,0,0.0085333331177632
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,16,4,2,64,0,1,fp8,fp8,0,0.013994666437307993
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,16,4,4,64,0,1,float16,float16,0,0.008703999842206636
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,16,4,4,64,0,1,float16,fp8,0,0.008021333565314611
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,16,4,4,64,0,1,fp8,fp8,0,0.01228800043463707
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,16,4,1,64,0,1,float16,float16,0,0.008192000289758047
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,16,4,1,64,0,1,float16,fp8,0,0.008021333565314611
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,16,4,1,64,0,1,fp8,fp8,0,0.012458667159080505
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,16,4,2,64,0,1,float16,float16,0,0.008703999842206636
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,16,4,2,64,0,1,float16,fp8,0,0.008192000289758047
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,16,4,2,64,0,1,fp8,fp8,0,0.012458667159080505
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,16,4,4,64,0,1,float16,float16,0,0.007850666840871176
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,16,4,4,64,0,1,float16,fp8,0,0.008874666566650072
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,16,4,4,64,0,1,fp8,fp8,0,0.011776000261306763
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,16,4,1,64,0,1,float16,float16,0,0.008021333565314611
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,16,4,1,64,0,1,float16,fp8,0,0.0085333331177632
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,16,4,1,64,0,1,fp8,fp8,0,0.012117333710193634
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,16,4,2,64,0,1,float16,float16,0,0.008703999842206636
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,16,4,2,64,0,1,float16,fp8,0,0.009045333291093508
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,16,4,2,64,0,1,fp8,fp8,0,0.01228800043463707
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,16,4,4,64,0,1,float16,float16,0,0.0085333331177632
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,16,4,4,64,0,1,float16,fp8,0,0.008192000289758047
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,16,4,4,64,0,1,fp8,fp8,0,0.011776000261306763
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,16,4,1,64,0,1,float16,float16,0,0.0085333331177632
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,16,4,1,64,0,1,float16,fp8,0,0.008192000289758047
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,16,4,1,64,0,1,fp8,fp8,0,0.011946666985750198
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,16,4,2,64,0,1,float16,float16,0,0.008874666566650072
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,16,4,2,64,0,1,float16,fp8,0,0.009216000015536943
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,16,4,2,64,0,1,fp8,fp8,0,0.012117333710193634
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,16,4,4,64,0,1,float16,float16,0,0.008362666393319765
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,16,4,4,64,0,1,float16,fp8,0,0.0085333331177632
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,16,4,4,64,0,1,fp8,fp8,0,0.011776000261306763
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,16,4,1,64,0,1,float16,float16,0,0.0085333331177632
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,16,4,1,64,0,1,float16,fp8,0,0.008362666393319765
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,16,4,1,64,0,1,fp8,fp8,0,0.012117333710193634
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,16,4,2,64,0,1,float16,float16,0,0.0085333331177632
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,16,4,2,64,0,1,float16,fp8,0,0.009045333291093508
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,16,4,4,64,0,1,float16,float16,0,0.007850666840871176
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,16,4,2,64,0,1,fp8,fp8,0,0.011776000261306763
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,16,4,4,64,0,1,float16,fp8,0,0.0084906667470932
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,16,4,4,64,0,1,fp8,fp8,0,0.011434666812419891
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,16,4,1,64,0,1,float16,float16,0,0.0085333331177632
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,16,4,1,64,0,1,float16,fp8,0,0.008874666566650072
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,16,4,1,64,0,1,fp8,fp8,0,0.011946666985750198
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,16,4,2,64,0,1,float16,float16,0,0.008874666566650072
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,16,4,2,64,0,1,float16,fp8,0,0.007850666840871176
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,16,4,2,64,0,1,fp8,fp8,0,0.011605333536863327
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,16384,96,1,128,0,1,fp8,fp8,0,206.7391153971354
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,16384,96,2,128,0,1,fp8,fp8,0,207.64005533854166
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,16384,96,4,128,0,1,fp8,fp8,0,207.17362467447916
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,16384,96,1,128,0,1,float16,float16,0,167.01456705729166
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,16384,96,1,128,0,1,float16,fp8,0,166.42884318033853
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,16384,96,8,128,0,1,fp8,fp8,0,208.73677571614584
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,16384,96,96,128,0,1,float16,fp8,0,177.05010986328125
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,16384,96,96,128,0,1,float16,float16,0,177.5849812825521
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,16384,96,1,128,0,1,fp8,fp8,0,103.52145385742188
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,16384,96,2,128,0,1,fp8,fp8,0,102.39539591471355
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,16384,96,2,128,0,1,float16,float16,0,168.46096801757812
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,16384,96,2,128,0,1,float16,fp8,0,167.36102294921875
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,16384,96,4,128,0,1,fp8,fp8,0,103.40625
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,16384,96,4,128,0,1,float16,float16,0,170.07923380533853
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,16384,96,4,128,0,1,float16,fp8,0,167.95051066080728
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,16384,96,96,128,0,1,fp8,fp8,0,54.044840494791664
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,16384,96,96,128,0,1,float16,float16,0,89.54282633463542
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,16384,96,8,128,0,1,fp8,fp8,0,99.9208984375
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,16384,96,1,128,0,1,float16,float16,0,87.15707397460938
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,16384,96,96,128,0,1,float16,fp8,0,93.20567830403645
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,16384,96,8,128,0,1,float16,float16,0,171.652099609375
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,16384,96,8,128,0,1,float16,fp8,0,171.32645670572916
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,16384,96,1,128,0,1,fp8,fp8,0,48.77039082845052
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,16384,96,1,128,0,1,float16,fp8,0,86.2759297688802
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,16384,96,2,128,0,1,fp8,fp8,0,49.5283203125
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,16384,96,2,128,0,1,float16,float16,0,85.69241333007812
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,16384,96,2,128,0,1,float16,fp8,0,86.35289510091145
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,16384,96,4,128,0,1,float16,float16,0,88.09506225585938
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,16384,96,4,128,0,1,fp8,fp8,0,48.84479777018229
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,16384,96,4,128,0,1,float16,fp8,0,88.44031778971355
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,16384,96,96,128,0,1,fp8,fp8,0,26.635264078776043
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,16384,96,96,128,0,1,float16,float16,0,43.798014322916664
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,16384,96,8,128,0,1,fp8,fp8,0,49.24535624186198
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,16384,96,8,128,0,1,float16,float16,0,88.03550211588542
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,16384,96,8,128,0,1,float16,fp8,0,89.62474568684895
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,16384,96,96,128,0,1,float16,fp8,0,43.78521728515625
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,16384,96,1,128,0,1,float16,float16,0,42.22173817952474
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,16384,96,1,128,0,1,fp8,fp8,0,24.605695088704426
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,16384,96,2,128,0,1,fp8,fp8,0,24.946004231770832
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,16384,96,1,128,0,1,float16,fp8,0,41.59283192952474
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,16384,96,2,128,0,1,float16,float16,0,41.110015869140625
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,16384,96,2,128,0,1,float16,fp8,0,41.923413594563804
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,16384,96,4,128,0,1,float16,float16,0,41.55716196695963
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,16384,96,4,128,0,1,fp8,fp8,0,24.877398173014324
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,16384,96,4,128,0,1,float16,fp8,0,41.72390492757162
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,16384,96,8,128,0,1,fp8,fp8,0,24.66082255045573
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,16384,96,8,128,0,1,float16,fp8,0,41.39537048339844
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,16384,96,8,128,0,1,float16,float16,0,41.941332499186196
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,12288,96,1,128,0,1,fp8,fp8,0,117.3763427734375
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,12288,96,2,128,0,1,fp8,fp8,0,115.36895751953125
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,12288,96,1,128,0,1,float16,fp8,0,199.3362833658854
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,12288,96,1,128,0,1,float16,float16,0,198.77734375
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,12288,96,2,128,0,1,float16,float16,0,198.2269490559896
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,12288,96,2,128,0,1,float16,fp8,0,196.92236328125
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,12288,96,4,128,0,1,float16,float16,0,196.45526123046875
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,12288,96,4,128,0,1,float16,fp8,0,201.9686482747396
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,12288,96,4,128,0,1,fp8,fp8,0,118.82359822591145
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,12288,96,96,128,0,1,fp8,fp8,0,64.47547912597656
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,12288,96,96,128,0,1,float16,float16,0,104.61610921223958
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,12288,96,96,128,0,1,float16,fp8,0,103.39635213216145
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,12288,96,8,128,0,1,fp8,fp8,0,117.6451416015625
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,12288,96,1,128,0,1,float16,float16,0,101.43300374348958
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,12288,96,8,128,0,1,float16,float16,0,195.7017618815104
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,12288,96,1,128,0,1,fp8,fp8,0,55.9469248453776
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,12288,96,1,128,0,1,float16,fp8,0,100.44671630859375
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,12288,96,8,128,0,1,float16,fp8,0,201.58650716145834
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,12288,96,2,128,0,1,fp8,fp8,0,57.71264139811198
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,12288,96,2,128,0,1,float16,float16,0,99.83419799804688
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,12288,96,2,128,0,1,float16,fp8,0,98.14783732096355
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,12288,96,4,128,0,1,fp8,fp8,0,56.30583699544271
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,12288,96,4,128,0,1,float16,float16,0,101.58062744140625
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,12288,96,4,128,0,1,float16,fp8,0,98.30076090494792
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,12288,96,8,128,0,1,fp8,fp8,0,59.170135498046875
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,12288,96,96,128,0,1,fp8,fp8,0,31.4967041015625
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,12288,96,8,128,0,1,float16,float16,0,100.51259358723958
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,12288,96,96,128,0,1,float16,fp8,0,50.48320007324219
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,12288,96,96,128,0,1,float16,float16,0,51.41333516438802
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,12288,96,8,128,0,1,float16,fp8,0,101.6243184407552
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,12288,96,1,128,0,1,float16,float16,0,48.54750061035156
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,12288,96,1,128,0,1,float16,fp8,0,46.993408203125
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,12288,96,1,128,0,1,fp8,fp8,0,28.269566853841145
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,12288,96,2,128,0,1,fp8,fp8,0,27.971412658691406
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,12288,96,2,128,0,1,float16,float16,0,47.18267822265625
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,12288,96,2,128,0,1,float16,fp8,0,47.85237121582031
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,12288,96,4,128,0,1,float16,float16,0,47.6948496500651
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,12288,96,4,128,0,1,fp8,fp8,0,29.06385040283203
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,12288,96,4,128,0,1,float16,fp8,0,48.32341512044271
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,12288,96,8,128,0,1,fp8,fp8,0,28.19976552327474
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,12288,96,8,128,0,1,float16,float16,0,47.17073059082031
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,12288,96,96,128,0,1,float16,float16,0,25.206270853678387
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,12288,96,8,128,0,1,float16,fp8,0,47.358978271484375
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,12288,96,96,128,0,1,fp8,fp8,0,15.90869394938151
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,12288,96,96,128,0,1,float16,fp8,0,25.40014902750651
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,12288,96,1,128,0,1,float16,float16,0,24.346282958984375
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,12288,96,1,128,0,1,float16,fp8,0,24.217259724934895
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,12288,96,1,128,0,1,fp8,fp8,0,14.268928527832031
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,12288,96,2,128,0,1,fp8,fp8,0,14.366036732991537
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,12288,96,2,128,0,1,float16,float16,0,23.599957784016926
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,12288,96,2,128,0,1,float16,fp8,0,23.742464701334637
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,12288,96,4,128,0,1,fp8,fp8,0,14.748160044352213
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,12288,96,4,128,0,1,float16,float16,0,24.1626459757487
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,12288,96,4,128,0,1,float16,fp8,0,23.635454813639324
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,12288,96,8,128,0,1,fp8,fp8,0,14.347434997558594
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,12288,96,8,128,0,1,float16,float16,0,24.267435709635418
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,12288,96,8,128,0,1,float16,fp8,0,23.56292215983073
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,10240,96,1,128,0,1,fp8,fp8,0,87.99658203125
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,10240,96,2,128,0,1,fp8,fp8,0,85.13518778483073
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,10240,96,1,128,0,1,float16,float16,0,137.09261067708334
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,10240,96,1,128,0,1,float16,fp8,0,137.3272705078125
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,10240,96,2,128,0,1,float16,float16,0,139.71712239583334
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,10240,96,2,128,0,1,float16,fp8,0,138.45571899414062
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,10240,96,4,128,0,1,float16,fp8,0,136.62651570638022
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,10240,96,4,128,0,1,float16,float16,0,141.6292724609375
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,10240,96,4,128,0,1,fp8,fp8,0,88.66610717773438
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,10240,96,96,128,0,1,fp8,fp8,0,45.730987548828125
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,10240,96,8,128,0,1,fp8,fp8,0,86.66590372721355
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,10240,96,96,128,0,1,float16,float16,0,76.78856404622395
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,10240,96,96,128,0,1,float16,fp8,0,76.97442118326823
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,10240,96,1,128,0,1,float16,float16,0,71.32911173502605
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,10240,96,8,128,0,1,float16,float16,0,139.83863321940103
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,10240,96,1,128,0,1,float16,fp8,0,71.7455342610677
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,10240,96,1,128,0,1,fp8,fp8,0,39.81738789876302
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,10240,96,8,128,0,1,float16,fp8,0,138.88494873046875
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,10240,96,2,128,0,1,fp8,fp8,0,40.15086873372396
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,10240,96,2,128,0,1,float16,float16,0,69.0177714029948
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,10240,96,2,128,0,1,float16,fp8,0,70.4363505045573
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,10240,96,4,128,0,1,fp8,fp8,0,41.72680409749349
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,10240,96,4,128,0,1,float16,float16,0,71.55438741048177
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,10240,96,4,128,0,1,float16,fp8,0,69.4108174641927
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,10240,96,8,128,0,1,fp8,fp8,0,40.37000528971354
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,10240,96,8,128,0,1,float16,float16,0,69.26438395182292
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,10240,96,96,128,0,1,float16,float16,0,36.58564249674479
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,10240,96,96,128,0,1,fp8,fp8,0,23.171925862630207
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,10240,96,8,128,0,1,float16,fp8,0,69.6659647623698
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,10240,96,96,128,0,1,float16,fp8,0,37.18707275390625
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,10240,96,1,128,0,1,float16,float16,0,34.267476399739586
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,10240,96,1,128,0,1,float16,fp8,0,33.64130147298177
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,10240,96,1,128,0,1,fp8,fp8,0,20.041727701822918
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,10240,96,2,128,0,1,fp8,fp8,0,20.65390904744466
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,10240,96,2,128,0,1,float16,float16,0,33.36669921875
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,10240,96,4,128,0,1,fp8,fp8,0,20.233727773030598
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,10240,96,2,128,0,1,float16,fp8,0,33.5998280843099
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,10240,96,4,128,0,1,float16,float16,0,33.4030507405599
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,10240,96,4,128,0,1,float16,fp8,0,34.021034240722656
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,10240,96,8,128,0,1,float16,float16,0,33.77800496419271
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,10240,96,8,128,0,1,fp8,fp8,0,21.026133219401043
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,10240,96,96,128,0,1,float16,float16,0,18.108927408854168
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,10240,96,8,128,0,1,float16,fp8,0,33.92204793294271
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,10240,96,96,128,0,1,fp8,fp8,0,11.383295694986979
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,10240,96,96,128,0,1,float16,fp8,0,19.095381418863933
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,10240,96,1,128,0,1,float16,float16,0,16.840192159016926
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,10240,96,1,128,0,1,float16,fp8,0,16.906922658284504
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,10240,96,1,128,0,1,fp8,fp8,0,10.265429178873697
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,10240,96,2,128,0,1,fp8,fp8,0,10.299392064412435
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,10240,96,2,128,0,1,float16,float16,0,17.176746368408203
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,10240,96,2,128,0,1,float16,fp8,0,16.804522196451824
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,10240,96,4,128,0,1,float16,float16,0,16.741376241048176
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,10240,96,4,128,0,1,fp8,fp8,0,10.119338353474935
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,10240,96,4,128,0,1,float16,fp8,0,17.108992258707683
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,10240,96,8,128,0,1,float16,float16,0,17.00488535563151
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,10240,96,8,128,0,1,fp8,fp8,0,10.39633051554362
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,10240,96,8,128,0,1,float16,fp8,0,16.896170298258465
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,8192,96,1,128,0,1,fp8,fp8,0,110.89220174153645
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,8192,96,2,128,0,1,fp8,fp8,0,113.21497599283855
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,8192,96,4,128,0,1,fp8,fp8,0,111.41529337565105
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,8192,96,8,128,0,1,fp8,fp8,0,113.9254659016927
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,8192,96,96,128,0,1,float16,float16,0,102.14041137695312
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,8192,96,96,128,0,1,float16,fp8,0,98.59515380859375
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,8192,96,1,128,0,1,fp8,fp8,0,52.08507792154948
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,8192,96,1,128,0,1,float16,float16,0,92.45184326171875
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,8192,96,1,128,0,1,float16,fp8,0,91.68759155273438
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,8192,96,2,128,0,1,fp8,fp8,0,52.44074503580729
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,8192,96,2,128,0,1,float16,fp8,0,94.99204508463542
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,8192,96,2,128,0,1,float16,float16,0,92.18116251627605
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,8192,96,4,128,0,1,fp8,fp8,0,52.6013437906901
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,8192,96,4,128,0,1,float16,float16,0,90.71615600585938
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,8192,96,4,128,0,1,float16,fp8,0,92.96828206380208
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,8192,96,96,128,0,1,float16,float16,0,48.679423014322914
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,8192,96,8,128,0,1,fp8,fp8,0,55.192403157552086
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,8192,96,96,128,0,1,fp8,fp8,0,30.515541076660156
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,8192,96,96,128,0,1,float16,fp8,0,48.052225748697914
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,8192,96,8,128,0,1,float16,float16,0,90.3516133626302
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,8192,96,8,128,0,1,float16,fp8,0,93.28725179036458
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,8192,96,1,128,0,1,fp8,fp8,0,26.977620442708332
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,8192,96,1,128,0,1,float16,float16,0,43.85843404134115
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,8192,96,1,128,0,1,float16,fp8,0,43.723775227864586
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,8192,96,2,128,0,1,fp8,fp8,0,26.0309321085612
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,8192,96,2,128,0,1,float16,fp8,0,43.46180216471354
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,8192,96,2,128,0,1,float16,float16,0,43.8283945719401
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,8192,96,4,128,0,1,fp8,fp8,0,26.4451421101888
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,8192,96,4,128,0,1,float16,float16,0,44.33220418294271
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,8192,96,4,128,0,1,float16,fp8,0,45.02869160970052
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,8192,96,8,128,0,1,float16,float16,0,44.08149210611979
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,8192,96,8,128,0,1,fp8,fp8,0,26.399744669596355
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,8192,96,8,128,0,1,float16,fp8,0,43.994964599609375
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,8192,96,96,128,0,1,fp8,fp8,0,15.365802764892578
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,8192,96,96,128,0,1,float16,fp8,0,23.78990936279297
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,8192,96,1,128,0,1,float16,float16,0,22.370816548665363
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,8192,96,1,128,0,1,fp8,fp8,0,13.175978342692057
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,8192,96,1,128,0,1,float16,fp8,0,21.967872619628906
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,8192,96,96,128,0,1,float16,float16,0,23.775062561035156
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,8192,96,2,128,0,1,float16,float16,0,21.3570556640625
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,8192,96,2,128,0,1,fp8,fp8,0,13.28878911336263
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,8192,96,2,128,0,1,float16,fp8,0,21.992106119791668
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,8192,96,4,128,0,1,float16,float16,0,22.669825236002605
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,8192,96,4,128,0,1,fp8,fp8,0,13.259092966715494
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,8192,96,4,128,0,1,float16,fp8,0,21.888682047526043
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,8192,96,8,128,0,1,fp8,fp8,0,13.566635131835938
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,8192,96,96,128,0,1,float16,float16,0,12.138666788736979
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,8192,96,8,128,0,1,float16,fp8,0,21.72962188720703
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,8192,96,96,128,0,1,float16,fp8,0,12.412415822347006
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,8192,96,96,128,0,1,fp8,fp8,0,7.598080317179362
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,8192,96,1,128,0,1,float16,float16,0,11.121663411458334
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,8192,96,1,128,0,1,float16,fp8,0,10.8134396870931
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,8192,96,8,128,0,1,float16,float16,0,21.73644765218099
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,8192,96,1,128,0,1,fp8,fp8,0,6.759765625
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,8192,96,2,128,0,1,float16,float16,0,10.948949178059896
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,8192,96,2,128,0,1,fp8,fp8,0,6.594048182169597
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,8192,96,2,128,0,1,float16,fp8,0,11.468971252441406
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,8192,96,4,128,0,1,float16,float16,0,11.266731262207031
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,8192,96,4,128,0,1,float16,fp8,0,11.23532740275065
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,8192,96,8,128,0,1,float16,float16,0,11.172693888346354
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,8192,96,4,128,0,1,fp8,fp8,0,6.32473627726237
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,8192,96,8,128,0,1,fp8,fp8,0,6.761130650838216
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,8192,96,8,128,0,1,float16,fp8,0,11.015338897705078
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,6144,96,1,128,0,1,fp8,fp8,0,64.39577738444011
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,6144,96,2,128,0,1,fp8,fp8,0,65.66434224446614
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,6144,96,4,128,0,1,fp8,fp8,0,66.5157979329427
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,6144,96,96,128,0,1,fp8,fp8,0,39.112874348958336
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,6144,96,96,128,0,1,float16,float16,0,60.45610555013021
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,6144,96,8,128,0,1,fp8,fp8,0,65.00556945800781
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,6144,96,1,128,0,1,float16,float16,0,50.81548563639323
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,6144,96,96,128,0,1,float16,fp8,0,56.88866170247396
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,6144,96,1,128,0,1,fp8,fp8,0,30.62903340657552
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,6144,96,1,128,0,1,float16,fp8,0,50.3560536702474
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,6144,96,2,128,0,1,fp8,fp8,0,30.69781239827474
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,6144,96,2,128,0,1,float16,fp8,0,50.11217244466146
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,6144,96,2,128,0,1,float16,float16,0,52.12808736165365
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,6144,96,4,128,0,1,float16,float16,0,50.19067891438802
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,6144,96,4,128,0,1,fp8,fp8,0,30.83417510986328
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,6144,96,4,128,0,1,float16,fp8,0,50.77862548828125
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,6144,96,96,128,0,1,fp8,fp8,0,18.639872233072918
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,6144,96,96,128,0,1,float16,float16,0,28.808191935221355
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,6144,96,96,128,0,1,float16,fp8,0,29.593599955240887
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,6144,96,8,128,0,1,fp8,fp8,0,31.305216471354168
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,6144,96,8,128,0,1,float16,float16,0,51.6319580078125
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,6144,96,1,128,0,1,float16,float16,0,25.34007517496745
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,6144,96,1,128,0,1,fp8,fp8,0,15.178581237792969
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,6144,96,1,128,0,1,float16,fp8,0,25.135958353678387
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,6144,96,8,128,0,1,float16,fp8,0,51.30717976888021
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,6144,96,2,128,0,1,float16,float16,0,25.09533945719401
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,6144,96,2,128,0,1,fp8,fp8,0,15.4518191019694
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,6144,96,2,128,0,1,float16,fp8,0,25.838422139485676
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,6144,96,4,128,0,1,float16,float16,0,25.3668696085612
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,6144,96,4,128,0,1,float16,fp8,0,25.31481679280599
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,6144,96,4,128,0,1,fp8,fp8,0,15.218517303466797
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,6144,96,8,128,0,1,fp8,fp8,0,15.441919962565104
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,6144,96,8,128,0,1,float16,fp8,0,25.650858561197918
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,6144,96,8,128,0,1,float16,float16,0,25.590614318847656
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,6144,96,96,128,0,1,float16,fp8,0,14.610090891520182
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,6144,96,96,128,0,1,float16,float16,0,14.332244873046875
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,6144,96,96,128,0,1,fp8,fp8,0,9.273685455322266
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,6144,96,1,128,0,1,float16,float16,0,13.110271453857422
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,6144,96,1,128,0,1,float16,fp8,0,12.75118891398112
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,6144,96,1,128,0,1,fp8,fp8,0,7.5376637776692705
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,6144,96,2,128,0,1,fp8,fp8,0,8.179712295532227
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,6144,96,2,128,0,1,float16,fp8,0,12.614143371582031
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,6144,96,2,128,0,1,float16,float16,0,12.999679565429688
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,6144,96,4,128,0,1,float16,float16,0,12.834303538004557
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,6144,96,4,128,0,1,fp8,fp8,0,7.519402821858724
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,6144,96,4,128,0,1,float16,fp8,0,12.808191935221354
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,6144,96,8,128,0,1,float16,float16,0,13.039104461669922
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,6144,96,8,128,0,1,fp8,fp8,0,7.853226979573567
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,6144,96,8,128,0,1,float16,fp8,0,12.992000579833984
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,6144,96,96,128,0,1,fp8,fp8,0,4.633258819580078
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,6144,96,96,128,0,1,float16,fp8,0,6.853973388671875
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,6144,96,1,128,0,1,float16,float16,0,6.033407847086589
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,6144,96,1,128,0,1,fp8,fp8,0,3.4228906631469727
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,6144,96,1,128,0,1,float16,fp8,0,6.325930913289388
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,6144,96,2,128,0,1,fp8,fp8,0,3.6944214502970376
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,6144,96,2,128,0,1,float16,float16,0,6.479018529256185
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,6144,96,2,128,0,1,float16,fp8,0,5.668010711669922
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,6144,96,96,128,0,1,float16,float16,0,6.901248296101888
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,6144,96,4,128,0,1,float16,float16,0,6.11140251159668
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,6144,96,4,128,0,1,float16,fp8,0,5.675178527832031
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,6144,96,4,128,0,1,fp8,fp8,0,3.508053461710612
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,6144,96,8,128,0,1,float16,float16,0,6.41262944539388
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,6144,96,8,128,0,1,fp8,fp8,0,3.7411839167277017
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,6144,96,8,128,0,1,float16,fp8,0,6.134613037109375
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,4096,96,1,128,0,1,fp8,fp8,0,64.24030049641927
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,4096,96,2,128,0,1,fp8,fp8,0,63.13096618652344
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,4096,96,4,128,0,1,fp8,fp8,0,64.6835225423177
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,4096,96,8,128,0,1,fp8,fp8,0,64.41267395019531
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,4096,96,96,128,0,1,float16,fp8,0,57.01256306966146
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,4096,96,96,128,0,1,float16,float16,0,56.9700673421224
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,4096,96,1,128,0,1,float16,float16,0,47.94828796386719
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,4096,96,1,128,0,1,float16,fp8,0,47.46785990397135
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,4096,96,1,128,0,1,fp8,fp8,0,29.21984100341797
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,4096,96,2,128,0,1,fp8,fp8,0,29.70880126953125
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,4096,96,2,128,0,1,float16,fp8,0,47.43577575683594
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,4096,96,2,128,0,1,float16,float16,0,49.47968037923177
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,4096,96,4,128,0,1,fp8,fp8,0,29.813588460286457
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,4096,96,4,128,0,1,float16,float16,0,48.14831034342448
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,4096,96,4,128,0,1,float16,fp8,0,47.870635986328125
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,4096,96,8,128,0,1,fp8,fp8,0,30.111572265625
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,4096,96,96,128,0,1,float16,float16,0,28.860244750976562
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,4096,96,8,128,0,1,float16,float16,0,48.490325927734375
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,4096,96,96,128,0,1,float16,fp8,0,28.142250061035156
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,4096,96,8,128,0,1,float16,fp8,0,48.5053456624349
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,4096,96,96,128,0,1,fp8,fp8,0,20.07756805419922
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,4096,96,1,128,0,1,float16,float16,0,23.656448364257812
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,4096,96,1,128,0,1,float16,fp8,0,23.764310201009113
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,4096,96,1,128,0,1,fp8,fp8,0,14.95688501993815
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,4096,96,2,128,0,1,float16,float16,0,23.813631693522137
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,4096,96,2,128,0,1,fp8,fp8,0,14.671189626057943
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,4096,96,2,128,0,1,float16,fp8,0,23.95886993408203
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,4096,96,4,128,0,1,float16,float16,0,23.778475443522137
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,4096,96,4,128,0,1,fp8,fp8,0,14.525098164876303
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,4096,96,4,128,0,1,float16,fp8,0,23.746388753255207
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,4096,96,8,128,0,1,fp8,fp8,0,14.79202143351237
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,4096,96,8,128,0,1,float16,float16,0,24.83916727701823
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,4096,96,96,128,0,1,float16,float16,0,14.008490244547525
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,4096,96,8,128,0,1,float16,fp8,0,24.03003692626953
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,4096,96,96,128,0,1,float16,fp8,0,13.731328328450521
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,4096,96,96,128,0,1,fp8,fp8,0,9.81828244527181
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,4096,96,1,128,0,1,float16,float16,0,12.014762878417969
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,4096,96,1,128,0,1,fp8,fp8,0,7.319722493489583
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,4096,96,1,128,0,1,float16,fp8,0,12.155562082926432
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,4096,96,2,128,0,1,float16,fp8,0,11.95468775431315
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,4096,96,2,128,0,1,float16,float16,0,12.012714385986328
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,4096,96,2,128,0,1,fp8,fp8,0,6.931114832560222
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,4096,96,4,128,0,1,fp8,fp8,0,6.728021621704102
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,4096,96,4,128,0,1,float16,float16,0,12.223829905192057
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,4096,96,4,128,0,1,float16,fp8,0,11.9464963277181
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,4096,96,8,128,0,1,fp8,fp8,0,6.864554723103841
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,4096,96,8,128,0,1,float16,float16,0,12.19003677368164
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,4096,96,8,128,0,1,float16,fp8,0,12.37555185953776
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,4096,96,96,128,0,1,float16,float16,0,6.950058619181315
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,4096,96,96,128,0,1,fp8,fp8,0,4.846421241760254
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,4096,96,96,128,0,1,float16,fp8,0,7.05894406636556
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,4096,96,1,128,0,1,float16,float16,0,5.378389358520508
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,4096,96,1,128,0,1,float16,fp8,0,5.728426615397136
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,4096,96,1,128,0,1,fp8,fp8,0,3.297792116800944
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,4096,96,2,128,0,1,float16,float16,0,5.846527735392253
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,4096,96,2,128,0,1,fp8,fp8,0,3.2814079920450845
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,4096,96,2,128,0,1,float16,fp8,0,5.832021077473958
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,4096,96,4,128,0,1,float16,float16,0,5.979477564493815
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,4096,96,4,128,0,1,float16,fp8,0,5.70521608988444
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,4096,96,4,128,0,1,fp8,fp8,0,3.397120157877604
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,4096,96,8,128,0,1,float16,float16,0,5.976234436035156
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,4096,96,8,128,0,1,float16,fp8,0,5.964458465576172
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,4096,96,8,128,0,1,fp8,fp8,0,3.4167467753092446
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,4096,96,96,128,0,1,float16,float16,0,3.4150400161743164
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,4096,96,96,128,0,1,float16,fp8,0,3.4389333724975586
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,4096,96,96,128,0,1,fp8,fp8,0,2.330453395843506
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,4096,96,1,128,0,1,float16,float16,0,2.6854400634765625
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,4096,96,1,128,0,1,fp8,fp8,0,1.7179306348164876
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,4096,96,2,128,0,1,float16,float16,0,2.702335993448893
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,4096,96,1,128,0,1,float16,fp8,0,2.7388585408528647
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,4096,96,2,128,0,1,float16,fp8,0,2.7147947947184243
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,4096,96,2,128,0,1,fp8,fp8,0,1.6766293843587239
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,4096,96,4,128,0,1,float16,float16,0,2.6827093760172525
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,4096,96,4,128,0,1,float16,fp8,0,2.7509759267171225
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,4096,96,4,128,0,1,fp8,fp8,0,1.7409706115722656
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,4096,96,8,128,0,1,float16,float16,0,2.7608747482299805
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,4096,96,8,128,0,1,float16,fp8,0,2.786304155985514
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,4096,96,8,128,0,1,fp8,fp8,0,1.7061546643575032
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,3072,96,1,128,0,1,fp8,fp8,0,37.13672637939453
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,3072,96,2,128,0,1,fp8,fp8,0,38.093994140625
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,3072,96,4,128,0,1,fp8,fp8,0,39.54688008626302
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,3072,96,8,128,0,1,fp8,fp8,0,37.6463368733724
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,3072,96,1,128,0,1,float16,float16,0,28.122454325358074
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,3072,96,1,128,0,1,float16,fp8,0,28.798294067382812
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,3072,96,1,128,0,1,fp8,fp8,0,17.22811762491862
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,3072,96,96,128,0,1,fp8,fp8,0,26.44940694173177
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,3072,96,96,128,0,1,float16,fp8,0,35.0201161702474
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,3072,96,96,128,0,1,float16,float16,0,36.17382303873698
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,3072,96,2,128,0,1,float16,float16,0,28.19737497965495
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,3072,96,2,128,0,1,fp8,fp8,0,17.965396881103516
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,3072,96,4,128,0,1,fp8,fp8,0,17.48479970296224
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,3072,96,2,128,0,1,float16,fp8,0,27.875157674153645
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,3072,96,4,128,0,1,float16,fp8,0,28.08592987060547
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,3072,96,8,128,0,1,float16,float16,0,28.842666625976562
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,3072,96,4,128,0,1,float16,float16,0,28.274518330891926
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,3072,96,8,128,0,1,fp8,fp8,0,18.190848032633465
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,3072,96,8,128,0,1,float16,fp8,0,28.613632202148438
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,3072,96,96,128,0,1,float16,float16,0,17.49009068806966
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,3072,96,96,128,0,1,fp8,fp8,0,13.042858123779297
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,3072,96,1,128,0,1,fp8,fp8,0,8.77124277750651
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,3072,96,96,128,0,1,float16,fp8,0,17.497941335042317
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,3072,96,1,128,0,1,float16,fp8,0,14.100308736165365
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,3072,96,1,128,0,1,float16,float16,0,13.560661315917969
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,3072,96,2,128,0,1,fp8,fp8,0,8.739157358805338
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,3072,96,2,128,0,1,float16,float16,0,14.307839711507162
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,3072,96,4,128,0,1,fp8,fp8,0,8.628906885782877
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,3072,96,2,128,0,1,float16,fp8,0,14.147584279378256
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,3072,96,4,128,0,1,float16,float16,0,14.131540934244791
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,3072,96,4,128,0,1,float16,fp8,0,14.196907043457031
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,3072,96,8,128,0,1,float16,float16,0,14.410410563151041
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,3072,96,8,128,0,1,float16,fp8,0,13.907455444335938
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,3072,96,8,128,0,1,fp8,fp8,0,8.752127965291342
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,3072,96,96,128,0,1,float16,float16,0,8.913920084635416
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,3072,96,96,128,0,1,float16,fp8,0,8.702634811401367
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,3072,96,1,128,0,1,float16,float16,0,6.461439768473308
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,3072,96,96,128,0,1,fp8,fp8,0,6.454613367716472
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,3072,96,1,128,0,1,fp8,fp8,0,4.083370526631673
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,3072,96,1,128,0,1,float16,fp8,0,6.95466677347819
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,3072,96,2,128,0,1,float16,float16,0,6.305791854858398
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,3072,96,2,128,0,1,float16,fp8,0,7.159978866577148
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,3072,96,2,128,0,1,fp8,fp8,0,4.108288129170735
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,3072,96,4,128,0,1,fp8,fp8,0,4.176383972167969
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,3072,96,4,128,0,1,float16,fp8,0,7.0097916920979815
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,3072,96,4,128,0,1,float16,float16,0,6.901418685913086
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,3072,96,8,128,0,1,float16,float16,0,7.026517232259114
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,3072,96,8,128,0,1,float16,fp8,0,7.062527974446614
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,3072,96,8,128,0,1,fp8,fp8,0,4.150272051493327
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,3072,96,96,128,0,1,float16,float16,0,4.345173199971517
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,3072,96,96,128,0,1,float16,fp8,0,4.313770612080892
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,3072,96,96,128,0,1,fp8,fp8,0,3.1392428080240884
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,3072,96,1,128,0,1,float16,float16,0,3.2314027150472007
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,3072,96,1,128,0,1,fp8,fp8,0,2.0468053817749023
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,3072,96,1,128,0,1,float16,fp8,0,3.2414719263712564
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,3072,96,2,128,0,1,float16,float16,0,3.2341334025065103
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,3072,96,2,128,0,1,float16,fp8,0,3.1904427210489907
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,3072,96,2,128,0,1,fp8,fp8,0,1.9899733861287434
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,3072,96,4,128,0,1,float16,float16,0,3.3664000829060874
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,3072,96,4,128,0,1,fp8,fp8,0,2.0365653038024902
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,3072,96,4,128,0,1,float16,fp8,0,3.2795305252075195
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,3072,96,8,128,0,1,float16,float16,0,3.3460906346639
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,3072,96,8,128,0,1,float16,fp8,0,3.259221394856771
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,3072,96,8,128,0,1,fp8,fp8,0,2.0626773834228516
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,3072,96,96,128,0,1,float16,float16,0,2.1304319699605307
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,3072,96,96,128,0,1,float16,fp8,0,2.0959572792053223
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,3072,96,96,128,0,1,fp8,fp8,0,1.4935040473937988
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,3072,96,1,128,0,1,float16,float16,0,1.5583573977152507
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,3072,96,1,128,0,1,fp8,fp8,0,1.0253653526306152
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,3072,96,1,128,0,1,float16,fp8,0,1.5853226979573567
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,3072,96,2,128,0,1,float16,float16,0,1.5586986541748047
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,3072,96,2,128,0,1,float16,fp8,0,1.5566506385803223
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,3072,96,2,128,0,1,fp8,fp8,0,0.9675093491872152
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,3072,96,4,128,0,1,float16,float16,0,1.559893290201823
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,3072,96,4,128,0,1,float16,fp8,0,1.5957333246866863
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,3072,96,4,128,0,1,fp8,fp8,0,1.02348796526591
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,3072,96,8,128,0,1,float16,float16,0,1.6424959500630696
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,3072,96,8,128,0,1,float16,fp8,0,1.6279892921447754
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,3072,96,8,128,0,1,fp8,fp8,0,1.0304853121439617
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,2048,96,1,128,0,1,fp8,fp8,0,39.71362050374349
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,2048,96,2,128,0,1,fp8,fp8,0,40.07953135172526
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,2048,96,4,128,0,1,fp8,fp8,0,40.03754679361979
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,2048,96,8,128,0,1,fp8,fp8,0,40.59801483154297
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,2048,96,96,128,0,1,float16,float16,0,38.513834635416664
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,2048,96,96,128,0,1,float16,fp8,0,38.06276194254557
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,2048,96,1,128,0,1,float16,float16,0,27.108011881510418
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,2048,96,1,128,0,1,float16,fp8,0,27.428863525390625
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,2048,96,1,128,0,1,fp8,fp8,0,17.407488505045574
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,2048,96,2,128,0,1,float16,float16,0,27.670870463053387
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,2048,96,2,128,0,1,fp8,fp8,0,18.259114583333332
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,2048,96,4,128,0,1,fp8,fp8,0,17.75479507446289
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,2048,96,2,128,0,1,float16,fp8,0,27.47443135579427
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,2048,96,4,128,0,1,float16,float16,0,27.638272603352863
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,2048,96,4,128,0,1,float16,fp8,0,26.969940185546875
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,2048,96,8,128,0,1,float16,float16,0,27.812693277994793
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,2048,96,8,128,0,1,fp8,fp8,0,18.686463673909504
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,2048,96,1,128,0,1,float16,float16,0,13.54376475016276
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,2048,96,96,128,0,1,float16,fp8,0,18.94109853108724
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,2048,96,96,128,0,1,fp8,fp8,0,14.723072052001953
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,2048,96,96,128,0,1,float16,float16,0,19.197781880696613
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,2048,96,1,128,0,1,float16,fp8,0,13.691903432210287
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,2048,96,8,128,0,1,float16,fp8,0,28.384256998697918
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,2048,96,1,128,0,1,fp8,fp8,0,8.648874918619791
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,2048,96,2,128,0,1,fp8,fp8,0,8.56439463297526
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,2048,96,2,128,0,1,float16,float16,0,13.206016540527344
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,2048,96,2,128,0,1,float16,fp8,0,13.740543365478516
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,2048,96,4,128,0,1,fp8,fp8,0,8.806400299072266
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,2048,96,4,128,0,1,float16,float16,0,13.697364807128906
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,2048,96,4,128,0,1,float16,fp8,0,13.7258669535319
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,2048,96,8,128,0,1,float16,float16,0,14.303231557210287
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,2048,96,8,128,0,1,fp8,fp8,0,8.788480122884115
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,2048,96,8,128,0,1,float16,fp8,0,13.75266138712565
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,2048,96,1,128,0,1,float16,float16,0,6.6935469309488935
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,2048,96,96,128,0,1,float16,float16,0,9.4651730855306
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,2048,96,96,128,0,1,float16,fp8,0,9.558015823364258
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,2048,96,96,128,0,1,fp8,fp8,0,7.1178239186604815
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,2048,96,1,128,0,1,fp8,fp8,0,4.023296038309733
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,2048,96,1,128,0,1,float16,fp8,0,6.9169495900472
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,2048,96,2,128,0,1,float16,float16,0,6.774954477945964
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,2048,96,2,128,0,1,fp8,fp8,0,4.124671936035156
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,2048,96,2,128,0,1,float16,fp8,0,6.706176122029622
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,2048,96,4,128,0,1,float16,float16,0,6.923946380615234
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,2048,96,4,128,0,1,float16,fp8,0,6.666581471761067
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,2048,96,4,128,0,1,fp8,fp8,0,4.104021390279134
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,2048,96,8,128,0,1,float16,float16,0,6.713685353597005
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,2048,96,8,128,0,1,float16,fp8,0,6.478335698445638
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,2048,96,8,128,0,1,fp8,fp8,0,4.25489075978597
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,2048,96,96,128,0,1,float16,float16,0,4.710058530171712
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,2048,96,96,128,0,1,float16,fp8,0,4.684970537821452
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,2048,96,96,128,0,1,fp8,fp8,0,3.4788694381713867
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,2048,96,1,128,0,1,float16,float16,0,3.147775967915853
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,2048,96,1,128,0,1,float16,fp8,0,3.207680066426595
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,2048,96,1,128,0,1,fp8,fp8,0,2.0164267222086587
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,2048,96,2,128,0,1,float16,float16,0,3.1875413258870444
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,2048,96,2,128,0,1,float16,fp8,0,3.2105814615885415
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,2048,96,2,128,0,1,fp8,fp8,0,2.0725760459899902
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,2048,96,4,128,0,1,float16,float16,0,3.23908265431722
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,2048,96,4,128,0,1,float16,fp8,0,3.2068265279134116
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,2048,96,4,128,0,1,fp8,fp8,0,2.084010601043701
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,2048,96,8,128,0,1,float16,float16,0,3.2863572438557944
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,2048,96,8,128,0,1,float16,fp8,0,3.2481279373168945
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,2048,96,8,128,0,1,fp8,fp8,0,2.1478400230407715
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,2048,96,96,128,0,1,float16,float16,0,2.3140692710876465
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,2048,96,96,128,0,1,float16,fp8,0,2.2396586736043296
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,2048,96,1,128,0,1,float16,float16,0,1.570474624633789
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,2048,96,96,128,0,1,fp8,fp8,0,1.6542720794677734
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,2048,96,1,128,0,1,float16,fp8,0,1.5380479494730632
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,2048,96,1,128,0,1,fp8,fp8,0,0.9791146914164225
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,2048,96,2,128,0,1,float16,float16,0,1.5711572964986165
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,2048,96,2,128,0,1,float16,fp8,0,1.586176077524821
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,2048,96,2,128,0,1,fp8,fp8,0,0.9910613695780436
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,2048,96,4,128,0,1,float16,float16,0,1.602560043334961
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,2048,96,4,128,0,1,float16,fp8,0,1.5617705980936687
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,2048,96,4,128,0,1,fp8,fp8,0,1.010858694712321
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,2048,96,8,128,0,1,float16,float16,0,1.6252586046854656
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,2048,96,8,128,0,1,float16,fp8,0,1.6296960512797039
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,2048,96,8,128,0,1,fp8,fp8,0,1.0641067028045654
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,2048,96,96,128,0,1,float16,float16,0,1.1422719955444336
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,2048,96,96,128,0,1,fp8,fp8,0,0.7799466451009115
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,2048,96,96,128,0,1,float16,fp8,0,1.1072853406270344
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,2048,96,1,128,0,1,float16,float16,0,0.7599786917368571
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,2048,96,1,128,0,1,float16,fp8,0,0.7507627010345459
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,2048,96,1,128,0,1,fp8,fp8,0,0.5046613216400146
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,2048,96,2,128,0,1,float16,float16,0,0.7773866653442383
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,2048,96,2,128,0,1,fp8,fp8,0,0.5021013418833414
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,2048,96,2,128,0,1,float16,fp8,0,0.7632213433583578
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,2048,96,4,128,0,1,float16,fp8,0,0.7604906558990479
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,2048,96,4,128,0,1,float16,float16,0,0.7705600261688232
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,2048,96,4,128,0,1,fp8,fp8,0,0.5046613216400146
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,2048,96,8,128,0,1,float16,float16,0,0.7712426980336508
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,2048,96,8,128,0,1,float16,fp8,0,0.7886506716410319
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,2048,96,8,128,0,1,fp8,fp8,0,0.4991999864578247
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,1536,96,1,128,0,1,fp8,fp8,0,24.05956268310547
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,1536,96,2,128,0,1,fp8,fp8,0,23.848960876464844
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,1536,96,4,128,0,1,fp8,fp8,0,24.792577107747395
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,1536,96,8,128,0,1,fp8,fp8,0,24.627710978190105
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,1536,96,1,128,0,1,float16,float16,0,16.345940907796223
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,1536,96,96,128,0,1,fp8,fp8,0,20.102315266927082
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,1536,96,96,128,0,1,float16,float16,0,25.259862263997395
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,1536,96,96,128,0,1,float16,fp8,0,24.99754587809245
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,1536,96,1,128,0,1,float16,fp8,0,16.353280385335285
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,1536,96,1,128,0,1,fp8,fp8,0,10.708479563395182
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,1536,96,2,128,0,1,fp8,fp8,0,10.689024607340494
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,1536,96,2,128,0,1,float16,float16,0,16.416768391927082
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,1536,96,2,128,0,1,float16,fp8,0,16.354644775390625
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,1536,96,4,128,0,1,fp8,fp8,0,10.727252960205078
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,1536,96,4,128,0,1,float16,fp8,0,16.74598439534505
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,1536,96,4,128,0,1,float16,float16,0,16.930474599202473
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,1536,96,8,128,0,1,float16,float16,0,16.87773895263672
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,1536,96,8,128,0,1,fp8,fp8,0,11.0470822652181
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,1536,96,8,128,0,1,float16,fp8,0,16.617642720540363
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,1536,96,96,128,0,1,float16,float16,0,12.603392283121744
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,1536,96,96,128,0,1,float16,fp8,0,12.486826578776041
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,1536,96,1,128,0,1,float16,float16,0,8.45141347249349
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,1536,96,1,128,0,1,float16,fp8,0,8.143360137939453
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,1536,96,96,128,0,1,fp8,fp8,0,9.994922637939453
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,1536,96,1,128,0,1,fp8,fp8,0,5.147989273071289
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,1536,96,2,128,0,1,float16,fp8,0,8.228352228800455
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,1536,96,2,128,0,1,float16,float16,0,7.947263717651367
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,1536,96,2,128,0,1,fp8,fp8,0,5.169322649637858
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,1536,96,4,128,0,1,float16,float16,0,8.353279749552408
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,1536,96,4,128,0,1,fp8,fp8,0,5.3896535237630205
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,1536,96,4,128,0,1,float16,fp8,0,8.163157145182291
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,1536,96,8,128,0,1,float16,float16,0,8.077994664510092
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,1536,96,8,128,0,1,float16,fp8,0,8.03874142964681
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,1536,96,8,128,0,1,fp8,fp8,0,5.4302717844645185
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,1536,96,96,128,0,1,float16,float16,0,6.207658767700195
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,1536,96,96,128,0,1,fp8,fp8,0,4.868778546651204
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,1536,96,1,128,0,1,float16,float16,0,3.976874669392904
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,1536,96,1,128,0,1,float16,fp8,0,3.9690240224202475
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,1536,96,96,128,0,1,float16,fp8,0,6.204416275024414
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,1536,96,1,128,0,1,fp8,fp8,0,2.6175146102905273
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,1536,96,2,128,0,1,float16,float16,0,3.988138516743978
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,1536,96,2,128,0,1,fp8,fp8,0,2.6516480445861816
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,1536,96,2,128,0,1,float16,fp8,0,3.904512087504069
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,1536,96,4,128,0,1,float16,float16,0,4.0043519337972
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,1536,96,4,128,0,1,float16,fp8,0,3.9691947301228843
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,1536,96,4,128,0,1,fp8,fp8,0,2.6705919901529946
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,1536,96,8,128,0,1,float16,float16,0,4.062037467956543
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,1536,96,8,128,0,1,float16,fp8,0,4.057600021362305
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,1536,96,8,128,0,1,fp8,fp8,0,2.7668479283650718
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,1536,96,96,128,0,1,fp8,fp8,0,2.3089493115743003
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,1536,96,96,128,0,1,float16,float16,0,3.0185813903808594
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,1536,96,96,128,0,1,float16,fp8,0,2.960042635599772
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,1536,96,1,128,0,1,float16,float16,0,1.974613348642985
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,1536,96,1,128,0,1,float16,fp8,0,1.949013392130534
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,1536,96,1,128,0,1,fp8,fp8,0,1.234773317972819
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,1536,96,2,128,0,1,float16,fp8,0,1.9491839408874512
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,1536,96,4,128,0,1,float16,float16,0,1.97597869237264
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,1536,96,2,128,0,1,float16,float16,0,1.9858773549397786
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,1536,96,2,128,0,1,fp8,fp8,0,1.2651519775390625
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,1536,96,4,128,0,1,float16,fp8,0,2.0014079411824546
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,1536,96,4,128,0,1,fp8,fp8,0,1.305087963740031
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,1536,96,8,128,0,1,float16,float16,0,2.0213759740193686
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,1536,96,8,128,0,1,float16,fp8,0,2.0316160519917807
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,1536,96,8,128,0,1,fp8,fp8,0,1.330858627955119
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,1536,96,96,128,0,1,float16,float16,0,1.5148372650146484
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,1536,96,96,128,0,1,fp8,fp8,0,1.1199146906534831
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,1536,96,96,128,0,1,float16,fp8,0,1.4493014017740886
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,1536,96,1,128,0,1,float16,float16,0,0.8890026410420736
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,1536,96,1,128,0,1,float16,fp8,0,0.8734719753265381
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,1536,96,1,128,0,1,fp8,fp8,0,0.5947733322779337
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,1536,96,2,128,0,1,float16,float16,0,0.8927573362986246
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,1536,96,2,128,0,1,float16,fp8,0,0.9164799849192301
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,1536,96,2,128,0,1,fp8,fp8,0,0.6079146862030029
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,1536,96,4,128,0,1,float16,float16,0,0.9009493192036947
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,1536,96,4,128,0,1,float16,fp8,0,0.9069226582845052
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,1536,96,4,128,0,1,fp8,fp8,0,0.6070613463719686
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,1536,96,8,128,0,1,float16,fp8,0,0.9309866428375244
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,1536,96,8,128,0,1,float16,float16,0,0.9344000021616617
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,1536,96,8,128,0,1,fp8,fp8,0,0.649727980295817
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,1536,96,96,128,0,1,float16,float16,0,0.739840030670166
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,1536,96,96,128,0,1,float16,fp8,0,0.7007573445638021
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,1536,96,96,128,0,1,fp8,fp8,0,0.53111465771993
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,1536,96,1,128,0,1,float16,fp8,0,0.4654080073038737
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,1536,96,1,128,0,1,fp8,fp8,0,0.2943999965985616
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,1536,96,1,128,0,1,float16,float16,0,0.45585068066914874
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,1536,96,2,128,0,1,float16,float16,0,0.4805973370869954
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,1536,96,2,128,0,1,float16,fp8,0,0.4724053144454956
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,1536,96,2,128,0,1,fp8,fp8,0,0.29286400477091473
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,1536,96,4,128,0,1,float16,float16,0,0.474453330039978
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,1536,96,4,128,0,1,float16,fp8,0,0.46506667137145996
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,1536,96,4,128,0,1,fp8,fp8,0,0.29525333642959595
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,1536,96,8,128,0,1,float16,float16,0,0.46404266357421875
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,1536,96,8,128,0,1,float16,fp8,0,0.4747946659723918
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,1536,96,8,128,0,1,fp8,fp8,0,0.2996906638145447
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,1024,96,1,128,0,1,fp8,fp8,0,24.390485127766926
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,1024,96,2,128,0,1,fp8,fp8,0,24.67515818277995
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,1024,96,1,128,0,1,float16,fp8,0,34.87214914957682
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,1024,96,4,128,0,1,fp8,fp8,0,26.130091349283855
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,1024,96,8,128,0,1,fp8,fp8,0,26.484395345052082
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,1024,96,96,128,0,1,float16,float16,0,28.785664876302082
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,1024,96,96,128,0,1,float16,fp8,0,27.381078084309895
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,1024,96,1,128,0,1,float16,float16,0,16.967508951822918
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,1024,96,1,128,0,1,float16,fp8,0,16.692053476969402
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,1024,96,2,128,0,1,fp8,fp8,0,11.600042978922525
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,1024,96,2,128,0,1,float16,float16,0,16.50005340576172
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,1024,96,1,128,0,1,fp8,fp8,0,11.685887654622396
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,1024,96,4,128,0,1,float16,float16,0,17.10728581746419
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,1024,96,2,128,0,1,float16,fp8,0,16.851455688476562
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,1024,96,4,128,0,1,float16,fp8,0,17.19483693440755
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,1024,96,4,128,0,1,fp8,fp8,0,12.243456522623697
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,1024,96,8,128,0,1,fp8,fp8,0,12.438699086507162
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,1024,96,96,128,0,1,float16,float16,0,14.181888580322266
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,1024,96,96,128,0,1,float16,fp8,0,13.593087514241537
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,1024,96,8,128,0,1,float16,fp8,0,17.06939697265625
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,1024,96,8,128,0,1,float16,float16,0,17.212586720784504
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,1024,96,96,128,0,1,fp8,fp8,0,11.781120300292969
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,1024,96,1,128,0,1,float16,float16,0,8.244053522745768
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,1024,96,1,128,0,1,float16,fp8,0,8.229375839233398
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,1024,96,1,128,0,1,fp8,fp8,0,5.512874603271484
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,1024,96,2,128,0,1,float16,fp8,0,8.097962697347006
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,1024,96,2,128,0,1,fp8,fp8,0,5.535061518351237
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,1024,96,2,128,0,1,float16,float16,0,8.23193613688151
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,1024,96,4,128,0,1,float16,float16,0,8.293376286824545
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,1024,96,4,128,0,1,float16,fp8,0,8.484863917032877
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,1024,96,4,128,0,1,fp8,fp8,0,5.779626846313477
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,1024,96,8,128,0,1,float16,float16,0,8.31658681233724
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,1024,96,8,128,0,1,fp8,fp8,0,6.013781229654948
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,1024,96,8,128,0,1,float16,fp8,0,8.301226933797201
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,1024,96,96,128,0,1,float16,float16,0,7.11355717976888
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,1024,96,96,128,0,1,float16,fp8,0,6.7203413645426435
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,1024,96,1,128,0,1,fp8,fp8,0,2.7921066284179688
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,1024,96,96,128,0,1,fp8,fp8,0,5.83901850382487
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,1024,96,1,128,0,1,float16,fp8,0,4.12501335144043
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,1024,96,2,128,0,1,float16,fp8,0,4.134229342142741
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,1024,96,2,128,0,1,fp8,fp8,0,2.8368212381998696
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,1024,96,2,128,0,1,float16,float16,0,4.134911855061849
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,1024,96,4,128,0,1,float16,float16,0,4.217173258463542
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,1024,96,1,128,0,1,float16,float16,0,4.087978680928548
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,1024,96,4,128,0,1,fp8,fp8,0,2.9132798512776694
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,1024,96,4,128,0,1,float16,fp8,0,4.112725257873535
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,1024,96,8,128,0,1,fp8,fp8,0,2.978303909301758
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,1024,96,8,128,0,1,float16,fp8,0,4.155392011006673
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,1024,96,8,128,0,1,float16,float16,0,4.21888001759847
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,1024,96,96,128,0,1,float16,float16,0,3.504810651143392
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,1024,96,96,128,0,1,float16,fp8,0,3.323391914367676
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,1024,96,1,128,0,1,float16,float16,0,2.0396374066670737
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,1024,96,1,128,0,1,float16,fp8,0,2.0008959770202637
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,1024,96,1,128,0,1,fp8,fp8,0,1.3566293716430664
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,1024,96,2,128,0,1,float16,float16,0,2.0077226956685386
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,1024,96,96,128,0,1,fp8,fp8,0,2.8175360361735025
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,1024,96,2,128,0,1,fp8,fp8,0,1.3600427309672039
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,1024,96,2,128,0,1,float16,fp8,0,2.02513058980306
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,1024,96,4,128,0,1,float16,float16,0,2.0575572649637857
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,1024,96,4,128,0,1,fp8,fp8,0,1.3919572830200195
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,1024,96,4,128,0,1,float16,fp8,0,2.0787199338277182
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,1024,96,8,128,0,1,float16,fp8,0,2.0754772822062173
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,1024,96,8,128,0,1,float16,float16,0,2.1135360399881997
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,1024,96,8,128,0,1,fp8,fp8,0,1.4660266240437825
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,1024,96,96,128,0,1,float16,float16,0,1.6936960220336914
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,1024,96,96,128,0,1,float16,fp8,0,1.6139945983886719
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,1024,96,96,128,0,1,fp8,fp8,0,1.330858627955119
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,1024,96,1,128,0,1,float16,float16,0,0.9640959898630778
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,1024,96,1,128,0,1,float16,fp8,0,0.9767253398895264
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,1024,96,1,128,0,1,fp8,fp8,0,0.6478506724039713
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,1024,96,2,128,0,1,float16,float16,0,0.9842346509297689
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,1024,96,2,128,0,1,float16,fp8,0,1.0019839604695637
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,1024,96,2,128,0,1,fp8,fp8,0,0.6702079772949219
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,1024,96,4,128,0,1,float16,fp8,0,0.9972053368886312
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,1024,96,4,128,0,1,float16,float16,0,1.0019839604695637
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,1024,96,4,128,0,1,fp8,fp8,0,0.6871039867401123
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,1024,96,8,128,0,1,float16,float16,0,1.0431146621704102
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,1024,96,8,128,0,1,float16,fp8,0,1.021440029144287
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,1024,96,8,128,0,1,fp8,fp8,0,0.717141310373942
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,1024,96,96,128,0,1,float16,fp8,0,0.7840426762898763
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,1024,96,96,128,0,1,float16,float16,0,0.8270506858825684
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,1024,96,96,128,0,1,fp8,fp8,0,0.6360746622085571
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,1024,96,1,128,0,1,float16,float16,0,0.427349328994751
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,1024,96,1,128,0,1,float16,fp8,0,0.4210346539815267
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,1024,96,1,128,0,1,fp8,fp8,0,0.28808534145355225
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,1024,96,2,128,0,1,float16,float16,0,0.4379306634267171
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,1024,96,2,128,0,1,float16,fp8,0,0.4275199969609578
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,1024,96,2,128,0,1,fp8,fp8,0,0.2797226707140605
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,1024,96,4,128,0,1,float16,fp8,0,0.42956801255544025
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,1024,96,4,128,0,1,float16,float16,0,0.44390400250752765
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,1024,96,4,128,0,1,fp8,fp8,0,0.2986666758855184
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,1024,96,8,128,0,1,float16,float16,0,0.44697598616282147
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,1024,96,8,128,0,1,float16,fp8,0,0.44817066192626953
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,1024,96,8,128,0,1,fp8,fp8,0,0.3099306623140971
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,1024,96,96,128,0,1,float16,float16,0,0.34201598167419434
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,1024,96,96,128,0,1,float16,fp8,0,0.28245333830515545
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,1024,96,96,128,0,1,fp8,fp8,0,0.28893866141637164
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,1024,96,1,128,0,1,fp8,fp8,0,0.13482667009035745
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,1024,96,2,128,0,1,float16,float16,0,0.21623466412226358
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,1024,96,1,128,0,1,float16,fp8,0,0.21862399578094482
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,1024,96,1,128,0,1,float16,float16,0,0.2208426594734192
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,1024,96,2,128,0,1,float16,fp8,0,0.2135039965311686
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,1024,96,2,128,0,1,fp8,fp8,0,0.1365333298842112
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,1024,96,4,128,0,1,float16,float16,0,0.21572266022364298
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,1024,96,4,128,0,1,float16,fp8,0,0.21742933988571167
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,1024,96,4,128,0,1,fp8,fp8,0,0.13516799608866373
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,1024,96,8,128,0,1,float16,float16,0,0.2228906750679016
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,1024,96,8,128,0,1,float16,fp8,0,0.22220800320307413
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,1024,96,8,128,0,1,fp8,fp8,0,0.13550933202107748
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,512,96,1,128,0,1,fp8,fp8,0,19.21109390258789
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,512,96,2,128,0,1,fp8,fp8,0,19.56334940592448
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,512,96,4,128,0,1,fp8,fp8,0,20.714154561360676
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,512,96,8,128,0,1,fp8,fp8,0,21.13467788696289
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,512,96,96,128,0,1,float16,float16,0,26.472618103027344
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,512,96,1,128,0,1,float16,float16,0,12.052992502848307
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,512,96,96,128,0,1,float16,fp8,0,25.0415776570638
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,512,96,2,128,0,1,fp8,fp8,0,9.184085210164389
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,512,96,2,128,0,1,float16,fp8,0,12.201642354329428
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,512,96,1,128,0,1,float16,fp8,0,12.018688201904297
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,512,96,1,128,0,1,fp8,fp8,0,8.900778452555338
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,512,96,2,128,0,1,float16,float16,0,11.977216084798178
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,512,96,4,128,0,1,fp8,fp8,0,9.557674407958984
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,512,96,4,128,0,1,float16,fp8,0,13.211477915445963
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,512,96,4,128,0,1,float16,float16,0,12.637866973876953
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,512,96,8,128,0,1,float16,fp8,0,12.619946797688803
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,512,96,8,128,0,1,float16,float16,0,12.508331298828125
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,512,96,8,128,0,1,fp8,fp8,0,9.723904291788736
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,512,96,96,128,0,1,float16,float16,0,13.232468922932943
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,512,96,1,128,0,1,float16,float16,0,5.7895253499348955
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,512,96,96,128,0,1,float16,fp8,0,12.539562225341797
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,512,96,96,128,0,1,fp8,fp8,0,10.864128112792969
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,512,96,1,128,0,1,float16,fp8,0,5.8646189371744795
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,512,96,1,128,0,1,fp8,fp8,0,4.2405548095703125
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,512,96,2,128,0,1,float16,float16,0,5.826901117960612
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,512,96,2,128,0,1,fp8,fp8,0,4.278954823811849
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,512,96,2,128,0,1,float16,fp8,0,5.7581227620442705
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,512,96,4,128,0,1,fp8,fp8,0,4.486485481262207
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,512,96,4,128,0,1,float16,fp8,0,6.086314519246419
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,512,96,4,128,0,1,float16,float16,0,6.305450439453125
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,512,96,8,128,0,1,float16,float16,0,6.106794357299805
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,512,96,8,128,0,1,float16,fp8,0,6.024192174275716
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,512,96,8,128,0,1,fp8,fp8,0,4.643157323201497
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,512,96,1,128,0,1,float16,float16,0,2.9458773930867515
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,512,96,96,128,0,1,float16,float16,0,6.617600123087565
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,512,96,96,128,0,1,fp8,fp8,0,5.2896426518758135
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,512,96,96,128,0,1,float16,fp8,0,6.247594833374023
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,512,96,1,128,0,1,float16,fp8,0,2.9532159169514975
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,512,96,1,128,0,1,fp8,fp8,0,2.077184041341146
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,512,96,2,128,0,1,float16,float16,0,3.0018558502197266
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,512,96,2,128,0,1,float16,fp8,0,2.9322239557902017
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,512,96,2,128,0,1,fp8,fp8,0,2.1087573369344077
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,512,96,4,128,0,1,float16,fp8,0,3.0692691802978516
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,512,96,4,128,0,1,float16,float16,0,3.0617599487304688
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,512,96,4,128,0,1,fp8,fp8,0,2.1225813229878745
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,512,96,8,128,0,1,float16,float16,0,3.0441811879475913
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,512,96,8,128,0,1,fp8,fp8,0,2.28983465830485
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,512,96,8,128,0,1,float16,fp8,0,3.0310398737589517
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,512,96,96,128,0,1,float16,float16,0,3.3013760248819985
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,512,96,96,128,0,1,float16,fp8,0,3.107669194539388
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,512,96,96,128,0,1,fp8,fp8,0,2.590720017751058
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,512,96,1,128,0,1,float16,float16,0,1.4390613238016765
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,512,96,1,128,0,1,fp8,fp8,0,1.0246826807657878
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,512,96,1,128,0,1,float16,fp8,0,1.4279680252075195
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,512,96,2,128,0,1,float16,float16,0,1.4441812833150227
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,512,96,2,128,0,1,float16,fp8,0,1.4448639551798503
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,512,96,2,128,0,1,fp8,fp8,0,1.029802640279134
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,512,96,4,128,0,1,float16,float16,0,1.4909440676371257
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,512,96,4,128,0,1,fp8,fp8,0,1.063594659169515
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,512,96,4,128,0,1,float16,fp8,0,1.468074639638265
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,512,96,8,128,0,1,float16,fp8,0,1.5133013725280762
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,512,96,8,128,0,1,float16,float16,0,1.511082649230957
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,512,96,8,128,0,1,fp8,fp8,0,1.109503984451294
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,512,96,96,128,0,1,float16,fp8,0,1.4945279757181804
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,512,96,96,128,0,1,float16,float16,0,1.5774720509847004
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,512,96,96,128,0,1,fp8,fp8,0,1.2100266615549724
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,512,96,1,128,0,1,float16,fp8,0,0.6722559928894043
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,512,96,1,128,0,1,float16,float16,0,0.6737919648488363
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,512,96,1,128,0,1,fp8,fp8,0,0.4814506769180298
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,512,96,2,128,0,1,float16,float16,0,0.6828373273213705
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,512,96,2,128,0,1,float16,fp8,0,0.6871039867401123
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,512,96,2,128,0,1,fp8,fp8,0,0.4978346824645996
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,512,96,4,128,0,1,float16,float16,0,0.7111679712931315
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,512,96,4,128,0,1,float16,fp8,0,0.6988800366719564
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,512,96,4,128,0,1,fp8,fp8,0,0.5150719881057739
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,512,96,8,128,0,1,float16,float16,0,0.7304533322652181
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,512,96,8,128,0,1,fp8,fp8,0,0.5490346749623617
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,512,96,8,128,0,1,float16,fp8,0,0.7248213291168213
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,512,96,96,128,0,1,float16,float16,0,0.7393279870351156
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,512,96,96,128,0,1,float16,fp8,0,0.6971733570098877
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,512,96,96,128,0,1,fp8,fp8,0,0.5645653406778971
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,512,96,1,128,0,1,float16,float16,0,0.27801599105199176
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,512,96,1,128,0,1,fp8,fp8,0,0.18517333269119263
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,512,96,2,128,0,1,float16,float16,0,0.28330665826797485
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,512,96,1,128,0,1,float16,fp8,0,0.27374933163324994
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,512,96,2,128,0,1,float16,fp8,0,0.28245333830515545
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,512,96,2,128,0,1,fp8,fp8,0,0.19268266359965006
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,512,96,4,128,0,1,float16,float16,0,0.2879146734873454
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,512,96,4,128,0,1,fp8,fp8,0,0.20684800545374551
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,512,96,8,128,0,1,float16,float16,0,0.2995199958483378
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,512,96,4,128,0,1,float16,fp8,0,0.2834773262341817
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,512,96,8,128,0,1,float16,fp8,0,0.2913279930750529
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,512,96,8,128,0,1,fp8,fp8,0,0.2379093368848165
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,512,96,96,128,0,1,float16,float16,0,0.2635093331336975
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,512,96,96,128,0,1,float16,fp8,0,0.21316266059875488
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,512,96,96,128,0,1,fp8,fp8,0,0.24132267634073892
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,512,96,1,128,0,1,float16,float16,0,0.13192533453305563
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,512,96,2,128,0,1,float16,float16,0,0.13636266191800436
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,512,96,1,128,0,1,fp8,fp8,0,0.09045333663622539
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,512,96,1,128,0,1,float16,fp8,0,0.13329066832860312
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,512,96,2,128,0,1,float16,fp8,0,0.13636266191800436
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,512,96,2,128,0,1,fp8,fp8,0,0.09147733449935913
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,512,96,4,128,0,1,float16,float16,0,0.13619200388590494
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,512,96,4,128,0,1,float16,fp8,0,0.13329066832860312
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,512,96,4,128,0,1,fp8,fp8,0,0.091648002465566
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,512,96,8,128,0,1,float16,float16,0,0.13550933202107748
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,512,96,8,128,0,1,float16,fp8,0,0.1389226714769999
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,512,96,8,128,0,1,fp8,fp8,0,0.09215999643007915
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,512,96,96,128,0,1,float16,float16,0,0.09028266867001851
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,512,96,96,128,0,1,float16,fp8,0,0.0885759989420573
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,512,96,96,128,0,1,fp8,fp8,0,0.056832000613212585
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,512,96,1,128,0,1,float16,float16,0,0.07850666840871175
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,512,96,1,128,0,1,float16,fp8,0,0.07850666840871175
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,512,96,1,128,0,1,fp8,fp8,0,0.051541333397229515
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,512,96,2,128,0,1,float16,float16,0,0.07987200220425923
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,512,96,2,128,0,1,float16,fp8,0,0.08038400113582611
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,512,96,2,128,0,1,fp8,fp8,0,0.052906667192777
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,512,96,4,128,0,1,float16,float16,0,0.07918933530648549
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,512,96,4,128,0,1,float16,fp8,0,0.07970133423805237
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,512,96,4,128,0,1,fp8,fp8,0,0.053247998158137
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,512,96,8,128,0,1,float16,float16,0,0.07970133423805237
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,512,96,8,128,0,1,float16,fp8,0,0.07918933530648549
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,512,96,8,128,0,1,fp8,fp8,0,0.05222400029500326
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,256,96,1,128,0,1,fp8,fp8,0,7.822506586710612
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,256,96,1,128,0,1,float16,float16,0,10.747562408447266
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,256,96,1,128,0,1,float16,fp8,0,10.713429768880209
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,256,96,2,128,0,1,fp8,fp8,0,7.88650639851888
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,256,96,2,128,0,1,float16,float16,0,10.781013488769531
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,256,96,2,128,0,1,float16,fp8,0,10.763605753580729
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,256,96,4,128,0,1,float16,float16,0,11.785216013590494
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,256,96,4,128,0,1,fp8,fp8,0,8.472576141357422
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,256,96,4,128,0,1,float16,fp8,0,11.484672546386719
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,256,96,8,128,0,1,float16,float16,0,11.69100824991862
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,256,96,8,128,0,1,float16,fp8,0,11.506516774495443
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,256,96,8,128,0,1,fp8,fp8,0,8.893098831176758
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,256,96,96,128,0,1,float16,float16,0,13.075115203857422
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,256,96,96,128,0,1,float16,fp8,0,12.364288330078125
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,256,96,1,128,0,1,float16,float16,0,5.164373397827148
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,256,96,1,128,0,1,float16,fp8,0,5.15993595123291
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,256,96,1,128,0,1,fp8,fp8,0,3.660287857055664
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,256,96,96,128,0,1,fp8,fp8,0,10.737664540608725
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,256,96,2,128,0,1,float16,float16,0,5.209941228230794
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,256,96,2,128,0,1,fp8,fp8,0,3.7411839167277017
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,256,96,2,128,0,1,float16,fp8,0,5.180757204691569
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,256,96,4,128,0,1,float16,float16,0,5.496831893920898
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,256,96,4,128,0,1,fp8,fp8,0,3.94325319925944
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,256,96,4,128,0,1,float16,fp8,0,5.47430419921875
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,256,96,8,128,0,1,float16,float16,0,5.597866694132487
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,256,96,8,128,0,1,float16,fp8,0,5.541034698486328
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,256,96,8,128,0,1,fp8,fp8,0,4.208469390869141
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,256,96,96,128,0,1,float16,float16,0,6.528170903523763
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,256,96,96,128,0,1,float16,fp8,0,6.200661341349284
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,256,96,1,128,0,1,float16,float16,0,2.7361278533935547
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,256,96,1,128,0,1,float16,fp8,0,2.7101866404215493
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,256,96,96,128,0,1,fp8,fp8,0,5.24458662668864
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,256,96,1,128,0,1,fp8,fp8,0,1.8408106168111165
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,256,96,2,128,0,1,float16,float16,0,2.747904141743978
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,256,96,2,128,0,1,float16,fp8,0,2.741077423095703
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,256,96,2,128,0,1,fp8,fp8,0,1.8887680371602376
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,256,96,4,128,0,1,float16,float16,0,2.6794665654500327
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,256,96,4,128,0,1,float16,fp8,0,2.6760533650716147
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,256,96,4,128,0,1,fp8,fp8,0,1.9810986518859863
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,256,96,8,128,0,1,float16,fp8,0,2.7769174575805664
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,256,96,8,128,0,1,float16,float16,0,2.791935920715332
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,256,96,8,128,0,1,fp8,fp8,0,2.035711924235026
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,256,96,96,128,0,1,float16,fp8,0,3.112277348836263
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,256,96,1,128,0,1,float16,float16,0,1.242965300877889
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,256,96,96,128,0,1,float16,float16,0,3.2967678705851235
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,256,96,96,128,0,1,fp8,fp8,0,2.5335466066996255
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,256,96,1,128,0,1,float16,fp8,0,1.238869349161784
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,256,96,1,128,0,1,fp8,fp8,0,0.8391679922739664
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,256,96,2,128,0,1,float16,float16,0,1.2625919977823894
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,256,96,2,128,0,1,float16,fp8,0,1.2542293071746826
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,256,96,2,128,0,1,fp8,fp8,0,0.8966826597849528
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,256,96,4,128,0,1,float16,float16,0,1.318399985631307
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,256,96,4,128,0,1,float16,fp8,0,1.3066240151723225
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,256,96,4,128,0,1,fp8,fp8,0,0.91921067237854
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,256,96,8,128,0,1,float16,float16,0,1.381717363993327
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,256,96,8,128,0,1,float16,fp8,0,1.3764266967773438
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,256,96,8,128,0,1,fp8,fp8,0,0.979967991511027
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,256,96,96,128,0,1,float16,float16,0,1.5883946418762207
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,256,96,96,128,0,1,fp8,fp8,0,1.207808017730713
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,256,96,96,128,0,1,float16,fp8,0,1.5015254020690918
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,256,96,1,128,0,1,float16,float16,0,0.5579093297322592
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,256,96,1,128,0,1,float16,fp8,0,0.5616639852523804
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,256,96,1,128,0,1,fp8,fp8,0,0.40908801555633545
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,256,96,2,128,0,1,fp8,fp8,0,0.4203519821166992
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,256,96,4,128,0,1,float16,float16,0,0.5961386760075887
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,256,96,2,128,0,1,float16,fp8,0,0.5671253204345703
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,256,96,4,128,0,1,float16,fp8,0,0.5857280095418295
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,256,96,2,128,0,1,float16,float16,0,0.5672959884007772
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,256,96,4,128,0,1,fp8,fp8,0,0.43878400325775146
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,256,96,8,128,0,1,float16,float16,0,0.6382933457692465
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,256,96,8,128,0,1,float16,fp8,0,0.6282240152359009
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,256,96,8,128,0,1,fp8,fp8,0,0.47189335028330487
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,256,96,96,128,0,1,float16,fp8,0,0.6842026710510254
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,256,96,96,128,0,1,float16,float16,0,0.7369386355082194
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,256,96,96,128,0,1,fp8,fp8,0,0.544426679611206
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,256,96,1,128,0,1,float16,float16,0,0.20172800620396933
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,256,96,1,128,0,1,float16,fp8,0,0.20497065782546997
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,256,96,1,128,0,1,fp8,fp8,0,0.14882133404413858
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,256,96,2,128,0,1,float16,fp8,0,0.20616533358891806
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,256,96,2,128,0,1,float16,float16,0,0.20753065745035806
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,256,96,4,128,0,1,float16,float16,0,0.21384533246358237
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,256,96,2,128,0,1,fp8,fp8,0,0.15411200126012167
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,256,96,4,128,0,1,float16,fp8,0,0.20804266134897867
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,256,96,4,128,0,1,fp8,fp8,0,0.1513813336690267
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,256,96,8,128,0,1,float16,float16,0,0.2300586700439453
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,256,96,8,128,0,1,float16,fp8,0,0.21964800357818604
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,256,96,96,128,0,1,float16,float16,0,0.23825067281723022
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,256,96,8,128,0,1,fp8,fp8,0,0.19029333194096884
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,256,96,96,128,0,1,float16,fp8,0,0.18056533734003702
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,256,96,96,128,0,1,fp8,fp8,0,0.20872533321380615
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,256,96,1,128,0,1,float16,float16,0,0.09301333626111348
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,256,96,1,128,0,1,float16,fp8,0,0.09403733412424724
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,256,96,1,128,0,1,fp8,fp8,0,0.06638933221499126
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,256,96,2,128,0,1,float16,float16,0,0.09608532985051473
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,256,96,2,128,0,1,float16,fp8,0,0.0962559978167216
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,256,96,2,128,0,1,fp8,fp8,0,0.0679253339767456
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,256,96,4,128,0,1,float16,float16,0,0.09574400385220845
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,256,96,4,128,0,1,float16,fp8,0,0.09557333588600159
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,256,96,8,128,0,1,float16,float16,0,0.09574400385220845
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,256,96,4,128,0,1,fp8,fp8,0,0.06775466601053874
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,256,96,8,128,0,1,float16,fp8,0,0.0942080020904541
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,256,96,8,128,0,1,fp8,fp8,0,0.06741333504517873
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,256,96,96,128,0,1,float16,float16,0,0.06144000093142191
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,256,96,96,128,0,1,float16,fp8,0,0.0602453351020813
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,256,96,96,128,0,1,fp8,fp8,0,0.04351999859015147
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,256,96,1,128,0,1,float16,float16,0,0.053247998158137
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,256,96,1,128,0,1,float16,fp8,0,0.05341866612434387
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,256,96,1,128,0,1,fp8,fp8,0,0.039594667653242745
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,256,96,2,128,0,1,float16,fp8,0,0.054272000988324486
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,256,96,2,128,0,1,float16,float16,0,0.05444266895453135
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,256,96,2,128,0,1,fp8,fp8,0,0.040106666584809623
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,256,96,4,128,0,1,float16,float16,0,0.05444266895453135
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,256,96,4,128,0,1,fp8,fp8,0,0.04095999896526337
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,256,96,8,128,0,1,float16,float16,0,0.052906667192777
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,256,96,4,128,0,1,float16,fp8,0,0.055125330885251365
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,256,96,8,128,0,1,float16,fp8,0,0.053247998158137
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,256,96,8,128,0,1,fp8,fp8,0,0.039594667653242745
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,256,96,96,128,0,1,float16,float16,0,0.03703466554482778
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,256,96,96,128,0,1,float16,fp8,0,0.0365226666132609
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,256,96,96,128,0,1,fp8,fp8,0,0.0290133332212766
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,256,96,1,128,0,1,float16,float16,0,0.03498666733503342
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,256,96,1,128,0,1,fp8,fp8,0,0.0266239990790685
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,256,96,1,128,0,1,float16,fp8,0,0.03498666733503342
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,256,96,2,128,0,1,float16,float16,0,0.03549866626660029
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,256,96,2,128,0,1,fp8,fp8,0,0.02628266563018163
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,256,96,2,128,0,1,float16,fp8,0,0.03515733281771342
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,256,96,4,128,0,1,float16,float16,0,0.03566933423280716
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,256,96,4,128,0,1,fp8,fp8,0,0.027306665976842243
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,256,96,4,128,0,1,float16,fp8,0,0.03566933423280716
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,256,96,8,128,0,1,float16,fp8,0,0.03532800078392029
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,256,96,8,128,0,1,float16,float16,0,0.034815999368826546
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,256,96,8,128,0,1,fp8,fp8,0,0.027136000494162243
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,128,96,1,128,0,1,float16,float16,0,5.156693458557129
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,128,96,1,128,0,1,float16,fp8,0,5.106346766153972
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,128,96,1,128,0,1,fp8,fp8,0,3.5727361043294272
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,128,96,2,128,0,1,fp8,fp8,0,3.632298787434896
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,128,96,2,128,0,1,float16,float16,0,5.193386713663737
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,128,96,2,128,0,1,float16,fp8,0,5.173760096232097
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,128,96,4,128,0,1,float16,float16,0,5.391018549601237
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,128,96,4,128,0,1,float16,fp8,0,5.431978861490886
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,128,96,4,128,0,1,fp8,fp8,0,3.8022826512654624
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,128,96,8,128,0,1,fp8,fp8,0,4.074837366739909
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,128,96,8,128,0,1,float16,float16,0,5.673642476399739
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,128,96,8,128,0,1,float16,fp8,0,5.553663889567058
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,128,96,96,128,0,1,float16,float16,0,6.585685094197591
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,128,96,96,128,0,1,float16,fp8,0,6.264832178751628
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,128,96,1,128,0,1,float16,float16,0,2.5453227361043296
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,128,96,1,128,0,1,float16,fp8,0,2.5478827158610025
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,128,96,96,128,0,1,fp8,fp8,0,5.334869384765625
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,128,96,1,128,0,1,fp8,fp8,0,1.7140053113301594
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,128,96,2,128,0,1,float16,float16,0,2.579626719156901
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,128,96,2,128,0,1,float16,fp8,0,2.5603413581848145
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,128,96,2,128,0,1,fp8,fp8,0,1.7481387456258137
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,128,96,4,128,0,1,float16,float16,0,2.6752001444498696
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,128,96,4,128,0,1,float16,fp8,0,2.661717255910238
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,128,96,4,128,0,1,fp8,fp8,0,1.806506633758545
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,128,96,8,128,0,1,float16,float16,0,2.7996158599853516
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,128,96,8,128,0,1,float16,fp8,0,2.7567787170410156
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,128,96,8,128,0,1,fp8,fp8,0,1.9730772972106934
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,128,96,96,128,0,1,float16,float16,0,3.2797012329101562
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,128,96,96,128,0,1,float16,fp8,0,3.1112534205118814
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,128,96,1,128,0,1,float16,float16,0,1.3433173497517903
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,128,96,96,128,0,1,fp8,fp8,0,2.5832106272379556
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,128,96,1,128,0,1,float16,fp8,0,1.3334186871846516
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,128,96,1,128,0,1,fp8,fp8,0,0.8645973205566406
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,128,96,2,128,0,1,float16,float16,0,1.354922612508138
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,128,96,2,128,0,1,float16,fp8,0,1.3474133809407551
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,128,96,2,128,0,1,fp8,fp8,0,0.8920746644337972
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,128,96,4,128,0,1,float16,float16,0,1.3096960385640461
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,128,96,4,128,0,1,float16,fp8,0,1.3951999346415203
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,128,96,4,128,0,1,fp8,fp8,0,0.8734719753265381
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,128,96,8,128,0,1,float16,float16,0,1.375402609507243
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,128,96,8,128,0,1,float16,fp8,0,1.3605546951293945
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,128,96,96,128,0,1,float16,float16,0,1.589077313741048
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,128,96,96,128,0,1,float16,fp8,0,1.5018666585286458
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,128,96,1,128,0,1,float16,float16,0,0.5657600164413452
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,128,96,1,128,0,1,float16,fp8,0,0.5642240047454834
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,128,96,96,128,0,1,fp8,fp8,0,1.1965440114339192
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,128,96,2,128,0,1,float16,float16,0,0.5741226673126221
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,128,96,1,128,0,1,fp8,fp8,0,0.37324798107147217
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,128,96,2,128,0,1,float16,fp8,0,0.5756586790084839
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,128,96,8,128,0,1,fp8,fp8,0,0.9407146771748861
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,128,96,2,128,0,1,fp8,fp8,0,0.3872426748275757
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,128,96,4,128,0,1,float16,fp8,0,0.59989333152771
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,128,96,4,128,0,1,float16,float16,0,0.6067200104395548
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,128,96,4,128,0,1,fp8,fp8,0,0.4041386842727661
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,128,96,8,128,0,1,float16,float16,0,0.6478506724039713
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,128,96,8,128,0,1,float16,fp8,0,0.6369280020395914
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,128,96,8,128,0,1,fp8,fp8,0,0.42905600865681964
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,128,96,96,128,0,1,float16,fp8,0,0.6900053024291992
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,128,96,96,128,0,1,fp8,fp8,0,0.5454506476720175
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,128,96,1,128,0,1,float16,float16,0,0.16435199975967407
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,128,96,1,128,0,1,fp8,fp8,0,0.10854400197664897
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,128,96,1,128,0,1,float16,fp8,0,0.1634986698627472
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,128,96,2,128,0,1,float16,float16,0,0.17015467087427774
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,128,96,96,128,0,1,float16,float16,0,0.740010658899943
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,128,96,2,128,0,1,float16,fp8,0,0.16947199900945029
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,128,96,2,128,0,1,fp8,fp8,0,0.1109333336353302
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,128,96,4,128,0,1,float16,float16,0,0.18227199713389078
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,128,96,4,128,0,1,float16,fp8,0,0.1764693260192871
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,128,96,4,128,0,1,fp8,fp8,0,0.14062933127085367
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,128,96,8,128,0,1,float16,float16,0,0.20360533396402994
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,128,96,8,128,0,1,float16,fp8,0,0.19490132729212442
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,128,96,8,128,0,1,fp8,fp8,0,0.17254400253295898
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,128,96,96,128,0,1,float16,float16,0,0.2397866646448771
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,128,96,96,128,0,1,float16,fp8,0,0.1786880095799764
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,128,96,96,128,0,1,fp8,fp8,0,0.1986560026804606
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,128,96,1,128,0,1,float16,float16,0,0.07082666456699371
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,128,96,1,128,0,1,float16,fp8,0,0.07167999943097432
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,128,96,1,128,0,1,fp8,fp8,0,0.054101333022117615
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,128,96,2,128,0,1,float16,float16,0,0.0730453332265218
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,128,96,2,128,0,1,fp8,fp8,0,0.053930665055910744
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,128,96,2,128,0,1,float16,fp8,0,0.07253333429495494
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,128,96,4,128,0,1,float16,float16,0,0.07441066702206929
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,128,96,4,128,0,1,float16,fp8,0,0.07458133498827617
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,128,96,8,128,0,1,float16,float16,0,0.07321600119272868
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,128,96,4,128,0,1,fp8,fp8,0,0.05478399991989136
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,128,96,8,128,0,1,fp8,fp8,0,0.05444266895453135
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,128,96,8,128,0,1,float16,fp8,0,0.07287466526031494
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,128,96,96,128,0,1,float16,float16,0,0.049322664737701416
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,128,96,96,128,0,1,float16,fp8,0,0.04659200211366018
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,128,96,96,128,0,1,fp8,fp8,0,0.03669333209594091
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,128,96,1,128,0,1,float16,float16,0,0.04113066693147024
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,128,96,1,128,0,1,float16,fp8,0,0.04095999896526337
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,128,96,1,128,0,1,fp8,fp8,0,0.03242666771014532
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,128,96,2,128,0,1,float16,float16,0,0.04181333382924398
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,128,96,2,128,0,1,float16,fp8,0,0.04215466479460398
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,128,96,2,128,0,1,fp8,fp8,0,0.03276800115903219
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,128,96,4,128,0,1,float16,fp8,0,0.04266666869322459
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,128,96,4,128,0,1,float16,float16,0,0.041984001795450844
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,128,96,4,128,0,1,fp8,fp8,0,0.03293866664171219
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,128,96,8,128,0,1,float16,float16,0,0.04164266586303711
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,128,96,8,128,0,1,float16,fp8,0,0.04095999896526337
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,128,96,8,128,0,1,fp8,fp8,0,0.03259733319282532
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,128,96,96,128,0,1,float16,float16,0,0.030378667016824085
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,128,96,96,128,0,1,float16,fp8,0,0.029696000119050343
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,128,96,96,128,0,1,fp8,fp8,0,0.024405332903067272
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,128,96,1,128,0,1,float16,fp8,0,0.027136000494162243
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,128,96,1,128,0,1,fp8,fp8,0,0.022015998760859173
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,128,96,2,128,0,1,float16,float16,0,0.028160000840822857
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,128,96,2,128,0,1,float16,fp8,0,0.027818667391935985
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,128,96,1,128,0,1,float16,float16,0,0.027306665976842243
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,128,96,2,128,0,1,fp8,fp8,0,0.022357332209746044
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,128,96,4,128,0,1,float16,float16,0,0.027647999425729115
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,128,96,4,128,0,1,float16,fp8,0,0.027818667391935985
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,128,96,4,128,0,1,fp8,fp8,0,0.022357332209746044
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,128,96,8,128,0,1,float16,float16,0,0.027647999425729115
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,128,96,8,128,0,1,float16,fp8,0,0.027306665976842243
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,128,96,8,128,0,1,fp8,fp8,0,0.022698665658632915
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,128,96,96,128,0,1,float16,fp8,0,0.019626667102177937
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,128,96,96,128,0,1,float16,float16,0,0.019797333826621372
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,128,96,96,128,0,1,fp8,fp8,0,0.01621333385507266
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,128,96,1,128,0,1,float16,float16,0,0.018944000204404194
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,128,96,1,128,0,1,float16,fp8,0,0.0194560003777345
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,128,96,1,128,0,1,fp8,fp8,0,0.016042667130629223
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,128,96,2,128,0,1,float16,float16,0,0.019285333653291065
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,128,96,2,128,0,1,float16,fp8,0,0.019285333653291065
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,128,96,2,128,0,1,fp8,fp8,0,0.01621333385507266
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,128,96,4,128,0,1,float16,float16,0,0.019285333653291065
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,128,96,4,128,0,1,float16,fp8,0,0.0194560003777345
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,128,96,4,128,0,1,fp8,fp8,0,0.015872000406185787
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,128,96,8,128,0,1,float16,float16,0,0.01877333347996076
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,128,96,8,128,0,1,float16,fp8,0,0.01877333347996076
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,128,96,8,128,0,1,fp8,fp8,0,0.015872000406185787
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,64,96,1,128,0,1,float16,float16,0,2.534229278564453
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,64,96,1,128,0,1,float16,fp8,0,2.546687920888265
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,64,96,1,128,0,1,fp8,fp8,0,1.7215147018432617
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,64,96,2,128,0,1,float16,float16,0,2.571605364481608
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,64,96,2,128,0,1,float16,fp8,0,2.5625599225362143
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,64,96,2,128,0,1,fp8,fp8,0,1.7633280754089355
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,64,96,4,128,0,1,float16,float16,0,2.680490811665853
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,64,96,4,128,0,1,float16,fp8,0,2.662229379018148
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,64,96,4,128,0,1,fp8,fp8,0,1.8512214024861653
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,64,96,8,128,0,1,float16,float16,0,2.797909418741862
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,64,96,8,128,0,1,fp8,fp8,0,2.0125013987223306
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,64,96,8,128,0,1,float16,fp8,0,2.7557547887166343
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,64,96,1,128,0,1,float16,float16,0,1.2417706648508708
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,64,96,96,128,0,1,float16,float16,0,3.322709401448568
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,64,96,96,128,0,1,float16,fp8,0,3.143679936726888
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,64,96,96,128,0,1,fp8,fp8,0,2.5709226926167807
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,64,96,1,128,0,1,float16,fp8,0,1.248255968093872
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,64,96,1,128,0,1,fp8,fp8,0,0.8185173670450846
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,64,96,2,128,0,1,float16,float16,0,1.2637866338094075
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,64,96,2,128,0,1,float16,fp8,0,1.3230079809824626
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,64,96,2,128,0,1,fp8,fp8,0,0.8354132970174154
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,64,96,4,128,0,1,float16,float16,0,1.3073066870371501
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,64,96,4,128,0,1,fp8,fp8,0,0.8796160221099854
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,64,96,4,128,0,1,float16,fp8,0,1.2997972965240479
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,64,96,8,128,0,1,float16,float16,0,1.3830827077229817
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,64,96,8,128,0,1,float16,fp8,0,1.3730133374532063
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,64,96,8,128,0,1,fp8,fp8,0,0.9576106866200765
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,64,96,96,128,0,1,fp8,fp8,0,1.2083199818929036
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,64,96,1,128,0,1,float16,float16,0,0.603989322980245
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,64,96,96,128,0,1,float16,fp8,0,1.5196159680684407
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,64,96,1,128,0,1,float16,fp8,0,0.6016000111897787
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,64,96,96,128,0,1,float16,float16,0,1.6138240496317546
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,64,96,1,128,0,1,fp8,fp8,0,0.39133866628011066
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,64,96,2,128,0,1,float16,float16,0,0.6132053136825562
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,64,96,2,128,0,1,float16,fp8,0,0.6075733502705892
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,64,96,2,128,0,1,fp8,fp8,0,0.40396801630655926
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,64,96,4,128,0,1,float16,float16,0,0.5973333517710367
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,64,96,4,128,0,1,float16,fp8,0,0.632149338722229
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,64,96,4,128,0,1,fp8,fp8,0,0.38860801855723065
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,64,96,8,128,0,1,float16,float16,0,0.6437546809514364
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,64,96,8,128,0,1,float16,fp8,0,0.6466559966405233
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,64,96,8,128,0,1,fp8,fp8,0,0.43093331654866535
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,64,96,96,128,0,1,float16,float16,0,0.7424000104268392
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,64,96,96,128,0,1,float16,fp8,0,0.694271961847941
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,64,96,1,128,0,1,float16,float16,0,0.16247466206550598
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,64,96,96,128,0,1,fp8,fp8,0,0.5538133382797241
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,64,96,1,128,0,1,float16,fp8,0,0.16332800189654031
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,64,96,1,128,0,1,fp8,fp8,0,0.0981333355108897
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,64,96,2,128,0,1,float16,float16,0,0.16657066345214844
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,64,96,2,128,0,1,float16,fp8,0,0.1646933356920878
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,64,96,2,128,0,1,fp8,fp8,0,0.09864532947540283
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,64,96,4,128,0,1,float16,fp8,0,0.17629865805308023
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,64,96,4,128,0,1,float16,float16,0,0.1812480092048645
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,64,96,4,128,0,1,fp8,fp8,0,0.13550933202107748
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,64,96,8,128,0,1,float16,float16,0,0.20497065782546997
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,64,96,8,128,0,1,fp8,fp8,0,0.15633066495259604
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,64,96,8,128,0,1,float16,fp8,0,0.19592533508936563
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,64,96,96,128,0,1,float16,float16,0,0.22988800207773843
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,64,96,96,128,0,1,float16,fp8,0,0.17339734236399332
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,64,96,96,128,0,1,fp8,fp8,0,0.19490132729212442
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,64,96,1,128,0,1,float16,float16,0,0.06195199986298879
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,64,96,1,128,0,1,float16,fp8,0,0.062463998794555664
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,64,96,1,128,0,1,fp8,fp8,0,0.048810665806134544
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,64,96,2,128,0,1,float16,float16,0,0.061610668897628784
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,64,96,2,128,0,1,float16,fp8,0,0.061610668897628784
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,64,96,2,128,0,1,fp8,fp8,0,0.049322664737701416
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,64,96,4,128,0,1,float16,float16,0,0.062122667829195656
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,64,96,4,128,0,1,float16,fp8,0,0.062122667829195656
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,64,96,4,128,0,1,fp8,fp8,0,0.04915200173854828
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,64,96,8,128,0,1,float16,float16,0,0.06331733365853627
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,64,96,8,128,0,1,float16,fp8,0,0.0628053347269694
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,64,96,8,128,0,1,fp8,fp8,0,0.04915200173854828
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,64,96,96,128,0,1,float16,fp8,0,0.04147200038035711
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,64,96,96,128,0,1,fp8,fp8,0,0.03276800115903219
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,64,96,96,128,0,1,float16,float16,0,0.04369066655635834
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,64,96,1,128,0,1,float16,float16,0,0.03601066768169403
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,64,96,1,128,0,1,float16,fp8,0,0.03566933423280716
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,64,96,1,128,0,1,fp8,fp8,0,0.0290133332212766
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,64,96,2,128,0,1,float16,float16,0,0.03601066768169403
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,64,96,2,128,0,1,float16,fp8,0,0.03532800078392029
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,64,96,4,128,0,1,float16,float16,0,0.03669333209594091
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,64,96,2,128,0,1,fp8,fp8,0,0.02918400118748347
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,64,96,4,128,0,1,float16,fp8,0,0.03618133316437403
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,64,96,4,128,0,1,fp8,fp8,0,0.02918400118748347
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,64,96,8,128,0,1,float16,float16,0,0.03549866626660029
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,64,96,8,128,0,1,float16,fp8,0,0.03566933423280716
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,64,96,8,128,0,1,fp8,fp8,0,0.02867199977238973
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,64,96,96,128,0,1,float16,float16,0,0.027136000494162243
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,64,96,96,128,0,1,float16,fp8,0,0.0266239990790685
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,64,96,96,128,0,1,fp8,fp8,0,0.02252800017595291
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,64,96,1,128,0,1,float16,float16,0,0.023552000522613525
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,64,96,1,128,0,1,float16,fp8,0,0.024405332903067272
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,64,96,1,128,0,1,fp8,fp8,0,0.019797333826621372
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,64,96,2,128,0,1,float16,float16,0,0.024234667420387268
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,64,96,2,128,0,1,float16,fp8,0,0.023893333971500397
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,64,96,2,128,0,1,fp8,fp8,0,0.020309332758188248
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,64,96,4,128,0,1,float16,float16,0,0.024405332903067272
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,64,96,4,128,0,1,float16,fp8,0,0.024405332903067272
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,64,96,4,128,0,1,fp8,fp8,0,0.019968000551064808
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,64,96,8,128,0,1,float16,float16,0,0.0240639994541804
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,64,96,8,128,0,1,fp8,fp8,0,0.020479999482631683
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,64,96,8,128,0,1,float16,fp8,0,0.0240639994541804
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,64,96,96,128,0,1,float16,float16,0,0.016895999511082966
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,64,96,96,128,0,1,float16,fp8,0,0.016384000579516094
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,64,96,96,128,0,1,fp8,fp8,0,0.0145066666106383
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,64,96,1,128,0,1,float16,float16,0,0.01570133368174235
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,64,96,1,128,0,1,fp8,fp8,0,0.014165333161751429
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,64,96,1,128,0,1,float16,fp8,0,0.016042667130629223
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,64,96,2,128,0,1,float16,float16,0,0.01570133368174235
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,64,96,2,128,0,1,float16,fp8,0,0.016042667130629223
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,64,96,2,128,0,1,fp8,fp8,0,0.013994666437307993
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,64,96,4,128,0,1,float16,float16,0,0.016042667130629223
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,64,96,4,128,0,1,float16,fp8,0,0.016042667130629223
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,64,96,4,128,0,1,fp8,fp8,0,0.015018666783968607
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,64,96,8,128,0,1,float16,float16,0,0.015530666957298914
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,64,96,8,128,0,1,fp8,fp8,0,0.013994666437307993
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,64,96,96,128,0,1,float16,float16,0,0.012117333710193634
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,64,96,8,128,0,1,float16,fp8,0,0.01570133368174235
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,64,96,96,128,0,1,float16,fp8,0,0.011946666985750198
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,64,96,96,128,0,1,fp8,fp8,0,0.010922666639089584
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,64,96,1,128,0,1,float16,float16,0,0.011605333536863327
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,64,96,1,128,0,1,float16,fp8,0,0.011776000261306763
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,64,96,1,128,0,1,fp8,fp8,0,0.010410666465759277
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,64,96,2,128,0,1,float16,float16,0,0.011776000261306763
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,64,96,2,128,0,1,float16,fp8,0,0.011776000261306763
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,64,96,2,128,0,1,fp8,fp8,0,0.010410666465759277
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,64,96,4,128,0,1,float16,float16,0,0.011946666985750198
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,64,96,4,128,0,1,float16,fp8,0,0.011776000261306763
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,64,96,4,128,0,1,fp8,fp8,0,0.010581333190202713
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,64,96,8,128,0,1,float16,float16,0,0.011605333536863327
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,64,96,8,128,0,1,float16,fp8,0,0.011605333536863327
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,64,96,8,128,0,1,fp8,fp8,0,0.010581333190202713
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,32,96,1,128,0,1,float16,float16,0,1.2410879929860432
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,32,96,1,128,0,1,float16,fp8,0,1.2339200178782146
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,32,96,2,128,0,1,float16,float16,0,1.2555946509043376
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,32,96,1,128,0,1,fp8,fp8,0,0.8241493701934814
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,32,96,2,128,0,1,float16,fp8,0,1.2528639634450276
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,32,96,2,128,0,1,fp8,fp8,0,0.8535040219624838
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,32,96,4,128,0,1,float16,float16,0,1.3120853106180828
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,32,96,4,128,0,1,float16,fp8,0,1.296895980834961
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,32,96,4,128,0,1,fp8,fp8,0,0.8661333719889323
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,32,96,8,128,0,1,float16,float16,0,1.375402609507243
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,32,96,8,128,0,1,fp8,fp8,0,0.9379839897155762
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,32,96,8,128,0,1,float16,fp8,0,1.3542399406433105
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,32,96,96,128,0,1,float16,float16,0,1.6054612795511882
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,32,96,96,128,0,1,float16,fp8,0,1.5015254020690918
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,32,96,1,128,0,1,float16,float16,0,0.562175989151001
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,32,96,96,128,0,1,fp8,fp8,0,1.2009812990824382
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,32,96,1,128,0,1,float16,fp8,0,0.5601280132929484
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,32,96,1,128,0,1,fp8,fp8,0,0.3805866638819377
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,32,96,2,128,0,1,float16,fp8,0,0.5681493282318115
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,32,96,2,128,0,1,float16,float16,0,0.5988693237304688
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,32,96,2,128,0,1,fp8,fp8,0,0.39321601390838623
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,32,96,4,128,0,1,float16,float16,0,0.5981866518656412
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,32,96,4,128,0,1,float16,fp8,0,0.5906773408253988
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,32,96,4,128,0,1,fp8,fp8,0,0.40567465623219806
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,32,96,8,128,0,1,float16,float16,0,0.6432426770528158
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,32,96,8,128,0,1,fp8,fp8,0,0.4336640040079753
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,32,96,96,128,0,1,float16,float16,0,0.7417173385620117
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,32,96,96,128,0,1,float16,fp8,0,0.6939307053883871
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,32,96,8,128,0,1,float16,fp8,0,0.6318080027898153
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,32,96,96,128,0,1,fp8,fp8,0,0.5485226710637411
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,32,96,1,128,0,1,float16,fp8,0,0.1585493286450704
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,32,96,1,128,0,1,float16,float16,0,0.17442133029301962
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,32,96,1,128,0,1,fp8,fp8,0,0.10956799983978271
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,32,96,2,128,0,1,float16,float16,0,0.18090667327245077
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,32,96,2,128,0,1,float16,fp8,0,0.18090667327245077
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,32,96,2,128,0,1,fp8,fp8,0,0.12168533603350322
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,32,96,4,128,0,1,float16,float16,0,0.19541333119074503
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,32,96,4,128,0,1,float16,fp8,0,0.1950719952583313
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,32,96,4,128,0,1,fp8,fp8,0,0.1418239971001943
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,32,96,8,128,0,1,float16,float16,0,0.20377600193023682
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,32,96,8,128,0,1,float16,fp8,0,0.1996799906094869
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,32,96,8,128,0,1,fp8,fp8,0,0.16844799121220908
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,32,96,96,128,0,1,float16,float16,0,0.24541866779327393
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,32,96,96,128,0,1,float16,fp8,0,0.17800533771514893
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,32,96,1,128,0,1,float16,float16,0,0.06502399841944377
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,32,96,96,128,0,1,fp8,fp8,0,0.19746132691701254
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,32,96,1,128,0,1,float16,fp8,0,0.06502399841944377
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,32,96,1,128,0,1,fp8,fp8,0,0.052906667192777
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,32,96,2,128,0,1,float16,float16,0,0.06468266745408376
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,32,96,2,128,0,1,float16,fp8,0,0.06519466638565063
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,32,96,2,128,0,1,fp8,fp8,0,0.05273599922657013
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,32,96,4,128,0,1,float16,float16,0,0.06621866424878438
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,32,96,4,128,0,1,float16,fp8,0,0.06638933221499126
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,32,96,8,128,0,1,float16,float16,0,0.06621866424878438
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,32,96,4,128,0,1,fp8,fp8,0,0.052906667192777
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,32,96,8,128,0,1,float16,fp8,0,0.067071999112765
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,32,96,8,128,0,1,fp8,fp8,0,0.05341866612434387
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,32,96,96,128,0,1,float16,float16,0,0.04386133452256521
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,32,96,96,128,0,1,fp8,fp8,0,0.0341333324710528
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,32,96,96,128,0,1,float16,fp8,0,0.04164266586303711
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,32,96,1,128,0,1,float16,float16,0,0.03754666695992152
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,32,96,1,128,0,1,float16,fp8,0,0.03754666695992152
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,32,96,1,128,0,1,fp8,fp8,0,0.0314026673634847
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,32,96,2,128,0,1,float16,float16,0,0.03737599899371465
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,32,96,2,128,0,1,float16,fp8,0,0.03737599899371465
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,32,96,2,128,0,1,fp8,fp8,0,0.0314026673634847
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,32,96,4,128,0,1,float16,float16,0,0.037717332442601524
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,32,96,4,128,0,1,float16,fp8,0,0.038058665891488395
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,32,96,4,128,0,1,fp8,fp8,0,0.031744000812371574
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,32,96,8,128,0,1,float16,float16,0,0.037717332442601524
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,32,96,8,128,0,1,float16,fp8,0,0.037717332442601524
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,32,96,8,128,0,1,fp8,fp8,0,0.031744000812371574
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,32,96,96,128,0,1,float16,float16,0,0.02611200014750163
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,32,96,96,128,0,1,float16,fp8,0,0.025258667767047882
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,32,96,96,128,0,1,fp8,fp8,0,0.022357332209746044
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,32,96,1,128,0,1,float16,float16,0,0.023381332556406658
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,32,96,1,128,0,1,float16,fp8,0,0.02372266600529353
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,32,96,1,128,0,1,fp8,fp8,0,0.020138667275508244
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,32,96,2,128,0,1,float16,fp8,0,0.0240639994541804
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,32,96,2,128,0,1,fp8,fp8,0,0.020138667275508244
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,32,96,2,128,0,1,float16,float16,0,0.023552000522613525
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,32,96,4,128,0,1,float16,float16,0,0.023893333971500397
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,32,96,4,128,0,1,fp8,fp8,0,0.020479999482631683
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,32,96,4,128,0,1,float16,fp8,0,0.023893333971500397
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,32,96,8,128,0,1,float16,float16,0,0.023893333971500397
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,32,96,8,128,0,1,float16,fp8,0,0.023552000522613525
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,32,96,8,128,0,1,fp8,fp8,0,0.020479999482631683
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,32,96,96,128,0,1,float16,float16,0,0.01757866640885671
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,32,96,96,128,0,1,float16,fp8,0,0.017407999684413273
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,32,96,96,128,0,1,fp8,fp8,0,0.015360000232855478
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,32,96,1,128,0,1,float16,fp8,0,0.017237332959969837
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,32,96,1,128,0,1,float16,float16,0,0.01672533278663953
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,32,96,1,128,0,1,fp8,fp8,0,0.015360000232855478
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,32,96,2,128,0,1,float16,float16,0,0.016895999511082966
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,32,96,2,128,0,1,float16,fp8,0,0.0170666662355264
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,32,96,2,128,0,1,fp8,fp8,0,0.0170666662355264
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,32,96,4,128,0,1,float16,float16,0,0.017237332959969837
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,32,96,4,128,0,1,float16,fp8,0,0.0170666662355264
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,32,96,4,128,0,1,fp8,fp8,0,0.015360000232855478
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,32,96,8,128,0,1,float16,fp8,0,0.0170666662355264
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,32,96,8,128,0,1,float16,float16,0,0.01672533278663953
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,32,96,8,128,0,1,fp8,fp8,0,0.015018666783968607
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,32,96,96,128,0,1,float16,float16,0,0.011605333536863327
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,32,96,96,128,0,1,float16,fp8,0,0.011605333536863327
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,32,96,1,128,0,1,float16,float16,0,0.011264000087976456
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,32,96,96,128,0,1,fp8,fp8,0,0.010239999741315842
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,32,96,1,128,0,1,float16,fp8,0,0.011434666812419891
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,32,96,2,128,0,1,float16,float16,0,0.01109333336353302
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,32,96,1,128,0,1,fp8,fp8,0,0.010410666465759277
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,32,96,2,128,0,1,float16,fp8,0,0.011434666812419891
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,32,96,2,128,0,1,fp8,fp8,0,0.010581333190202713
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,32,96,4,128,0,1,float16,float16,0,0.011946666985750198
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,32,96,4,128,0,1,float16,fp8,0,0.011776000261306763
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,32,96,4,128,0,1,fp8,fp8,0,0.010581333190202713
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,32,96,8,128,0,1,float16,float16,0,0.010922666639089584
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,32,96,8,128,0,1,float16,fp8,0,0.01109333336353302
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,32,96,8,128,0,1,fp8,fp8,0,0.010581333190202713
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,32,96,96,128,0,1,float16,float16,0,0.009557333464423815
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,32,96,96,128,0,1,float16,fp8,0,0.009898666913310686
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,32,96,96,128,0,1,fp8,fp8,0,0.009045333291093508
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,32,96,1,128,0,1,float16,float16,0,0.00938666673998038
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,32,96,1,128,0,1,float16,fp8,0,0.009557333464423815
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,32,96,1,128,0,1,fp8,fp8,0,0.009045333291093508
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,32,96,2,128,0,1,float16,fp8,0,0.009898666913310686
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,32,96,2,128,0,1,float16,float16,0,0.009557333464423815
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,32,96,2,128,0,1,fp8,fp8,0,0.009045333291093508
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,32,96,4,128,0,1,float16,float16,0,0.00938666673998038
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,32,96,4,128,0,1,float16,fp8,0,0.00972800018886725
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,32,96,4,128,0,1,fp8,fp8,0,0.008874666566650072
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,32,96,8,128,0,1,float16,float16,0,0.009216000015536943
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,32,96,8,128,0,1,float16,fp8,0,0.00938666673998038
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,32,96,8,128,0,1,fp8,fp8,0,0.009045333291093508
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,16,96,1,128,0,1,float16,fp8,0,0.5671253204345703
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,16,96,1,128,0,1,float16,float16,0,0.5691733360290527
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,16,96,1,128,0,1,fp8,fp8,0,0.4493653376897176
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,16,96,2,128,0,1,float16,float16,0,0.5821439822514852
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,16,96,2,128,0,1,float16,fp8,0,0.5790719985961914
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,16,96,4,128,0,1,float16,float16,0,0.599722663561503
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,16,96,2,128,0,1,fp8,fp8,0,0.45772798856099445
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,16,96,4,128,0,1,float16,fp8,0,0.598527987798055
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,16,96,4,128,0,1,fp8,fp8,0,0.4761600097020467
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,16,96,8,128,0,1,float16,float16,0,0.6393173138300577
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,16,96,8,128,0,1,float16,fp8,0,0.6272000074386597
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,16,96,8,128,0,1,fp8,fp8,0,0.5063680013020834
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,16,96,96,128,0,1,float16,float16,0,0.7429119745890299
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,16,96,96,128,0,1,float16,fp8,0,0.6893226305643717
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,16,96,96,128,0,1,fp8,fp8,0,0.544597347577413
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,16,96,1,128,0,1,float16,float16,0,0.19234132766723633
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,16,96,1,128,0,1,float16,fp8,0,0.1960960030555725
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,16,96,1,128,0,1,fp8,fp8,0,0.15121066570281982
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,16,96,2,128,0,1,float16,float16,0,0.1962666710217794
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,16,96,2,128,0,1,float16,fp8,0,0.1945599913597107
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,16,96,2,128,0,1,fp8,fp8,0,0.14967466394106546
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,16,96,4,128,0,1,float16,float16,0,0.20821332931518555
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,16,96,4,128,0,1,fp8,fp8,0,0.17220266660054526
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,16,96,4,128,0,1,float16,fp8,0,0.2044586737950643
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,16,96,8,128,0,1,float16,float16,0,0.22749867041905722
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,16,96,8,128,0,1,float16,fp8,0,0.2198186715443929
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,16,96,8,128,0,1,fp8,fp8,0,0.20548266172409058
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,16,96,96,128,0,1,float16,float16,0,0.24371200799942017
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,16,96,96,128,0,1,float16,fp8,0,0.1812480092048645
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,16,96,96,128,0,1,fp8,fp8,0,0.21128533283869425
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,16,96,1,128,0,1,float16,float16,0,0.0897706647713979
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,16,96,1,128,0,1,fp8,fp8,0,0.07133866846561432
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,16,96,1,128,0,1,float16,fp8,0,0.08942932883898418
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,16,96,2,128,0,1,float16,float16,0,0.09130666653315227
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,16,96,2,128,0,1,float16,fp8,0,0.09096533060073853
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,16,96,2,128,0,1,fp8,fp8,0,0.07167999943097432
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,16,96,4,128,0,1,float16,float16,0,0.09062400460243225
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,16,96,4,128,0,1,float16,fp8,0,0.09062400460243225
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,16,96,4,128,0,1,fp8,fp8,0,0.0721919983625412
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,16,96,8,128,0,1,float16,float16,0,0.09079466263453166
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,16,96,8,128,0,1,float16,fp8,0,0.09079466263453166
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,16,96,8,128,0,1,fp8,fp8,0,0.0727040022611618
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,16,96,96,128,0,1,float16,float16,0,0.056832000613212585
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,16,96,96,128,0,1,float16,fp8,0,0.05205333232879639
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,16,96,96,128,0,1,fp8,fp8,0,0.04181333382924398
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,16,96,1,128,0,1,float16,float16,0,0.04863999783992767
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,16,96,1,128,0,1,float16,fp8,0,0.049322664737701416
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,16,96,1,128,0,1,fp8,fp8,0,0.040106666584809623
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,16,96,2,128,0,1,float16,float16,0,0.04949333270390829
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,16,96,2,128,0,1,float16,fp8,0,0.04966400067011515
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,16,96,2,128,0,1,fp8,fp8,0,0.040106666584809623
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,16,96,4,128,0,1,float16,float16,0,0.04966400067011515
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,16,96,4,128,0,1,float16,fp8,0,0.049322664737701416
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,16,96,4,128,0,1,fp8,fp8,0,0.040448000033696495
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,16,96,8,128,0,1,float16,float16,0,0.04983466863632202
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,16,96,8,128,0,1,float16,fp8,0,0.04949333270390829
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,16,96,8,128,0,1,fp8,fp8,0,0.040106666584809623
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,16,96,96,128,0,1,float16,float16,0,0.0314026673634847
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,16,96,96,128,0,1,fp8,fp8,0,0.02611200014750163
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,16,96,96,128,0,1,float16,fp8,0,0.03054933249950409
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,16,96,1,128,0,1,float16,float16,0,0.02935466667016347
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,16,96,1,128,0,1,float16,fp8,0,0.029696000119050343
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,16,96,1,128,0,1,fp8,fp8,0,0.02491733431816101
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,16,96,2,128,0,1,float16,float16,0,0.030037333567937214
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,16,96,2,128,0,1,fp8,fp8,0,0.02491733431816101
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,16,96,2,128,0,1,float16,fp8,0,0.030037333567937214
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,16,96,4,128,0,1,float16,float16,0,0.030037333567937214
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,16,96,4,128,0,1,float16,fp8,0,0.030378667016824085
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,16,96,4,128,0,1,fp8,fp8,0,0.02491733431816101
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,16,96,8,128,0,1,float16,float16,0,0.029696000119050343
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,16,96,8,128,0,1,float16,fp8,0,0.030207999050617218
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,16,96,8,128,0,1,fp8,fp8,0,0.025087999800841015
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,16,96,96,128,0,1,float16,float16,0,0.0194560003777345
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,16,96,96,128,0,1,float16,fp8,0,0.01911466692884763
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,16,96,96,128,0,1,fp8,fp8,0,0.01672533278663953
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,16,96,1,128,0,1,float16,float16,0,0.019285333653291065
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,16,96,1,128,0,1,float16,fp8,0,0.019285333653291065
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,16,96,1,128,0,1,fp8,fp8,0,0.016554666062196095
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,16,96,2,128,0,1,float16,float16,0,0.0194560003777345
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,16,96,2,128,0,1,float16,fp8,0,0.0194560003777345
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,16,96,2,128,0,1,fp8,fp8,0,0.016554666062196095
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,16,96,4,128,0,1,float16,fp8,0,0.019797333826621372
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,16,96,4,128,0,1,fp8,fp8,0,0.016895999511082966
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,16,96,8,128,0,1,float16,float16,0,0.01877333347996076
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,16,96,4,128,0,1,float16,float16,0,0.019285333653291065
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,16,96,8,128,0,1,float16,fp8,0,0.01911466692884763
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,16,96,8,128,0,1,fp8,fp8,0,0.01672533278663953
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,16,96,96,128,0,1,float16,float16,0,0.013823999712864557
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,16,96,96,128,0,1,float16,fp8,0,0.013823999712864557
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,16,96,96,128,0,1,fp8,fp8,0,0.011776000261306763
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,16,96,1,128,0,1,float16,fp8,0,0.013482666263977686
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,16,96,1,128,0,1,float16,float16,0,0.013653332988421122
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,16,96,1,128,0,1,fp8,fp8,0,0.01228800043463707
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,16,96,2,128,0,1,float16,float16,0,0.013653332988421122
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,16,96,2,128,0,1,float16,fp8,0,0.013823999712864557
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,16,96,2,128,0,1,fp8,fp8,0,0.011946666985750198
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,16,96,4,128,0,1,float16,float16,0,0.013653332988421122
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,16,96,4,128,0,1,float16,fp8,0,0.014165333161751429
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,16,96,4,128,0,1,fp8,fp8,0,0.011946666985750198
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,16,96,8,128,0,1,float16,float16,0,0.013482666263977686
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,16,96,8,128,0,1,float16,fp8,0,0.013482666263977686
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,16,96,8,128,0,1,fp8,fp8,0,0.011946666985750198
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,16,96,96,128,0,1,float16,float16,0,0.009898666913310686
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,16,96,96,128,0,1,float16,fp8,0,0.010239999741315842
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,16,96,1,128,0,1,float16,fp8,0,0.010069333637754122
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,16,96,96,128,0,1,fp8,fp8,0,0.00972800018886725
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,16,96,1,128,0,1,float16,float16,0,0.009898666913310686
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,16,96,1,128,0,1,fp8,fp8,0,0.009898666913310686
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,16,96,2,128,0,1,float16,float16,0,0.010239999741315842
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,16,96,2,128,0,1,float16,fp8,0,0.010239999741315842
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,16,96,2,128,0,1,fp8,fp8,0,0.00938666673998038
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,16,96,4,128,0,1,float16,float16,0,0.009898666913310686
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,16,96,4,128,0,1,float16,fp8,0,0.009898666913310686
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,16,96,4,128,0,1,fp8,fp8,0,0.009898666913310686
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,16,96,8,128,0,1,float16,fp8,0,0.010581333190202713
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,16,96,8,128,0,1,float16,float16,0,0.00972800018886725
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,16,96,8,128,0,1,fp8,fp8,0,0.00972800018886725
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,16,96,96,128,0,1,float16,float16,0,0.009045333291093508
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,16,96,96,128,0,1,float16,fp8,0,0.008703999842206636
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,16,96,96,128,0,1,fp8,fp8,0,0.0085333331177632
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,16,96,1,128,0,1,float16,float16,0,0.009216000015536943
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,16,96,1,128,0,1,float16,fp8,0,0.00938666673998038
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,16,96,1,128,0,1,fp8,fp8,0,0.008874666566650072
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,16,96,2,128,0,1,float16,float16,0,0.008874666566650072
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,16,96,2,128,0,1,float16,fp8,0,0.008874666566650072
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,16,96,2,128,0,1,fp8,fp8,0,0.008703999842206636
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,16,96,4,128,0,1,float16,float16,0,0.009045333291093508
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,16,96,4,128,0,1,float16,fp8,0,0.009557333464423815
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,16,96,4,128,0,1,fp8,fp8,0,0.008703999842206636
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,16,96,8,128,0,1,float16,float16,0,0.008703999842206636
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,16,96,8,128,0,1,float16,fp8,0,0.03532800078392029
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,16,96,8,128,0,1,fp8,fp8,0,0.010239999741315842
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,16384,64,1,128,0,1,fp8,fp8,0,134.5976359049479
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,16384,64,2,128,0,1,fp8,fp8,0,136.99686686197916
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,16384,64,1,128,0,1,float16,float16,0,225.75018310546875
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,16384,64,1,128,0,1,float16,fp8,0,227.3070068359375
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,16384,64,2,128,0,1,float16,float16,0,227.2856648763021
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,16384,64,2,128,0,1,float16,fp8,0,226.6304931640625
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,16384,64,4,128,0,1,float16,fp8,0,228.2852783203125
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,16384,64,4,128,0,1,float16,float16,0,230.09912109375
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,16384,64,4,128,0,1,fp8,fp8,0,134.71504720052084
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,16384,64,64,128,0,1,fp8,fp8,0,72.76868184407552
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,16384,64,64,128,0,1,float16,float16,0,117.94295247395833
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,16384,64,64,128,0,1,float16,fp8,0,117.61391194661458
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,16384,64,1,128,0,1,float16,float16,0,114.29512532552083
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,16384,64,8,128,0,1,fp8,fp8,0,137.0992635091146
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,16384,64,8,128,0,1,float16,float16,0,229.86905924479166
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,16384,64,1,128,0,1,fp8,fp8,0,67.71165974934895
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,16384,64,1,128,0,1,float16,fp8,0,110.88844807942708
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,16384,64,2,128,0,1,fp8,fp8,0,68.47709655761719
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,16384,64,8,128,0,1,float16,fp8,0,228.2767333984375
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,16384,64,4,128,0,1,fp8,fp8,0,68.4736836751302
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,16384,64,2,128,0,1,float16,float16,0,111.42740885416667
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,16384,64,2,128,0,1,float16,fp8,0,112.8634033203125
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,16384,64,4,128,0,1,float16,float16,0,111.95887247721355
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,16384,64,4,128,0,1,float16,fp8,0,114.3884785970052
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,16384,64,8,128,0,1,float16,float16,0,111.42724609375
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,16384,64,64,128,0,1,fp8,fp8,0,35.486549377441406
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,16384,64,8,128,0,1,fp8,fp8,0,67.85109456380208
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,16384,64,64,128,0,1,float16,float16,0,58.487467447916664
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,16384,64,64,128,0,1,float16,fp8,0,59.32953389485677
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,16384,64,1,128,0,1,float16,float16,0,55.196329752604164
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,16384,64,8,128,0,1,float16,fp8,0,115.00390625
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,16384,64,1,128,0,1,fp8,fp8,0,32.572245279947914
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,16384,64,1,128,0,1,float16,fp8,0,55.332183837890625
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,16384,64,2,128,0,1,fp8,fp8,0,32.943616231282554
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,16384,64,2,128,0,1,float16,float16,0,54.7764892578125
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,16384,64,2,128,0,1,float16,fp8,0,54.94869486490885
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,16384,64,4,128,0,1,fp8,fp8,0,33.31379191080729
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,16384,64,4,128,0,1,float16,float16,0,56.15138244628906
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,16384,64,4,128,0,1,float16,fp8,0,55.63477579752604
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,16384,64,8,128,0,1,float16,float16,0,55.51684061686198
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,16384,64,8,128,0,1,fp8,fp8,0,32.83029429117838
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,16384,64,64,128,0,1,fp8,fp8,0,18.16268793741862
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,16384,64,64,128,0,1,float16,float16,0,29.308926900227863
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,16384,64,8,128,0,1,float16,fp8,0,55.80885314941406
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,16384,64,64,128,0,1,float16,fp8,0,29.00957743326823
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,16384,64,1,128,0,1,float16,float16,0,27.86048126220703
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,16384,64,1,128,0,1,float16,fp8,0,27.65277862548828
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,16384,64,1,128,0,1,fp8,fp8,0,16.26760482788086
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,16384,64,2,128,0,1,fp8,fp8,0,16.551253000895183
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,16384,64,2,128,0,1,float16,float16,0,27.83795166015625
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,16384,64,2,128,0,1,float16,fp8,0,27.998550415039062
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,16384,64,4,128,0,1,float16,float16,0,27.922772725423176
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,16384,64,4,128,0,1,fp8,fp8,0,16.647679646809895
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,16384,64,4,128,0,1,float16,fp8,0,27.682815551757812
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,16384,64,8,128,0,1,fp8,fp8,0,16.569173177083332
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,16384,64,8,128,0,1,float16,float16,0,27.9369379679362
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,16384,64,8,128,0,1,float16,fp8,0,27.745961507161457
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,12288,64,1,128,0,1,fp8,fp8,0,81.2745361328125
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,12288,64,2,128,0,1,fp8,fp8,0,78.89595540364583
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,12288,64,1,128,0,1,float16,float16,0,131.60345458984375
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,12288,64,1,128,0,1,float16,fp8,0,133.22649129231772
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,12288,64,2,128,0,1,float16,float16,0,131.68930053710938
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,12288,64,2,128,0,1,float16,fp8,0,130.5006103515625
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,12288,64,4,128,0,1,float16,float16,0,130.33881632486978
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,12288,64,4,128,0,1,float16,fp8,0,130.9868367513021
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,12288,64,4,128,0,1,fp8,fp8,0,81.68789164225261
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,12288,64,64,128,0,1,fp8,fp8,0,41.389567057291664
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,12288,64,64,128,0,1,float16,float16,0,71.24684651692708
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,12288,64,64,128,0,1,float16,fp8,0,70.13188171386719
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,12288,64,8,128,0,1,fp8,fp8,0,83.74954732259114
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,12288,64,1,128,0,1,float16,float16,0,64.84718831380208
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,12288,64,8,128,0,1,float16,float16,0,131.62871297200522
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,12288,64,1,128,0,1,float16,fp8,0,65.61177571614583
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,12288,64,1,128,0,1,fp8,fp8,0,37.104469299316406
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,12288,64,8,128,0,1,float16,fp8,0,131.18429565429688
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,12288,64,2,128,0,1,fp8,fp8,0,38.48601531982422
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,12288,64,2,128,0,1,float16,float16,0,64.89685567220052
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,12288,64,2,128,0,1,float16,fp8,0,65.39707946777344
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,12288,64,4,128,0,1,fp8,fp8,0,38.62220764160156
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,12288,64,4,128,0,1,float16,float16,0,64.35737609863281
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,12288,64,4,128,0,1,float16,fp8,0,65.48718770345052
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,12288,64,8,128,0,1,fp8,fp8,0,39.31306711832682
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,12288,64,8,128,0,1,float16,float16,0,64.01552836100261
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,12288,64,64,128,0,1,fp8,fp8,0,20.805461883544922
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,12288,64,64,128,0,1,float16,float16,0,33.83466593424479
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,12288,64,64,128,0,1,float16,fp8,0,33.96403249104818
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,12288,64,8,128,0,1,float16,fp8,0,65.55870056152344
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,12288,64,1,128,0,1,float16,float16,0,31.383211771647137
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,12288,64,1,128,0,1,float16,fp8,0,31.85186258951823
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,12288,64,1,128,0,1,fp8,fp8,0,19.403605143229168
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,12288,64,2,128,0,1,fp8,fp8,0,18.927616119384766
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,12288,64,2,128,0,1,float16,float16,0,31.004842122395832
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,12288,64,2,128,0,1,float16,fp8,0,31.993855794270832
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,12288,64,4,128,0,1,fp8,fp8,0,18.909695943196613
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,12288,64,4,128,0,1,float16,float16,0,31.826944986979168
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,12288,64,4,128,0,1,float16,fp8,0,31.798441569010418
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,12288,64,8,128,0,1,float16,float16,0,32.104105631510414
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,12288,64,8,128,0,1,fp8,fp8,0,19.393535614013672
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,12288,64,8,128,0,1,float16,fp8,0,32.50978088378906
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,12288,64,64,128,0,1,fp8,fp8,0,10.595669428507486
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,12288,64,64,128,0,1,float16,float16,0,16.61474100748698
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,12288,64,64,128,0,1,float16,fp8,0,16.983722686767578
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,12288,64,1,128,0,1,float16,float16,0,15.92627207438151
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,12288,64,1,128,0,1,float16,fp8,0,15.890090942382812
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,12288,64,1,128,0,1,fp8,fp8,0,9.57474136352539
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,12288,64,2,128,0,1,fp8,fp8,0,9.265493392944336
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,12288,64,2,128,0,1,float16,float16,0,15.987711588541666
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,12288,64,2,128,0,1,float16,fp8,0,16.319658915201824
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,12288,64,4,128,0,1,fp8,fp8,0,9.563648223876953
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,12288,64,4,128,0,1,float16,float16,0,15.806805928548178
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,12288,64,4,128,0,1,float16,fp8,0,16.261802673339844
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,12288,64,8,128,0,1,float16,float16,0,16.184149424235027
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,12288,64,8,128,0,1,fp8,fp8,0,9.626965204874674
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,12288,64,8,128,0,1,float16,fp8,0,16.024064381917317
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,10240,64,1,128,0,1,fp8,fp8,0,54.10662333170573
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,10240,64,2,128,0,1,fp8,fp8,0,54.12096150716146
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,10240,64,1,128,0,1,float16,float16,0,94.2237040201823
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,10240,64,2,128,0,1,float16,float16,0,93.2857157389323
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,10240,64,1,128,0,1,float16,fp8,0,96.40550740559895
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,10240,64,2,128,0,1,float16,fp8,0,95.80526733398438
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,10240,64,4,128,0,1,float16,float16,0,93.00906372070312
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,10240,64,4,128,0,1,float16,fp8,0,95.0150858561198
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,10240,64,4,128,0,1,fp8,fp8,0,53.98783874511719
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,10240,64,64,128,0,1,fp8,fp8,0,29.72296651204427
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,10240,64,64,128,0,1,float16,float16,0,49.620992024739586
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,10240,64,8,128,0,1,fp8,fp8,0,54.19775899251302
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,10240,64,64,128,0,1,float16,fp8,0,49.10353088378906
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,10240,64,1,128,0,1,float16,float16,0,44.72661336263021
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,10240,64,8,128,0,1,float16,float16,0,95.96348063151042
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,10240,64,1,128,0,1,float16,fp8,0,44.98534647623698
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,10240,64,1,128,0,1,fp8,fp8,0,26.479446411132812
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,10240,64,8,128,0,1,float16,fp8,0,95.09358723958333
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,10240,64,2,128,0,1,fp8,fp8,0,27.082069396972656
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,10240,64,2,128,0,1,float16,float16,0,45.73064676920573
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,10240,64,2,128,0,1,float16,fp8,0,44.0079345703125
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,10240,64,4,128,0,1,fp8,fp8,0,26.703702290852863
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,10240,64,4,128,0,1,float16,float16,0,45.09013366699219
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,10240,64,4,128,0,1,float16,fp8,0,44.885162353515625
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,10240,64,8,128,0,1,fp8,fp8,0,27.5773442586263
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,10240,64,8,128,0,1,float16,float16,0,44.86399841308594
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,10240,64,8,128,0,1,float16,fp8,0,45.1222178141276
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,10240,64,64,128,0,1,fp8,fp8,0,15.182676951090494
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,10240,64,64,128,0,1,float16,fp8,0,24.029014587402344
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,10240,64,64,128,0,1,float16,float16,0,24.823465983072918
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,10240,64,1,128,0,1,float16,float16,0,22.35443115234375
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,10240,64,1,128,0,1,float16,fp8,0,22.63091278076172
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,10240,64,1,128,0,1,fp8,fp8,0,13.658794403076172
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,10240,64,2,128,0,1,fp8,fp8,0,13.3657595316569
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,10240,64,2,128,0,1,float16,float16,0,22.072662353515625
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,10240,64,2,128,0,1,float16,fp8,0,22.394879659016926
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,10240,64,4,128,0,1,float16,float16,0,22.319956461588543
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,10240,64,4,128,0,1,fp8,fp8,0,13.580458323160807
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,10240,64,4,128,0,1,float16,fp8,0,23.034708658854168
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,10240,64,8,128,0,1,fp8,fp8,0,13.670059204101562
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,10240,64,8,128,0,1,float16,float16,0,22.640640258789062
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,10240,64,8,128,0,1,float16,fp8,0,22.788777669270832
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,10240,64,64,128,0,1,float16,float16,0,12.06613286336263
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,10240,64,64,128,0,1,fp8,fp8,0,7.378431955973308
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,10240,64,64,128,0,1,float16,fp8,0,12.363946278889975
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,10240,64,1,128,0,1,float16,float16,0,11.261781056722006
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,10240,64,1,128,0,1,float16,fp8,0,11.73623530069987
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,10240,64,1,128,0,1,fp8,fp8,0,6.958421071370442
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,10240,64,2,128,0,1,fp8,fp8,0,6.401536305745442
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,10240,64,2,128,0,1,float16,float16,0,11.538261413574219
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,10240,64,2,128,0,1,float16,fp8,0,11.680938720703125
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,10240,64,4,128,0,1,fp8,fp8,0,6.781610488891602
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,10240,64,4,128,0,1,float16,float16,0,11.508735656738281
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,10240,64,4,128,0,1,float16,fp8,0,11.311786651611328
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,10240,64,8,128,0,1,float16,float16,0,11.779242197672525
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,10240,64,8,128,0,1,fp8,fp8,0,6.216874440511067
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,10240,64,8,128,0,1,float16,fp8,0,11.255125681559244
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,8192,64,1,128,0,1,fp8,fp8,0,76.70801289876302
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,8192,64,2,128,0,1,fp8,fp8,0,77.14372253417969
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,8192,64,1,128,0,1,float16,float16,0,122.86412556966145
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,8192,64,2,128,0,1,float16,float16,0,120.94344075520833
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,8192,64,1,128,0,1,float16,fp8,0,123.898193359375
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,8192,64,2,128,0,1,float16,fp8,0,121.01137288411458
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,8192,64,4,128,0,1,float16,float16,0,120.38809204101562
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,8192,64,4,128,0,1,float16,fp8,0,120.78609212239583
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,8192,64,4,128,0,1,fp8,fp8,0,76.3137715657552
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,8192,64,64,128,0,1,fp8,fp8,0,40.338602701822914
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,8192,64,64,128,0,1,float16,fp8,0,66.07138061523438
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,8192,64,64,128,0,1,float16,float16,0,67.49081420898438
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,8192,64,1,128,0,1,float16,float16,0,58.89996846516927
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,8192,64,8,128,0,1,fp8,fp8,0,74.97830200195312
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,8192,64,8,128,0,1,float16,float16,0,125.25328572591145
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,8192,64,1,128,0,1,float16,fp8,0,58.81719462076823
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,8192,64,1,128,0,1,fp8,fp8,0,35.147265116373696
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,8192,64,8,128,0,1,float16,fp8,0,123.64441935221355
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,8192,64,2,128,0,1,fp8,fp8,0,35.35394032796224
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,8192,64,2,128,0,1,float16,float16,0,59.52989705403646
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,8192,64,2,128,0,1,float16,fp8,0,61.16983540852865
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,8192,64,4,128,0,1,float16,float16,0,58.86583455403646
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,8192,64,4,128,0,1,fp8,fp8,0,35.593727111816406
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,8192,64,4,128,0,1,float16,fp8,0,61.44921366373698
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,8192,64,8,128,0,1,fp8,fp8,0,36.14737192789713
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,8192,64,8,128,0,1,float16,float16,0,60.60424296061198
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,8192,64,64,128,0,1,fp8,fp8,0,20.51430384318034
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,8192,64,64,128,0,1,float16,float16,0,32.71185048421224
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,8192,64,64,128,0,1,float16,fp8,0,32.89565785725912
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,8192,64,1,128,0,1,float16,float16,0,29.603497823079426
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,8192,64,8,128,0,1,float16,fp8,0,60.910420735677086
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,8192,64,1,128,0,1,fp8,fp8,0,17.425066630045574
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,8192,64,1,128,0,1,float16,fp8,0,30.19383494059245
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,8192,64,2,128,0,1,fp8,fp8,0,17.495381673177082
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,8192,64,2,128,0,1,float16,float16,0,29.037737528483074
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,8192,64,2,128,0,1,float16,fp8,0,28.920148213704426
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,8192,64,4,128,0,1,fp8,fp8,0,17.79217020670573
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,8192,64,4,128,0,1,float16,float16,0,30.074198404947918
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,8192,64,4,128,0,1,float16,fp8,0,29.14611307779948
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,8192,64,8,128,0,1,float16,fp8,0,30.24554697672526
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,8192,64,8,128,0,1,float16,float16,0,29.637120564778645
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,8192,64,64,128,0,1,fp8,fp8,0,10.01250139872233
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,8192,64,64,128,0,1,float16,float16,0,15.91159439086914
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,8192,64,8,128,0,1,fp8,fp8,0,18.05294926961263
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,8192,64,64,128,0,1,float16,fp8,0,15.898111979166666
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,8192,64,1,128,0,1,float16,float16,0,14.957567850748697
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,8192,64,1,128,0,1,float16,fp8,0,14.558036804199219
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,8192,64,1,128,0,1,fp8,fp8,0,9.019050598144531
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,8192,64,2,128,0,1,fp8,fp8,0,8.982869466145834
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,8192,64,2,128,0,1,float16,float16,0,14.709760030110678
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,8192,64,2,128,0,1,float16,fp8,0,14.890324910481771
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,8192,64,4,128,0,1,float16,fp8,0,14.681087493896484
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,8192,64,4,128,0,1,float16,float16,0,14.79543431599935
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,8192,64,4,128,0,1,fp8,fp8,0,9.23852793375651
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,8192,64,8,128,0,1,float16,float16,0,14.857898712158203
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,8192,64,8,128,0,1,fp8,fp8,0,8.927573521931967
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,8192,64,64,128,0,1,float16,float16,0,8.12509854634603
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,8192,64,8,128,0,1,float16,fp8,0,15.060479482014975
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,8192,64,64,128,0,1,fp8,fp8,0,4.907178560892741
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,8192,64,64,128,0,1,float16,fp8,0,8.098133087158203
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,8192,64,1,128,0,1,float16,float16,0,7.478101094563802
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,8192,64,1,128,0,1,float16,fp8,0,7.604906717936198
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,8192,64,1,128,0,1,fp8,fp8,0,4.1654612223307295
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,8192,64,2,128,0,1,fp8,fp8,0,4.3473920822143555
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,8192,64,2,128,0,1,float16,float16,0,6.893226623535156
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,8192,64,2,128,0,1,float16,fp8,0,7.344128290812175
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,8192,64,4,128,0,1,fp8,fp8,0,4.324010531107585
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,8192,64,4,128,0,1,float16,float16,0,7.366485595703125
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,8192,64,4,128,0,1,float16,fp8,0,7.716010411580403
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,8192,64,8,128,0,1,float16,float16,0,7.4779307047526045
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,8192,64,8,128,0,1,float16,fp8,0,7.361024220784505
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,8192,64,8,128,0,1,fp8,fp8,0,4.256085395812988
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,6144,64,1,128,0,1,fp8,fp8,0,42.61137135823568
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,6144,64,2,128,0,1,fp8,fp8,0,43.28533426920573
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,6144,64,1,128,0,1,float16,float16,0,71.00023396809895
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,6144,64,1,128,0,1,float16,fp8,0,71.28183492024739
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,6144,64,2,128,0,1,float16,float16,0,71.25504048665364
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,6144,64,2,128,0,1,float16,fp8,0,72.94122823079427
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,6144,64,4,128,0,1,float16,float16,0,71.81567891438802
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,6144,64,4,128,0,1,float16,fp8,0,75.1464131673177
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,6144,64,4,128,0,1,fp8,fp8,0,42.9667002360026
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,6144,64,64,128,0,1,fp8,fp8,0,25.242111206054688
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,6144,64,64,128,0,1,float16,float16,0,37.789354960123696
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,6144,64,64,128,0,1,float16,fp8,0,39.80032094319662
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,6144,64,8,128,0,1,fp8,fp8,0,43.434326171875
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,6144,64,1,128,0,1,float16,float16,0,35.127637227376304
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,6144,64,8,128,0,1,float16,float16,0,74.18402099609375
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,6144,64,1,128,0,1,float16,fp8,0,34.19153086344401
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,6144,64,8,128,0,1,float16,fp8,0,72.57787577311198
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,6144,64,1,128,0,1,fp8,fp8,0,20.534954071044922
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,6144,64,2,128,0,1,fp8,fp8,0,20.822869618733723
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,6144,64,2,128,0,1,float16,float16,0,33.98400115966797
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,6144,64,2,128,0,1,float16,fp8,0,34.55726877848307
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,6144,64,4,128,0,1,fp8,fp8,0,20.756139119466145
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,6144,64,4,128,0,1,float16,float16,0,35.3435312906901
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,6144,64,4,128,0,1,float16,fp8,0,35.20819091796875
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,6144,64,8,128,0,1,fp8,fp8,0,20.41309865315755
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,6144,64,8,128,0,1,float16,float16,0,34.35827128092448
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,6144,64,64,128,0,1,float16,float16,0,18.931541442871094
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,6144,64,8,128,0,1,float16,fp8,0,34.51374816894531
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,6144,64,64,128,0,1,fp8,fp8,0,12.263083140055338
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,6144,64,64,128,0,1,float16,fp8,0,19.645610809326172
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,6144,64,1,128,0,1,float16,float16,0,17.432064056396484
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,6144,64,1,128,0,1,float16,fp8,0,17.176063537597656
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,6144,64,1,128,0,1,fp8,fp8,0,10.364927927652994
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,6144,64,2,128,0,1,fp8,fp8,0,10.363221486409506
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,6144,64,2,128,0,1,float16,float16,0,16.913066864013672
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,6144,64,2,128,0,1,float16,fp8,0,16.65774917602539
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,6144,64,4,128,0,1,fp8,fp8,0,10.654037475585938
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,6144,64,4,128,0,1,float16,float16,0,17.328980763753254
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,6144,64,4,128,0,1,float16,fp8,0,17.3484369913737
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,6144,64,8,128,0,1,float16,float16,0,17.39468765258789
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,6144,64,8,128,0,1,float16,fp8,0,17.344682057698567
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,6144,64,8,128,0,1,fp8,fp8,0,10.487295786539713
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,6144,64,64,128,0,1,float16,float16,0,9.703082402547201
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,6144,64,64,128,0,1,float16,fp8,0,9.064448038736979
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,6144,64,64,128,0,1,fp8,fp8,0,6.21772829691569
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,6144,64,1,128,0,1,float16,float16,0,8.343722661336264
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,6144,64,1,128,0,1,float16,fp8,0,8.714922587076822
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,6144,64,1,128,0,1,fp8,fp8,0,4.770474751790364
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,6144,64,2,128,0,1,float16,float16,0,8.615594863891602
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,6144,64,2,128,0,1,float16,fp8,0,8.808277130126953
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,6144,64,2,128,0,1,fp8,fp8,0,5.018111864725749
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,6144,64,4,128,0,1,float16,float16,0,8.520874659220377
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,6144,64,4,128,0,1,fp8,fp8,0,4.75869878133138
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,6144,64,4,128,0,1,float16,fp8,0,8.671573638916016
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,6144,64,8,128,0,1,float16,float16,0,8.786261240641275
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,6144,64,8,128,0,1,float16,fp8,0,8.931669235229492
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,6144,64,8,128,0,1,fp8,fp8,0,4.931413332621257
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,6144,64,64,128,0,1,float16,fp8,0,4.565333366394043
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,6144,64,64,128,0,1,float16,float16,0,4.5810346603393555
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,6144,64,64,128,0,1,fp8,fp8,0,3.0679041544596353
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,6144,64,1,128,0,1,float16,float16,0,4.049066543579102
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,6144,64,1,128,0,1,float16,fp8,0,3.907925287882487
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,6144,64,1,128,0,1,fp8,fp8,0,2.409301280975342
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,6144,64,2,128,0,1,float16,float16,0,4.158464113871257
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,6144,64,2,128,0,1,float16,fp8,0,3.939839998881022
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,6144,64,2,128,0,1,fp8,fp8,0,2.3152640660603843
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,6144,64,4,128,0,1,float16,float16,0,3.8256638844807944
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,6144,64,4,128,0,1,fp8,fp8,0,2.3430827458699546
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,6144,64,4,128,0,1,float16,fp8,0,3.8227628072102866
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,6144,64,8,128,0,1,float16,float16,0,3.9935998916625977
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,6144,64,8,128,0,1,float16,fp8,0,4.165802637736003
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,6144,64,8,128,0,1,fp8,fp8,0,2.475349267323812
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,4096,64,1,128,0,1,fp8,fp8,0,42.4620361328125
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,4096,64,2,128,0,1,fp8,fp8,0,43.149139404296875
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,4096,64,1,128,0,1,float16,float16,0,68.43340555826823
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,4096,64,1,128,0,1,float16,fp8,0,68.1361083984375
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,4096,64,2,128,0,1,float16,fp8,0,67.80757141113281
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,4096,64,2,128,0,1,float16,float16,0,69.04234822591145
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,4096,64,4,128,0,1,float16,float16,0,69.81376139322917
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,4096,64,4,128,0,1,float16,fp8,0,69.97145589192708
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,4096,64,4,128,0,1,fp8,fp8,0,43.31212870279948
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,4096,64,64,128,0,1,fp8,fp8,0,26.78015899658203
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,4096,64,64,128,0,1,float16,float16,0,37.35108184814453
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,4096,64,64,128,0,1,float16,fp8,0,38.60684712727865
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,4096,64,8,128,0,1,fp8,fp8,0,43.507710774739586
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,4096,64,1,128,0,1,float16,float16,0,32.90623982747396
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,4096,64,8,128,0,1,float16,float16,0,71.66532389322917
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,4096,64,1,128,0,1,float16,fp8,0,32.102742513020836
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,4096,64,8,128,0,1,float16,fp8,0,69.58062744140625
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,4096,64,1,128,0,1,fp8,fp8,0,20.28765869140625
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,4096,64,2,128,0,1,fp8,fp8,0,19.531776428222656
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,4096,64,2,128,0,1,float16,float16,0,31.88701883951823
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,4096,64,2,128,0,1,float16,fp8,0,32.48008473714193
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,4096,64,4,128,0,1,float16,float16,0,33.19074249267578
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,4096,64,4,128,0,1,fp8,fp8,0,19.705856323242188
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,4096,64,4,128,0,1,float16,fp8,0,33.531392415364586
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,4096,64,8,128,0,1,fp8,fp8,0,20.515157063802082
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,4096,64,8,128,0,1,float16,float16,0,32.75929514567057
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,4096,64,8,128,0,1,float16,fp8,0,33.23272450764974
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,4096,64,64,128,0,1,fp8,fp8,0,13.364053090413412
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,4096,64,64,128,0,1,float16,float16,0,18.692095438639324
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,4096,64,64,128,0,1,float16,fp8,0,19.445418039957683
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,4096,64,1,128,0,1,float16,float16,0,16.017749786376953
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,4096,64,1,128,0,1,float16,fp8,0,16.33621342976888
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,4096,64,1,128,0,1,fp8,fp8,0,9.98468271891276
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,4096,64,2,128,0,1,fp8,fp8,0,9.858730951944986
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,4096,64,2,128,0,1,float16,float16,0,15.984639485677084
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,4096,64,2,128,0,1,float16,fp8,0,16.24627176920573
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,4096,64,4,128,0,1,float16,float16,0,16.148821512858074
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,4096,64,4,128,0,1,float16,fp8,0,16.301738739013672
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,4096,64,4,128,0,1,fp8,fp8,0,9.917952219645182
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,4096,64,8,128,0,1,float16,float16,0,16.543402353922527
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,4096,64,8,128,0,1,float16,fp8,0,16.062122344970703
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,4096,64,8,128,0,1,fp8,fp8,0,10.417493184407553
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,4096,64,64,128,0,1,float16,float16,0,9.488384246826172
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,4096,64,64,128,0,1,fp8,fp8,0,6.513322830200195
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,4096,64,1,128,0,1,float16,float16,0,7.850666681925456
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,4096,64,64,128,0,1,float16,fp8,0,9.448959986368815
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,4096,64,1,128,0,1,float16,fp8,0,8.072704315185547
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,4096,64,1,128,0,1,fp8,fp8,0,4.819797197977702
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,4096,64,2,128,0,1,fp8,fp8,0,4.497066815694173
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,4096,64,2,128,0,1,float16,float16,0,7.877461115519206
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,4096,64,2,128,0,1,float16,fp8,0,8.175786972045898
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,4096,64,4,128,0,1,float16,float16,0,8.141653060913086
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,4096,64,4,128,0,1,fp8,fp8,0,4.820309321085612
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,4096,64,4,128,0,1,float16,fp8,0,7.937536239624023
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,4096,64,8,128,0,1,float16,float16,0,7.848618825276692
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,4096,64,8,128,0,1,float16,fp8,0,8.364714940388998
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,4096,64,8,128,0,1,fp8,fp8,0,4.7897599538167315
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,4096,64,64,128,0,1,float16,float16,0,4.659882545471191
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,4096,64,64,128,0,1,float16,fp8,0,4.682239850362142
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,4096,64,64,128,0,1,fp8,fp8,0,3.2052907943725586
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,4096,64,1,128,0,1,float16,float16,0,3.8114986419677734
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,4096,64,1,128,0,1,float16,fp8,0,3.6812801361083984
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,4096,64,1,128,0,1,fp8,fp8,0,2.208085378011068
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,4096,64,2,128,0,1,float16,fp8,0,3.7032960255940757
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,4096,64,2,128,0,1,float16,float16,0,3.7748053868611655
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,4096,64,2,128,0,1,fp8,fp8,0,2.20962127049764
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,4096,64,4,128,0,1,float16,float16,0,3.8000640869140625
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,4096,64,4,128,0,1,fp8,fp8,0,2.3459839820861816
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,4096,64,4,128,0,1,float16,fp8,0,3.6759894688924155
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,4096,64,8,128,0,1,float16,fp8,0,3.6933972040812173
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,4096,64,8,128,0,1,float16,float16,0,3.7647358576456704
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,4096,64,8,128,0,1,fp8,fp8,0,2.3761919339497886
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,4096,64,64,128,0,1,float16,fp8,0,2.2992213567097983
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,4096,64,64,128,0,1,float16,float16,0,2.350762685139974
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,4096,64,64,128,0,1,fp8,fp8,0,1.5858346621195476
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,4096,64,1,128,0,1,float16,float16,0,1.826815923055013
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,4096,64,1,128,0,1,float16,fp8,0,1.7771520614624023
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,4096,64,1,128,0,1,fp8,fp8,0,1.1139413515726726
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,4096,64,2,128,0,1,float16,float16,0,1.7795413335164387
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,4096,64,2,128,0,1,fp8,fp8,0,1.0941440264383953
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,4096,64,2,128,0,1,float16,fp8,0,1.783125400543213
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,4096,64,4,128,0,1,float16,float16,0,1.808725357055664
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,4096,64,4,128,0,1,float16,fp8,0,1.8276693026224773
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,4096,64,4,128,0,1,fp8,fp8,0,1.1564373175303142
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,4096,64,8,128,0,1,float16,float16,0,1.8483200073242188
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,4096,64,8,128,0,1,float16,fp8,0,1.8150399525960286
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,4096,64,8,128,0,1,fp8,fp8,0,1.1564373175303142
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,3072,64,1,128,0,1,fp8,fp8,0,26.0862299601237
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,3072,64,2,128,0,1,fp8,fp8,0,24.685226440429688
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,3072,64,1,128,0,1,float16,fp8,0,39.811754862467446
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,3072,64,1,128,0,1,float16,float16,0,39.74946085611979
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,3072,64,2,128,0,1,float16,fp8,0,39.91313171386719
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,3072,64,2,128,0,1,float16,float16,0,40.14199574788412
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,3072,64,4,128,0,1,float16,fp8,0,39.65627797444662
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,3072,64,4,128,0,1,float16,float16,0,39.97969055175781
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,3072,64,4,128,0,1,fp8,fp8,0,25.776639302571613
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,3072,64,64,128,0,1,fp8,fp8,0,17.35970179239909
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,3072,64,1,128,0,1,float16,float16,0,18.557440439860027
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,3072,64,8,128,0,1,fp8,fp8,0,25.67731221516927
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,3072,64,64,128,0,1,float16,float16,0,23.858004252115887
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,3072,64,64,128,0,1,float16,fp8,0,23.761067708333332
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,3072,64,8,128,0,1,float16,float16,0,39.78734842936198
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,3072,64,8,128,0,1,float16,fp8,0,40.054613749186196
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,3072,64,1,128,0,1,float16,fp8,0,19.592533111572266
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,3072,64,1,128,0,1,fp8,fp8,0,11.933695475260416
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,3072,64,2,128,0,1,fp8,fp8,0,12.139690399169922
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,3072,64,2,128,0,1,float16,float16,0,19.199488321940105
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,3072,64,2,128,0,1,float16,fp8,0,19.22525914510091
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,3072,64,4,128,0,1,float16,fp8,0,19.191295623779297
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,3072,64,4,128,0,1,float16,float16,0,19.937450408935547
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,3072,64,4,128,0,1,fp8,fp8,0,11.86730702718099
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,3072,64,8,128,0,1,fp8,fp8,0,12.41326904296875
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,3072,64,8,128,0,1,float16,float16,0,20.240554809570312
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,3072,64,8,128,0,1,float16,fp8,0,19.256661732991535
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,3072,64,64,128,0,1,float16,float16,0,11.696980794270834
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,3072,64,64,128,0,1,fp8,fp8,0,8.532480239868164
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,3072,64,1,128,0,1,float16,float16,0,9.73141352335612
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,3072,64,64,128,0,1,float16,fp8,0,11.623765309651693
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,3072,64,1,128,0,1,float16,fp8,0,9.685162862141928
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,3072,64,1,128,0,1,fp8,fp8,0,5.499733606974284
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,3072,64,2,128,0,1,float16,float16,0,9.741482416788736
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,3072,64,2,128,0,1,fp8,fp8,0,5.597866694132487
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,3072,64,4,128,0,1,fp8,fp8,0,5.888512293497722
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,3072,64,2,128,0,1,float16,fp8,0,9.893717447916666
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,3072,64,4,128,0,1,float16,float16,0,9.574911753336588
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,3072,64,4,128,0,1,float16,fp8,0,10.06830914815267
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,3072,64,8,128,0,1,float16,float16,0,9.614677429199219
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,3072,64,8,128,0,1,float16,fp8,0,9.70854377746582
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,3072,64,8,128,0,1,fp8,fp8,0,6.032725016276042
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,3072,64,64,128,0,1,fp8,fp8,0,4.177920023600261
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,3072,64,64,128,0,1,float16,float16,0,5.821781158447266
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,3072,64,64,128,0,1,float16,fp8,0,5.772970835367839
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,3072,64,1,128,0,1,float16,float16,0,4.312746683756511
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,3072,64,1,128,0,1,float16,fp8,0,4.580010732014974
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,3072,64,1,128,0,1,fp8,fp8,0,2.7335678736368814
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,3072,64,2,128,0,1,fp8,fp8,0,2.7267414728800454
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,3072,64,4,128,0,1,fp8,fp8,0,2.7820374170939126
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,3072,64,4,128,0,1,float16,float16,0,4.437504132588704
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,3072,64,2,128,0,1,float16,fp8,0,4.409173329671224
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,3072,64,4,128,0,1,float16,fp8,0,4.3987627029418945
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,3072,64,2,128,0,1,float16,float16,0,4.495018641153972
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,3072,64,8,128,0,1,float16,float16,0,4.607317288716634
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,3072,64,8,128,0,1,float16,fp8,0,4.533930778503418
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,3072,64,8,128,0,1,fp8,fp8,0,2.8625920613606772
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,3072,64,64,128,0,1,float16,float16,0,2.927445411682129
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,3072,64,1,128,0,1,fp8,fp8,0,1.368234634399414
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,3072,64,64,128,0,1,float16,fp8,0,2.8699305852254233
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,3072,64,1,128,0,1,float16,float16,0,2.120704015096029
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,3072,64,1,128,0,1,float16,fp8,0,2.161834716796875
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,3072,64,2,128,0,1,float16,float16,0,2.184874693552653
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,3072,64,64,128,0,1,fp8,fp8,0,2.050389289855957
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,3072,64,2,128,0,1,float16,fp8,0,2.1609813372294107
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,3072,64,2,128,0,1,fp8,fp8,0,1.3393920262654622
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,3072,64,4,128,0,1,float16,float16,0,2.237610658009847
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,3072,64,4,128,0,1,fp8,fp8,0,1.3597013155619304
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,3072,64,4,128,0,1,float16,fp8,0,2.1763413747151694
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,3072,64,8,128,0,1,float16,float16,0,2.305194695790609
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,3072,64,8,128,0,1,float16,fp8,0,2.2604799270629883
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,3072,64,8,128,0,1,fp8,fp8,0,1.4231893221537273
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,3072,64,64,128,0,1,float16,float16,0,1.469098726908366
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,3072,64,64,128,0,1,float16,fp8,0,1.422335942586263
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,3072,64,64,128,0,1,fp8,fp8,0,0.9905493259429932
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,3072,64,1,128,0,1,float16,float16,0,1.0478933652242024
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,3072,64,1,128,0,1,float16,fp8,0,1.050282637278239
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,3072,64,1,128,0,1,fp8,fp8,0,0.6893226305643717
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,3072,64,2,128,0,1,float16,float16,0,1.0644480387369792
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,3072,64,2,128,0,1,float16,fp8,0,1.0714453061421711
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,3072,64,2,128,0,1,fp8,fp8,0,0.6871039867401123
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,3072,64,4,128,0,1,float16,float16,0,1.0542079607645671
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,3072,64,4,128,0,1,fp8,fp8,0,0.6768639882405599
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,3072,64,4,128,0,1,float16,fp8,0,1.087488015492757
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,3072,64,8,128,0,1,float16,float16,0,1.066325346628825
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,3072,64,8,128,0,1,float16,fp8,0,1.0915839672088623
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,3072,64,8,128,0,1,fp8,fp8,0,0.7082666556040446
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,2048,64,1,128,0,1,fp8,fp8,0,26.40093739827474
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,2048,64,2,128,0,1,fp8,fp8,0,26.1562016805013
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,2048,64,1,128,0,1,float16,float16,0,40.11724853515625
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,2048,64,1,128,0,1,float16,fp8,0,39.98651631673177
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,2048,64,2,128,0,1,float16,float16,0,40.31573232014974
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,2048,64,2,128,0,1,float16,fp8,0,40.01399485270182
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,2048,64,4,128,0,1,float16,float16,0,40.046251932779946
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,2048,64,4,128,0,1,float16,fp8,0,39.76567586263021
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,2048,64,4,128,0,1,fp8,fp8,0,26.914815266927082
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,2048,64,1,128,0,1,float16,float16,0,19.08684794108073
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,2048,64,64,128,0,1,fp8,fp8,0,19.565909067789715
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,2048,64,64,128,0,1,float16,float16,0,25.10455576578776
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,2048,64,8,128,0,1,float16,float16,0,38.885205586751304
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,2048,64,64,128,0,1,float16,fp8,0,25.321983337402344
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,2048,64,8,128,0,1,fp8,fp8,0,27.91014353434245
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,2048,64,8,128,0,1,float16,fp8,0,40.74410756429037
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,2048,64,1,128,0,1,fp8,fp8,0,11.66745630900065
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,2048,64,1,128,0,1,float16,fp8,0,18.967722574869793
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,2048,64,2,128,0,1,fp8,fp8,0,11.90826670328776
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,2048,64,2,128,0,1,float16,float16,0,19.480064392089844
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,2048,64,2,128,0,1,float16,fp8,0,18.507434844970703
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,2048,64,4,128,0,1,float16,fp8,0,19.562154134114582
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,2048,64,4,128,0,1,float16,float16,0,19.82549285888672
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,2048,64,4,128,0,1,fp8,fp8,0,12.553045908610025
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,2048,64,8,128,0,1,fp8,fp8,0,12.343978881835938
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,2048,64,8,128,0,1,float16,fp8,0,18.501290639241535
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,2048,64,8,128,0,1,float16,float16,0,19.433984120686848
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,2048,64,64,128,0,1,float16,float16,0,12.798805236816406
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,2048,64,1,128,0,1,float16,float16,0,9.306453069051107
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,2048,64,64,128,0,1,fp8,fp8,0,9.556650797526041
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,2048,64,64,128,0,1,float16,fp8,0,12.458666483561197
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,2048,64,1,128,0,1,float16,fp8,0,9.963008244832357
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,2048,64,1,128,0,1,fp8,fp8,0,5.613909403483073
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,2048,64,2,128,0,1,float16,float16,0,9.288703918457031
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,2048,64,2,128,0,1,float16,fp8,0,9.1779416402181
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,2048,64,2,128,0,1,fp8,fp8,0,5.8111998240153
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,2048,64,4,128,0,1,fp8,fp8,0,5.868714650472005
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,2048,64,4,128,0,1,float16,float16,0,9.292970657348633
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,2048,64,4,128,0,1,float16,fp8,0,9.518250783284506
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,2048,64,8,128,0,1,float16,float16,0,9.604437510172525
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,2048,64,8,128,0,1,float16,fp8,0,9.001813252766928
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,2048,64,8,128,0,1,fp8,fp8,0,6.01258659362793
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,2048,64,64,128,0,1,float16,float16,0,6.319445292154948
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,2048,64,64,128,0,1,float16,fp8,0,6.238549550374349
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,2048,64,64,128,0,1,fp8,fp8,0,4.70033073425293
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,2048,64,1,128,0,1,float16,float16,0,4.4822187423706055
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,2048,64,1,128,0,1,float16,fp8,0,4.327765464782715
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,2048,64,1,128,0,1,fp8,fp8,0,2.770432154337565
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,2048,64,2,128,0,1,float16,float16,0,4.294997215270996
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,2048,64,2,128,0,1,float16,fp8,0,4.318719863891602
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,2048,64,2,128,0,1,fp8,fp8,0,2.797226587931315
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,2048,64,4,128,0,1,fp8,fp8,0,2.9202772776285806
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,2048,64,4,128,0,1,float16,fp8,0,4.374357223510742
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,2048,64,8,128,0,1,float16,float16,0,4.528640111287435
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,2048,64,4,128,0,1,float16,float16,0,4.473002751668294
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,2048,64,8,128,0,1,float16,fp8,0,4.615850766499837
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,2048,64,8,128,0,1,fp8,fp8,0,2.9564587275187173
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,2048,64,64,128,0,1,float16,float16,0,3.1378774642944336
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,2048,64,64,128,0,1,float16,fp8,0,3.0962346394856772
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,2048,64,64,128,0,1,fp8,fp8,0,2.3309653600056968
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,2048,64,1,128,0,1,float16,float16,0,2.1114880243937173
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,2048,64,1,128,0,1,float16,fp8,0,2.112170696258545
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,2048,64,1,128,0,1,fp8,fp8,0,1.374549388885498
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,2048,64,2,128,0,1,float16,fp8,0,2.1167786916097007
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,2048,64,2,128,0,1,float16,float16,0,2.153301397959391
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,2048,64,2,128,0,1,fp8,fp8,0,1.3713067372639973
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,2048,64,4,128,0,1,float16,float16,0,2.184021313985189
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,2048,64,4,128,0,1,fp8,fp8,0,1.3911040623982747
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,2048,64,4,128,0,1,float16,fp8,0,2.2338560422261557
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,2048,64,8,128,0,1,float16,float16,0,2.2338560422261557
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,2048,64,8,128,0,1,float16,fp8,0,2.265941301981608
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,2048,64,8,128,0,1,fp8,fp8,0,1.4607359568277996
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,2048,64,64,128,0,1,float16,fp8,0,1.5148372650146484
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,2048,64,64,128,0,1,fp8,fp8,0,1.1144533157348633
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,2048,64,64,128,0,1,float16,float16,0,1.5703040758768718
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,2048,64,1,128,0,1,float16,float16,0,0.9890133539835612
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,2048,64,1,128,0,1,float16,fp8,0,1.0154666900634766
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,2048,64,1,128,0,1,fp8,fp8,0,0.6444373528162638
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,2048,64,2,128,0,1,float16,fp8,0,0.9939626852671305
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,2048,64,2,128,0,1,float16,float16,0,1.0190506776173909
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,2048,64,2,128,0,1,fp8,fp8,0,0.6514346599578857
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,2048,64,4,128,0,1,fp8,fp8,0,0.659114678700765
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,2048,64,4,128,0,1,float16,float16,0,1.0156373182932537
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,2048,64,4,128,0,1,float16,fp8,0,1.017514705657959
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,2048,64,8,128,0,1,float16,float16,0,1.0671786467234294
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,2048,64,8,128,0,1,float16,fp8,0,1.0313386917114258
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,2048,64,8,128,0,1,fp8,fp8,0,0.7034880320231119
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,2048,64,64,128,0,1,fp8,fp8,0,0.5224106709162394
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,2048,64,64,128,0,1,float16,fp8,0,0.726698637008667
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,2048,64,1,128,0,1,float16,float16,0,0.5312853256861368
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,2048,64,1,128,0,1,fp8,fp8,0,0.3314346671104431
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,2048,64,1,128,0,1,float16,fp8,0,0.5277013381322225
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,2048,64,64,128,0,1,float16,float16,0,0.7741440137227377
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,2048,64,2,128,0,1,float16,float16,0,0.5277013381322225
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,2048,64,2,128,0,1,float16,fp8,0,0.5254826545715332
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,2048,64,2,128,0,1,fp8,fp8,0,0.32477867603302
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,2048,64,4,128,0,1,float16,float16,0,0.5253119866053263
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,2048,64,4,128,0,1,float16,fp8,0,0.532480001449585
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,2048,64,4,128,0,1,fp8,fp8,0,0.33433600266774494
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,2048,64,8,128,0,1,float16,float16,0,0.5280426740646362
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,2048,64,8,128,0,1,fp8,fp8,0,0.33058132727940875
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,2048,64,8,128,0,1,float16,fp8,0,0.5369173288345337
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,1536,64,1,128,0,1,fp8,fp8,0,15.524693806966146
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,1536,64,2,128,0,1,fp8,fp8,0,16.015188852945965
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,1536,64,1,128,0,1,float16,float16,0,23.561045328776043
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,1536,64,1,128,0,1,float16,fp8,0,23.39379119873047
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,1536,64,2,128,0,1,float16,float16,0,23.25367482503255
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,1536,64,2,128,0,1,float16,fp8,0,23.8549321492513
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,1536,64,4,128,0,1,float16,float16,0,23.584426879882812
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,1536,64,4,128,0,1,float16,fp8,0,23.7489496866862
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,1536,64,4,128,0,1,fp8,fp8,0,16.24285888671875
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,1536,64,64,128,0,1,float16,float16,0,16.604671478271484
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,1536,64,64,128,0,1,fp8,fp8,0,12.991146087646484
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,1536,64,64,128,0,1,float16,fp8,0,16.69768524169922
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,1536,64,8,128,0,1,float16,float16,0,24.805376688639324
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,1536,64,8,128,0,1,fp8,fp8,0,17.137322743733723
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,1536,64,1,128,0,1,float16,float16,0,11.230037689208984
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,1536,64,8,128,0,1,float16,fp8,0,24.144724527994793
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,1536,64,1,128,0,1,float16,fp8,0,10.873172760009766
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,1536,64,1,128,0,1,fp8,fp8,0,7.093760172526042
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,1536,64,2,128,0,1,fp8,fp8,0,7.416320164998372
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,1536,64,2,128,0,1,float16,float16,0,11.292500813802084
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,1536,64,4,128,0,1,fp8,fp8,0,7.391743977864583
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,1536,64,2,128,0,1,float16,fp8,0,11.03104019165039
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,1536,64,4,128,0,1,float16,float16,0,11.330047607421875
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,1536,64,4,128,0,1,float16,fp8,0,11.351552327473959
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,1536,64,8,128,0,1,float16,float16,0,11.585365295410156
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,1536,64,8,128,0,1,float16,fp8,0,11.628031412760416
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,1536,64,8,128,0,1,fp8,fp8,0,7.8510080973307295
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,1536,64,64,128,0,1,float16,float16,0,8.249343872070312
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,1536,64,64,128,0,1,fp8,fp8,0,6.556842803955078
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,1536,64,64,128,0,1,float16,fp8,0,8.141653060913086
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,1536,64,1,128,0,1,float16,float16,0,5.415765126546224
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,1536,64,1,128,0,1,float16,fp8,0,5.48744519551595
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,1536,64,1,128,0,1,fp8,fp8,0,3.5138559341430664
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,1536,64,2,128,0,1,fp8,fp8,0,3.5800746281941733
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,1536,64,2,128,0,1,float16,float16,0,5.487103780110677
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,1536,64,2,128,0,1,float16,fp8,0,5.474986394246419
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,1536,64,4,128,0,1,float16,fp8,0,5.472597122192383
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,1536,64,4,128,0,1,float16,float16,0,5.733205159505208
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,1536,64,4,128,0,1,fp8,fp8,0,3.6312745412190757
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,1536,64,8,128,0,1,float16,float16,0,5.67193603515625
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,1536,64,8,128,0,1,float16,fp8,0,5.523967742919922
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,1536,64,8,128,0,1,fp8,fp8,0,3.8229331970214844
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,1536,64,64,128,0,1,float16,float16,0,4.14737065633138
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,1536,64,64,128,0,1,fp8,fp8,0,3.132416089375814
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,1536,64,64,128,0,1,float16,fp8,0,4.069546699523926
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,1536,64,1,128,0,1,float16,float16,0,2.707285245259603
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,1536,64,1,128,0,1,float16,fp8,0,2.629631996154785
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,1536,64,1,128,0,1,fp8,fp8,0,1.7372159957885742
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,1536,64,2,128,0,1,float16,float16,0,2.662399927775065
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,1536,64,2,128,0,1,float16,fp8,0,2.663935979207357
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,1536,64,2,128,0,1,fp8,fp8,0,1.7551360130310059
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,1536,64,4,128,0,1,float16,float16,0,2.709162712097168
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,1536,64,4,128,0,1,float16,fp8,0,2.7368106842041016
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,1536,64,4,128,0,1,fp8,fp8,0,1.8242559432983398
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,1536,64,8,128,0,1,float16,float16,0,2.8045654296875
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,1536,64,8,128,0,1,float16,fp8,0,2.832042694091797
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,1536,64,8,128,0,1,fp8,fp8,0,1.8868907292683919
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,1536,64,64,128,0,1,float16,float16,0,2.0645546913146973
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,1536,64,64,128,0,1,float16,fp8,0,2.0145492553710938
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,1536,64,64,128,0,1,fp8,fp8,0,1.5150079727172852
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,1536,64,1,128,0,1,float16,float16,0,1.2868266900380452
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,1536,64,1,128,0,1,float16,fp8,0,1.300821304321289
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,1536,64,1,128,0,1,fp8,fp8,0,0.8340480327606201
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,1536,64,2,128,0,1,float16,float16,0,1.3100372950236003
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,1536,64,2,128,0,1,float16,fp8,0,1.285802682240804
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,1536,64,2,128,0,1,fp8,fp8,0,0.854528029759725
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,1536,64,4,128,0,1,float16,float16,0,1.3370025952657063
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,1536,64,4,128,0,1,float16,fp8,0,1.307477315266927
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,1536,64,4,128,0,1,fp8,fp8,0,0.8823466300964355
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,1536,64,8,128,0,1,float16,float16,0,1.4086826642354329
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,1536,64,8,128,0,1,float16,fp8,0,1.3607254028320312
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,1536,64,64,128,0,1,float16,float16,0,1.0089813073476155
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,1536,64,8,128,0,1,fp8,fp8,0,0.9359359741210938
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,1536,64,64,128,0,1,float16,fp8,0,0.9729706446329752
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,1536,64,1,128,0,1,float16,float16,0,0.5920426845550537
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,1536,64,64,128,0,1,fp8,fp8,0,0.7354026635487875
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,1536,64,1,128,0,1,float16,fp8,0,0.5937493244806925
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,1536,64,1,128,0,1,fp8,fp8,0,0.3978240092595418
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,1536,64,2,128,0,1,float16,float16,0,0.584874669710795
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,1536,64,2,128,0,1,float16,fp8,0,0.5870933135350546
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,1536,64,2,128,0,1,fp8,fp8,0,0.39185067017873126
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,1536,64,4,128,0,1,float16,float16,0,0.5937493244806925
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,1536,64,4,128,0,1,float16,fp8,0,0.5913600126902262
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,1536,64,4,128,0,1,fp8,fp8,0,0.39816534519195557
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,1536,64,8,128,0,1,float16,fp8,0,0.6283946832021078
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,1536,64,8,128,0,1,float16,float16,0,0.6282240152359009
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,1536,64,8,128,0,1,fp8,fp8,0,0.41608532269795734
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,1536,64,64,128,0,1,float16,float16,0,0.42803200085957843
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,1536,64,64,128,0,1,float16,fp8,0,0.39765334129333496
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,1536,64,1,128,0,1,float16,float16,0,0.32682667175928753
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,1536,64,64,128,0,1,fp8,fp8,0,0.3237546682357788
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,1536,64,1,128,0,1,float16,fp8,0,0.3322880069414775
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,1536,64,1,128,0,1,fp8,fp8,0,0.2152106761932373
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,1536,64,2,128,0,1,float16,float16,0,0.3264853358268738
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,1536,64,2,128,0,1,float16,fp8,0,0.32204800844192505
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,1536,64,2,128,0,1,fp8,fp8,0,0.2135039965311686
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,1536,64,4,128,0,1,float16,fp8,0,0.3309226632118225
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,1536,64,4,128,0,1,float16,float16,0,0.33553067843119305
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,1536,64,4,128,0,1,fp8,fp8,0,0.20855466524759927
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,1536,64,8,128,0,1,float16,float16,0,0.33826132615407306
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,1536,64,8,128,0,1,float16,fp8,0,0.33928533395131427
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,1536,64,8,128,0,1,fp8,fp8,0,0.20787199338277182
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,1024,64,1,128,0,1,fp8,fp8,0,15.979007720947266
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,1024,64,1,128,0,1,float16,fp8,0,23.169024149576824
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,1024,64,1,128,0,1,float16,float16,0,23.483904520670574
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,1024,64,2,128,0,1,fp8,fp8,0,16.354816436767578
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,1024,64,2,128,0,1,float16,float16,0,23.590059916178387
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,1024,64,2,128,0,1,float16,fp8,0,23.720789591471355
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,1024,64,4,128,0,1,float16,fp8,0,24.141141255696613
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,1024,64,4,128,0,1,float16,float16,0,24.055979410807293
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,1024,64,4,128,0,1,fp8,fp8,0,16.451754252115887
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,1024,64,8,128,0,1,fp8,fp8,0,17.33905029296875
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,1024,64,8,128,0,1,float16,float16,0,23.627092997233074
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,1024,64,8,128,0,1,float16,fp8,0,23.841451009114582
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,1024,64,1,128,0,1,float16,float16,0,11.638272603352865
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,1024,64,64,128,0,1,fp8,fp8,0,15.133354187011719
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,1024,64,64,128,0,1,float16,fp8,0,17.649152119954426
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,1024,64,64,128,0,1,float16,float16,0,18.836650848388672
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,1024,64,1,128,0,1,float16,fp8,0,11.366058349609375
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,1024,64,1,128,0,1,fp8,fp8,0,7.849301020304362
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,1024,64,2,128,0,1,float16,float16,0,11.372544606526693
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,1024,64,2,128,0,1,fp8,fp8,0,8.154624303181967
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,1024,64,2,128,0,1,float16,fp8,0,11.40292231241862
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,1024,64,4,128,0,1,fp8,fp8,0,8.192341486612955
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,1024,64,4,128,0,1,float16,fp8,0,11.63144556681315
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,1024,64,4,128,0,1,float16,float16,0,11.753130594889322
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,1024,64,8,128,0,1,float16,float16,0,11.43381373087565
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,1024,64,8,128,0,1,float16,fp8,0,11.753983815511068
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,1024,64,8,128,0,1,fp8,fp8,0,8.36130142211914
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,1024,64,1,128,0,1,float16,float16,0,5.684053421020508
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,1024,64,1,128,0,1,float16,fp8,0,5.689685185750325
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,1024,64,64,128,0,1,fp8,fp8,0,7.53544553120931
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,1024,64,64,128,0,1,float16,fp8,0,8.785578409830729
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,1024,64,64,128,0,1,float16,float16,0,9.363114674886068
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,1024,64,1,128,0,1,fp8,fp8,0,3.815082550048828
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,1024,64,2,128,0,1,float16,float16,0,5.7045332590738935
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,1024,64,2,128,0,1,fp8,fp8,0,3.8377812703450522
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,1024,64,2,128,0,1,float16,fp8,0,5.671082814534505
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,1024,64,4,128,0,1,float16,float16,0,5.718869527180989
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,1024,64,4,128,0,1,fp8,fp8,0,3.9818239212036133
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,1024,64,4,128,0,1,float16,fp8,0,5.814442952473958
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,1024,64,8,128,0,1,float16,float16,0,6.132053375244141
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,1024,64,8,128,0,1,float16,fp8,0,5.771263758341472
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,1024,64,8,128,0,1,fp8,fp8,0,4.171264012654622
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,1024,64,64,128,0,1,float16,float16,0,4.617557207743327
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,1024,64,64,128,0,1,float16,fp8,0,4.348416010538737
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,1024,64,64,128,0,1,fp8,fp8,0,3.764224052429199
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,1024,64,1,128,0,1,float16,float16,0,2.72981325785319
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,1024,64,1,128,0,1,float16,fp8,0,2.7642879486083984
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,1024,64,1,128,0,1,fp8,fp8,0,1.8677760759989421
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,1024,64,2,128,0,1,float16,float16,0,2.758314768473307
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,1024,64,2,128,0,1,float16,fp8,0,2.776576042175293
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,1024,64,2,128,0,1,fp8,fp8,0,1.8720426559448242
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,1024,64,4,128,0,1,float16,float16,0,2.8202667236328125
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,1024,64,4,128,0,1,fp8,fp8,0,1.9309226671854656
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,1024,64,4,128,0,1,float16,fp8,0,2.82914129892985
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,1024,64,8,128,0,1,float16,float16,0,2.963456153869629
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,1024,64,8,128,0,1,float16,fp8,0,2.9723307291666665
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,1024,64,8,128,0,1,fp8,fp8,0,2.0563626289367676
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,1024,64,64,128,0,1,float16,float16,0,2.3432532946268716
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,1024,64,64,128,0,1,float16,fp8,0,2.195626735687256
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,1024,64,64,128,0,1,fp8,fp8,0,1.8310826619466145
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,1024,64,1,128,0,1,float16,float16,0,1.3337599436442058
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,1024,64,1,128,0,1,float16,fp8,0,1.3527040481567383
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,1024,64,1,128,0,1,fp8,fp8,0,0.9183573722839355
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,1024,64,2,128,0,1,float16,fp8,0,1.3527040481567383
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,1024,64,2,128,0,1,float16,float16,0,1.3895680109659831
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,1024,64,2,128,0,1,fp8,fp8,0,0.9297920068105062
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,1024,64,4,128,0,1,float16,float16,0,1.3887146313985188
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,1024,64,4,128,0,1,float16,fp8,0,1.3849600156148274
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,1024,64,4,128,0,1,fp8,fp8,0,0.9519786834716797
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,1024,64,8,128,0,1,float16,float16,0,1.4682453473409016
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,1024,64,8,128,0,1,float16,fp8,0,1.4537386894226074
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,1024,64,8,128,0,1,fp8,fp8,0,1.0091520150502522
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,1024,64,64,128,0,1,float16,fp8,0,1.0943146546681721
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,1024,64,64,128,0,1,float16,float16,0,1.1653119723002117
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,1024,64,64,128,0,1,fp8,fp8,0,0.8586239814758301
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,1024,64,1,128,0,1,float16,float16,0,0.6041599909464518
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,1024,64,1,128,0,1,float16,fp8,0,0.59443199634552
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,1024,64,1,128,0,1,fp8,fp8,0,0.41830400625864667
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,1024,64,2,128,0,1,float16,fp8,0,0.6079146862030029
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,1024,64,2,128,0,1,float16,float16,0,0.6203733285268148
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,1024,64,2,128,0,1,fp8,fp8,0,0.43246932824452716
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,1024,64,4,128,0,1,float16,float16,0,0.6427306731541952
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,1024,64,4,128,0,1,float16,fp8,0,0.6203733285268148
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,1024,64,4,128,0,1,fp8,fp8,0,0.4459520181020101
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,1024,64,8,128,0,1,float16,float16,0,0.66594131787618
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,1024,64,8,128,0,1,float16,fp8,0,0.664405345916748
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,1024,64,8,128,0,1,fp8,fp8,0,0.48401065667470294
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,1024,64,64,128,0,1,float16,fp8,0,0.5077333450317383
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,1024,64,64,128,0,1,float16,float16,0,0.5517653226852417
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,1024,64,64,128,0,1,fp8,fp8,0,0.40584532419840497
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,1024,64,1,128,0,1,float16,float16,0,0.2867199977238973
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,1024,64,1,128,0,1,float16,fp8,0,0.29064534107844037
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,1024,64,1,128,0,1,fp8,fp8,0,0.1723733345667521
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,1024,64,2,128,0,1,float16,float16,0,0.29525333642959595
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,1024,64,2,128,0,1,float16,fp8,0,0.291157325108846
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,1024,64,2,128,0,1,fp8,fp8,0,0.17442133029301962
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,1024,64,4,128,0,1,float16,float16,0,0.2945706645647685
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,1024,64,4,128,0,1,fp8,fp8,0,0.1718613306681315
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,1024,64,8,128,0,1,float16,float16,0,0.29201066493988037
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,1024,64,4,128,0,1,float16,fp8,0,0.2949120004971822
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,1024,64,8,128,0,1,float16,fp8,0,0.28962133328119916
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,1024,64,8,128,0,1,fp8,fp8,0,0.17971199750900269
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,1024,64,64,128,0,1,float16,float16,0,0.17971199750900269
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,1024,64,64,128,0,1,float16,fp8,0,0.16793600718180338
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,1024,64,64,128,0,1,fp8,fp8,0,0.10786133011182149
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,1024,64,1,128,0,1,float16,float16,0,0.14916266997655234
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,1024,64,1,128,0,1,float16,fp8,0,0.15633066495259604
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,1024,64,1,128,0,1,fp8,fp8,0,0.10291199882825215
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,1024,64,2,128,0,1,float16,float16,0,0.15069866180419922
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,1024,64,2,128,0,1,float16,fp8,0,0.15223466356595358
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,1024,64,2,128,0,1,fp8,fp8,0,0.1032533347606659
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,1024,64,4,128,0,1,float16,float16,0,0.15411200126012167
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,1024,64,4,128,0,1,fp8,fp8,0,0.10120532910029094
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,1024,64,4,128,0,1,float16,fp8,0,0.15769599874814352
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,1024,64,8,128,0,1,float16,float16,0,0.15735466281572977
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,1024,64,8,128,0,1,fp8,fp8,0,0.1288533310095469
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,1024,64,8,128,0,1,float16,fp8,0,0.159061332543691
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,512,64,1,128,0,1,float16,float16,0,17.08680470784505
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,512,64,1,128,0,1,fp8,fp8,0,12.633258819580078
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,512,64,1,128,0,1,float16,fp8,0,16.809642791748047
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,512,64,2,128,0,1,fp8,fp8,0,12.795903523763021
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,512,64,2,128,0,1,float16,float16,0,17.09329096476237
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,512,64,4,128,0,1,float16,float16,0,16.876202901204426
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,512,64,2,128,0,1,float16,fp8,0,17.212586720784504
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,512,64,4,128,0,1,float16,fp8,0,17.162923177083332
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,512,64,4,128,0,1,fp8,fp8,0,13.309440612792969
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,512,64,8,128,0,1,float16,float16,0,17.16531244913737
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,512,64,8,128,0,1,fp8,fp8,0,13.537792205810547
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,512,64,8,128,0,1,float16,fp8,0,16.959487915039062
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,512,64,1,128,0,1,float16,float16,0,8.292693456013998
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,512,64,64,128,0,1,float16,float16,0,17.118208567301433
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,512,64,64,128,0,1,fp8,fp8,0,14.097408294677734
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,512,64,64,128,0,1,float16,fp8,0,16.09130605061849
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,512,64,1,128,0,1,float16,fp8,0,8.296277364095053
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,512,64,1,128,0,1,fp8,fp8,0,6.091434478759766
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,512,64,2,128,0,1,float16,fp8,0,8.048639933268229
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,512,64,2,128,0,1,float16,float16,0,8.210261027018229
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,512,64,2,128,0,1,fp8,fp8,0,6.210901260375977
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,512,64,4,128,0,1,float16,float16,0,8.261973063151041
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,512,64,4,128,0,1,fp8,fp8,0,6.247082392374675
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,512,64,4,128,0,1,float16,fp8,0,8.351231892903646
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,512,64,8,128,0,1,float16,float16,0,8.62122662862142
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,512,64,8,128,0,1,float16,fp8,0,8.421717325846354
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,512,64,8,128,0,1,fp8,fp8,0,6.569471995035808
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,512,64,64,128,0,1,float16,float16,0,8.574464162190756
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,512,64,64,128,0,1,float16,fp8,0,8.069973627726236
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,512,64,1,128,0,1,float16,float16,0,4.015957196553548
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,512,64,64,128,0,1,fp8,fp8,0,7.0143998463948565
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,512,64,1,128,0,1,float16,fp8,0,4.056917190551758
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,512,64,1,128,0,1,fp8,fp8,0,2.9550933837890625
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,512,64,2,128,0,1,float16,float16,0,4.097536087036133
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,512,64,2,128,0,1,float16,fp8,0,4.046506563822429
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,512,64,2,128,0,1,fp8,fp8,0,2.9812053044637046
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,512,64,4,128,0,1,float16,float16,0,4.260693232218425
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,512,64,4,128,0,1,fp8,fp8,0,3.0837761561075845
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,512,64,4,128,0,1,float16,fp8,0,4.021930694580078
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,512,64,8,128,0,1,float16,float16,0,4.3224747975667315
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,512,64,8,128,0,1,fp8,fp8,0,3.255125363667806
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,512,64,8,128,0,1,float16,fp8,0,4.286293347676595
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,512,64,64,128,0,1,float16,float16,0,4.300458590189616
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,512,64,64,128,0,1,float16,fp8,0,4.037461280822754
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,512,64,1,128,0,1,float16,float16,0,1.9488426844278972
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,512,64,64,128,0,1,fp8,fp8,0,3.4148693084716797
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,512,64,1,128,0,1,float16,fp8,0,1.9415040016174316
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,512,64,1,128,0,1,fp8,fp8,0,1.3943467140197754
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,512,64,2,128,0,1,float16,fp8,0,1.9938987096150715
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,512,64,2,128,0,1,float16,float16,0,2.0019200642903647
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,512,64,2,128,0,1,fp8,fp8,0,1.4112426439921062
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,512,64,4,128,0,1,float16,float16,0,2.046463966369629
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,512,64,4,128,0,1,float16,fp8,0,2.019327958424886
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,512,64,4,128,0,1,fp8,fp8,0,1.4999893506368
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,512,64,8,128,0,1,float16,float16,0,2.1357226371765137
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,512,64,8,128,0,1,fp8,fp8,0,1.6058026949564617
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,512,64,64,128,0,1,float16,float16,0,2.1695146560668945
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,512,64,8,128,0,1,float16,fp8,0,2.1367467244466147
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,512,64,64,128,0,1,float16,fp8,0,2.029226620992025
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,512,64,64,128,0,1,fp8,fp8,0,1.6585386594136555
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,512,64,1,128,0,1,float16,float16,0,0.9495893319447836
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,512,64,1,128,0,1,float16,fp8,0,0.9705813725789388
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,512,64,1,128,0,1,fp8,fp8,0,0.6661119858423868
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,512,64,2,128,0,1,float16,float16,0,0.9775786399841309
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,512,64,2,128,0,1,float16,fp8,0,0.9724586804707845
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,512,64,4,128,0,1,float16,float16,0,1.002837340037028
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,512,64,2,128,0,1,fp8,fp8,0,0.6978560288747152
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,512,64,4,128,0,1,float16,fp8,0,0.9859413305918375
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,512,64,4,128,0,1,fp8,fp8,0,0.7215786774953207
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,512,64,8,128,0,1,float16,float16,0,1.0796373685201008
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,512,64,8,128,0,1,float16,fp8,0,1.0422613620758057
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,512,64,8,128,0,1,fp8,fp8,0,0.7835306326548258
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,512,64,64,128,0,1,float16,float16,0,1.0415786902109783
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,512,64,64,128,0,1,float16,fp8,0,0.981503963470459
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,512,64,1,128,0,1,float16,float16,0,0.4073813358942668
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,512,64,64,128,0,1,fp8,fp8,0,0.7760213216145834
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,512,64,1,128,0,1,float16,fp8,0,0.40857601165771484
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,512,64,1,128,0,1,fp8,fp8,0,0.3068586587905884
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,512,64,2,128,0,1,float16,float16,0,0.4159146547317505
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,512,64,2,128,0,1,float16,fp8,0,0.41437868277231854
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,512,64,2,128,0,1,fp8,fp8,0,0.3165866732597351
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,512,64,4,128,0,1,float16,float16,0,0.43485867977142334
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,512,64,4,128,0,1,float16,fp8,0,0.42922667662302655
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,512,64,8,128,0,1,float16,float16,0,0.4766720136006673
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,512,64,4,128,0,1,fp8,fp8,0,0.33672531445821124
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,512,64,8,128,0,1,fp8,fp8,0,0.36232535044352215
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,512,64,8,128,0,1,float16,fp8,0,0.47035733858744305
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,512,64,64,128,0,1,float16,fp8,0,0.4135253429412842
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,512,64,64,128,0,1,float16,float16,0,0.45431466897328693
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,512,64,64,128,0,1,fp8,fp8,0,0.3466240167617798
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,512,64,1,128,0,1,float16,float16,0,0.19490132729212442
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,512,64,2,128,0,1,float16,float16,0,0.19353600343068442
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,512,64,2,128,0,1,float16,fp8,0,0.19933867454528809
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,512,64,1,128,0,1,float16,fp8,0,0.19114667177200317
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,512,64,1,128,0,1,fp8,fp8,0,0.11468799908955891
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,512,64,2,128,0,1,fp8,fp8,0,0.1155413289864858
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,512,64,4,128,0,1,float16,fp8,0,0.19336533546447754
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,512,64,4,128,0,1,fp8,fp8,0,0.11434666315714519
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,512,64,4,128,0,1,float16,float16,0,0.19336533546447754
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,512,64,8,128,0,1,float16,float16,0,0.19165867567062378
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,512,64,8,128,0,1,float16,fp8,0,0.20241065820058188
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,512,64,8,128,0,1,fp8,fp8,0,0.1155413289864858
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,512,64,64,128,0,1,float16,float16,0,0.12185600399971008
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,512,64,64,128,0,1,float16,fp8,0,0.1088853379090627
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,512,64,64,128,0,1,fp8,fp8,0,0.07287466526031494
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,512,64,1,128,0,1,float16,float16,0,0.09591466188430786
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,512,64,1,128,0,1,float16,fp8,0,0.09796266754468282
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,512,64,1,128,0,1,fp8,fp8,0,0.06673066814740498
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,512,64,2,128,0,1,float16,float16,0,0.09830400347709656
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,512,64,2,128,0,1,float16,fp8,0,0.09693866968154907
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,512,64,2,128,0,1,fp8,fp8,0,0.06587733328342438
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,512,64,4,128,0,1,float16,fp8,0,0.09727999567985535
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,512,64,4,128,0,1,float16,float16,0,0.09710933764775594
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,512,64,4,128,0,1,fp8,fp8,0,0.06638933221499126
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,512,64,8,128,0,1,float16,float16,0,0.09642666578292847
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,512,64,8,128,0,1,float16,fp8,0,0.09779199957847595
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,512,64,8,128,0,1,fp8,fp8,0,0.06843733290831248
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,512,64,64,128,0,1,float16,float16,0,0.06331733365853627
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,512,64,64,128,0,1,fp8,fp8,0,0.04386133452256521
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,512,64,64,128,0,1,float16,fp8,0,0.0631466656923294
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,512,64,1,128,0,1,float16,float16,0,0.05905066430568695
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,512,64,1,128,0,1,float16,fp8,0,0.0602453351020813
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,512,64,1,128,0,1,fp8,fp8,0,0.040789333482583366
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,512,64,2,128,0,1,float16,float16,0,0.05973333120346069
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,512,64,2,128,0,1,float16,fp8,0,0.06058666606744131
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,512,64,2,128,0,1,fp8,fp8,0,0.04095999896526337
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,512,64,4,128,0,1,float16,float16,0,0.0576853354771932
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,512,64,4,128,0,1,fp8,fp8,0,0.040789333482583366
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,512,64,4,128,0,1,float16,fp8,0,0.059392000238100685
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,512,64,8,128,0,1,float16,float16,0,0.05973333120346069
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,512,64,8,128,0,1,float16,fp8,0,0.060415998101234436
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,512,64,8,128,0,1,fp8,fp8,0,0.04130133241415024
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,256,64,1,128,0,1,float16,float16,0,7.1301116943359375
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,256,64,1,128,0,1,float16,fp8,0,7.146666844685872
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,256,64,1,128,0,1,fp8,fp8,0,5.211818695068359
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,256,64,2,128,0,1,fp8,fp8,0,5.398698806762695
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,256,64,2,128,0,1,float16,float16,0,7.2282454172770185
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,256,64,2,128,0,1,float16,fp8,0,7.1485443115234375
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,256,64,4,128,0,1,float16,float16,0,7.53271484375
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,256,64,4,128,0,1,float16,fp8,0,7.424341201782227
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,256,64,4,128,0,1,fp8,fp8,0,5.573290506998698
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,256,64,8,128,0,1,float16,float16,0,7.990613301595052
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,256,64,8,128,0,1,float16,fp8,0,7.82813835144043
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,256,64,8,128,0,1,fp8,fp8,0,5.917013168334961
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,256,64,1,128,0,1,float16,float16,0,3.433984120686849
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,256,64,64,128,0,1,fp8,fp8,0,6.825813293457031
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,256,64,64,128,0,1,float16,float16,0,8.531967798868815
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,256,64,64,128,0,1,float16,fp8,0,8.009386698404947
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,256,64,1,128,0,1,float16,fp8,0,3.438591957092285
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,256,64,1,128,0,1,fp8,fp8,0,2.4690346717834473
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,256,64,2,128,0,1,float16,float16,0,3.5085652669270835
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,256,64,2,128,0,1,float16,fp8,0,3.483818689982096
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,256,64,2,128,0,1,fp8,fp8,0,2.566826661427816
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,256,64,4,128,0,1,float16,float16,0,3.6705280939737954
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,256,64,4,128,0,1,float16,fp8,0,3.6331520080566406
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,256,64,4,128,0,1,fp8,fp8,0,2.692437489827474
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,256,64,8,128,0,1,float16,float16,0,3.9763625462849936
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,256,64,8,128,0,1,fp8,fp8,0,2.939903895060221
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,256,64,8,128,0,1,float16,fp8,0,3.917994817097982
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,256,64,64,128,0,1,float16,float16,0,4.28817081451416
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,256,64,1,128,0,1,float16,float16,0,1.8165760040283203
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,256,64,64,128,0,1,float16,fp8,0,4.043775876363118
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,256,64,64,128,0,1,fp8,fp8,0,3.4263038635253906
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,256,64,1,128,0,1,float16,fp8,0,1.807360013326009
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,256,64,1,128,0,1,fp8,fp8,0,1.2402346928914387
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,256,64,2,128,0,1,float16,float16,0,1.8452480634053547
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,256,64,2,128,0,1,fp8,fp8,0,1.2878506978352864
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,256,64,2,128,0,1,float16,fp8,0,1.8399573961893718
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,256,64,4,128,0,1,float16,float16,0,1.8205013275146484
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,256,64,4,128,0,1,float16,fp8,0,1.924949328104655
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,256,64,4,128,0,1,fp8,fp8,0,1.353216012318929
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,256,64,8,128,0,1,float16,float16,0,1.9848532676696777
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,256,64,8,128,0,1,float16,fp8,0,1.9512319564819336
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,256,64,8,128,0,1,fp8,fp8,0,1.4347947438557942
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,256,64,64,128,0,1,float16,fp8,0,2.0266666412353516
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,256,64,64,128,0,1,float16,float16,0,2.1544960339864097
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,256,64,1,128,0,1,float16,float16,0,0.799402634302775
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,256,64,64,128,0,1,fp8,fp8,0,1.612287998199463
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,256,64,1,128,0,1,float16,fp8,0,0.8002560138702393
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,256,64,1,128,0,1,fp8,fp8,0,0.5628586610158285
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,256,64,2,128,0,1,float16,float16,0,0.8198826313018799
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,256,64,2,128,0,1,float16,fp8,0,0.8156159718831381
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,256,64,2,128,0,1,fp8,fp8,0,0.5882879892985026
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,256,64,4,128,0,1,float16,fp8,0,0.866645336151123
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,256,64,4,128,0,1,fp8,fp8,0,0.6193493207295736
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,256,64,4,128,0,1,float16,float16,0,0.86954665184021
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,256,64,8,128,0,1,float16,float16,0,0.9582933584849039
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,256,64,8,128,0,1,float16,fp8,0,0.9371306896209717
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,256,64,8,128,0,1,fp8,fp8,0,0.6860799789428711
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,256,64,64,128,0,1,float16,float16,0,1.0504533449808757
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,256,64,1,128,0,1,float16,float16,0,0.3193173408508301
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,256,64,64,128,0,1,fp8,fp8,0,0.7627092997233073
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,256,64,1,128,0,1,float16,fp8,0,0.3131733338038127
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,256,64,64,128,0,1,float16,fp8,0,0.9765546321868896
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,256,64,1,128,0,1,fp8,fp8,0,0.24627200762430826
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,256,64,2,128,0,1,float16,fp8,0,0.328874667485555
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,256,64,2,128,0,1,float16,float16,0,0.3283626635869344
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,256,64,2,128,0,1,fp8,fp8,0,0.25804799795150757
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,256,64,4,128,0,1,float16,float16,0,0.34986666838328045
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,256,64,4,128,0,1,float16,fp8,0,0.340992013613383
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,256,64,4,128,0,1,fp8,fp8,0,0.272213339805603
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,256,64,8,128,0,1,float16,float16,0,0.39628799756368
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,256,64,8,128,0,1,float16,fp8,0,0.385535995165507
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,256,64,8,128,0,1,fp8,fp8,0,0.30907734235127765
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,256,64,64,128,0,1,float16,fp8,0,0.40379734834035236
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,256,64,64,128,0,1,float16,float16,0,0.44697598616282147
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,256,64,1,128,0,1,float16,float16,0,0.130730668703715
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,256,64,64,128,0,1,fp8,fp8,0,0.31487999359766644
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,256,64,1,128,0,1,float16,fp8,0,0.13619200388590494
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,256,64,2,128,0,1,float16,float16,0,0.1360213359196981
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,256,64,1,128,0,1,fp8,fp8,0,0.08447999755541484
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,256,64,2,128,0,1,float16,fp8,0,0.14062933127085367
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,256,64,2,128,0,1,fp8,fp8,0,0.08584533135096233
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,256,64,4,128,0,1,float16,float16,0,0.13687466581662497
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,256,64,4,128,0,1,float16,fp8,0,0.13346133629480997
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,256,64,4,128,0,1,fp8,fp8,0,0.08550399541854858
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,256,64,8,128,0,1,float16,float16,0,0.1346560021241506
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,256,64,8,128,0,1,float16,fp8,0,0.13567999998728433
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,256,64,8,128,0,1,fp8,fp8,0,0.08516266942024231
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,256,64,64,128,0,1,float16,float16,0,0.09710933764775594
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,256,64,64,128,0,1,float16,fp8,0,0.08328533172607422
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,256,64,64,128,0,1,fp8,fp8,0,0.05580799778302511
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,256,64,1,128,0,1,float16,float16,0,0.06929066777229309
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,256,64,1,128,0,1,float16,fp8,0,0.06656000018119812
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,256,64,1,128,0,1,fp8,fp8,0,0.048810665806134544
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,256,64,2,128,0,1,float16,float16,0,0.06894933183987935
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,256,64,2,128,0,1,float16,fp8,0,0.06809600194295247
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,256,64,2,128,0,1,fp8,fp8,0,0.049322664737701416
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,256,64,4,128,0,1,float16,float16,0,0.06690133114655812
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,256,64,4,128,0,1,float16,fp8,0,0.06809600194295247
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,256,64,4,128,0,1,fp8,fp8,0,0.04949333270390829
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,256,64,8,128,0,1,float16,float16,0,0.06843733290831248
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,256,64,8,128,0,1,float16,fp8,0,0.06758399804433186
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,256,64,8,128,0,1,fp8,fp8,0,0.04966400067011515
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,256,64,64,128,0,1,float16,float16,0,0.045226668318112694
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,256,64,64,128,0,1,float16,fp8,0,0.04471466441949209
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,256,64,64,128,0,1,fp8,fp8,0,0.03379199902216593
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,256,64,1,128,0,1,float16,fp8,0,0.04164266586303711
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,256,64,1,128,0,1,fp8,fp8,0,0.030720000465710957
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,256,64,2,128,0,1,float16,float16,0,0.040618665516376495
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,256,64,1,128,0,1,float16,float16,0,0.04095999896526337
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,256,64,2,128,0,1,float16,fp8,0,0.04164266586303711
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,256,64,2,128,0,1,fp8,fp8,0,0.031061333914597828
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,256,64,4,128,0,1,float16,fp8,0,0.040789333482583366
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,256,64,4,128,0,1,float16,float16,0,0.040789333482583366
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,256,64,4,128,0,1,fp8,fp8,0,0.031061333914597828
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,256,64,8,128,0,1,float16,float16,0,0.040448000033696495
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,256,64,8,128,0,1,float16,fp8,0,0.040618665516376495
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,256,64,8,128,0,1,fp8,fp8,0,0.03089066594839096
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,256,64,64,128,0,1,float16,float16,0,0.0266239990790685
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,256,64,64,128,0,1,float16,fp8,0,0.02611200014750163
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,256,64,64,128,0,1,fp8,fp8,0,0.019626667102177937
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,256,64,1,128,0,1,float16,float16,0,0.025941332181294758
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,256,64,1,128,0,1,float16,fp8,0,0.02611200014750163
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,256,64,1,128,0,1,fp8,fp8,0,0.019626667102177937
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,256,64,2,128,0,1,float16,float16,0,0.02491733431816101
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,256,64,2,128,0,1,fp8,fp8,0,0.0194560003777345
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,256,64,2,128,0,1,float16,fp8,0,0.025258667767047882
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,256,64,4,128,0,1,float16,float16,0,0.024746666351954143
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,256,64,4,128,0,1,float16,fp8,0,0.025087999800841015
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,256,64,4,128,0,1,fp8,fp8,0,0.019285333653291065
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,256,64,8,128,0,1,float16,float16,0,0.025087999800841015
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,256,64,8,128,0,1,float16,fp8,0,0.025087999800841015
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,256,64,8,128,0,1,fp8,fp8,0,0.01911466692884763
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,128,64,1,128,0,1,float16,float16,0,3.450197219848633
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,128,64,1,128,0,1,fp8,fp8,0,2.4094719886779785
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,128,64,1,128,0,1,float16,fp8,0,3.4551467895507812
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,128,64,2,128,0,1,float16,float16,0,3.519317309061686
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,128,64,2,128,0,1,float16,fp8,0,3.485525449117025
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,128,64,2,128,0,1,fp8,fp8,0,2.4524799982706704
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,128,64,4,128,0,1,float16,float16,0,3.6474879582722983
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,128,64,4,128,0,1,float16,fp8,0,3.6044801076253257
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,128,64,4,128,0,1,fp8,fp8,0,2.623487949371338
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,128,64,8,128,0,1,float16,float16,0,3.9650986989339194
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,128,64,8,128,0,1,float16,fp8,0,3.9046827952067056
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,128,64,8,128,0,1,fp8,fp8,0,2.902186711629232
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,128,64,64,128,0,1,float16,float16,0,4.35148811340332
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,128,64,1,128,0,1,float16,float16,0,1.7126399676005046
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,128,64,64,128,0,1,float16,fp8,0,4.083882649739583
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,128,64,64,128,0,1,fp8,fp8,0,3.4469547271728516
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,128,64,1,128,0,1,float16,fp8,0,1.6977920532226562
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,128,64,1,128,0,1,fp8,fp8,0,1.141760031382243
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,128,64,2,128,0,1,float16,fp8,0,1.7150293986002605
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,128,64,2,128,0,1,float16,float16,0,1.7338026364644368
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,128,64,2,128,0,1,fp8,fp8,0,1.1642879645029705
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,128,64,4,128,0,1,float16,float16,0,1.8124799728393555
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,128,64,4,128,0,1,float16,fp8,0,1.7931946118672688
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,128,64,4,128,0,1,fp8,fp8,0,1.2330666383107503
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,128,64,8,128,0,1,float16,float16,0,1.9836586316426594
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,128,64,8,128,0,1,float16,fp8,0,1.9469653765360515
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,128,64,8,128,0,1,fp8,fp8,0,1.4004906018575032
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,128,64,64,128,0,1,float16,float16,0,2.1544960339864097
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,128,64,64,128,0,1,float16,fp8,0,2.041173299153646
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,128,64,1,128,0,1,float16,float16,0,0.8674986362457275
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,128,64,64,128,0,1,fp8,fp8,0,1.6197973887125652
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,128,64,1,128,0,1,float16,fp8,0,0.8596479892730713
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,128,64,1,128,0,1,fp8,fp8,0,0.570026675860087
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,128,64,2,128,0,1,float16,float16,0,0.8801279862721761
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,128,64,2,128,0,1,float16,fp8,0,0.8777386347452799
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,128,64,2,128,0,1,fp8,fp8,0,0.5879466533660889
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,128,64,4,128,0,1,float16,float16,0,0.928938627243042
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,128,64,4,128,0,1,fp8,fp8,0,0.6181546847025553
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,128,64,4,128,0,1,float16,fp8,0,0.8577706813812256
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,128,64,8,128,0,1,float16,float16,0,0.9586346944173177
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,128,64,8,128,0,1,float16,fp8,0,0.9483946959177653
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,128,64,8,128,0,1,fp8,fp8,0,0.6615039904912313
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,128,64,64,128,0,1,float16,float16,0,1.0664959748586018
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,128,64,64,128,0,1,fp8,fp8,0,0.7586133480072021
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,128,64,1,128,0,1,float16,float16,0,0.31061333417892456
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,128,64,1,128,0,1,float16,fp8,0,0.31112533807754517
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,128,64,1,128,0,1,fp8,fp8,0,0.2230613430341085
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,128,64,64,128,0,1,float16,fp8,0,0.986624002456665
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,128,64,2,128,0,1,float16,float16,0,0.32290132840474445
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,128,64,2,128,0,1,fp8,fp8,0,0.2290346622467041
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,128,64,2,128,0,1,float16,fp8,0,0.3198293248812358
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,128,64,4,128,0,1,float16,fp8,0,0.340992013613383
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,128,64,4,128,0,1,float16,float16,0,0.3490133285522461
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,128,64,4,128,0,1,fp8,fp8,0,0.24473599592844644
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,128,64,8,128,0,1,float16,float16,0,0.39953064918518066
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,128,64,8,128,0,1,float16,fp8,0,0.38860801855723065
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,128,64,8,128,0,1,fp8,fp8,0,0.2797226707140605
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,128,64,64,128,0,1,float16,float16,0,0.4490240017573039
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,128,64,64,128,0,1,float16,fp8,0,0.40345601240793866
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,128,64,64,128,0,1,fp8,fp8,0,0.3068586587905884
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,128,64,1,128,0,1,float16,fp8,0,0.09403733412424724
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,128,64,1,128,0,1,fp8,fp8,0,0.06860800087451935
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,128,64,1,128,0,1,float16,float16,0,0.09676800171534221
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,128,64,2,128,0,1,float16,float16,0,0.09710933764775594
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,128,64,2,128,0,1,float16,fp8,0,0.0981333355108897
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,128,64,2,128,0,1,fp8,fp8,0,0.06877866884072621
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,128,64,4,128,0,1,float16,float16,0,0.09540266791979472
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,128,64,4,128,0,1,float16,fp8,0,0.09471999605496724
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,128,64,4,128,0,1,fp8,fp8,0,0.07048533360163371
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,128,64,8,128,0,1,float16,float16,0,0.10052266716957092
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,128,64,8,128,0,1,float16,fp8,0,0.09898666540781657
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,128,64,8,128,0,1,fp8,fp8,0,0.07116800049940745
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,128,64,64,128,0,1,float16,float16,0,0.08362666765848796
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,128,64,64,128,0,1,float16,fp8,0,0.06195199986298879
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,128,64,64,128,0,1,fp8,fp8,0,0.045738667249679565
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,128,64,1,128,0,1,float16,fp8,0,0.05171200136343638
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,128,64,1,128,0,1,float16,float16,0,0.051029334465662636
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,128,64,1,128,0,1,fp8,fp8,0,0.040106666584809623
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,128,64,2,128,0,1,float16,float16,0,0.051882664362589516
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,128,64,2,128,0,1,float16,fp8,0,0.051541333397229515
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,128,64,2,128,0,1,fp8,fp8,0,0.040618665516376495
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,128,64,4,128,0,1,float16,float16,0,0.050517335534095764
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,128,64,4,128,0,1,float16,fp8,0,0.05085866649945577
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,128,64,4,128,0,1,fp8,fp8,0,0.04027733455101649
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,128,64,8,128,0,1,float16,float16,0,0.05222400029500326
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,128,64,8,128,0,1,float16,fp8,0,0.051541333397229515
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,128,64,8,128,0,1,fp8,fp8,0,0.040618665516376495
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,128,64,64,128,0,1,float16,float16,0,0.03549866626660029
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,128,64,64,128,0,1,float16,fp8,0,0.03498666733503342
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,128,64,64,128,0,1,fp8,fp8,0,0.027647999425729115
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,128,64,1,128,0,1,float16,float16,0,0.0314026673634847
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,128,64,1,128,0,1,float16,fp8,0,0.031231999397277832
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,128,64,1,128,0,1,fp8,fp8,0,0.025429333249727886
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,128,64,2,128,0,1,float16,float16,0,0.032255999743938446
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,128,64,2,128,0,1,float16,fp8,0,0.03242666771014532
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,128,64,4,128,0,1,float16,float16,0,0.0315733328461647
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,128,64,2,128,0,1,fp8,fp8,0,0.025087999800841015
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,128,64,4,128,0,1,float16,fp8,0,0.03089066594839096
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,128,64,4,128,0,1,fp8,fp8,0,0.024746666351954143
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,128,64,8,128,0,1,float16,float16,0,0.031231999397277832
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,128,64,8,128,0,1,float16,fp8,0,0.031231999397277832
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,128,64,8,128,0,1,fp8,fp8,0,0.025600001215934753
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,128,64,64,128,0,1,float16,float16,0,0.021503999829292297
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,128,64,64,128,0,1,float16,fp8,0,0.021333334346612293
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,128,64,1,128,0,1,float16,float16,0,0.019968000551064808
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,128,64,64,128,0,1,fp8,fp8,0,0.01672533278663953
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,128,64,1,128,0,1,float16,fp8,0,0.02065066620707512
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,128,64,1,128,0,1,fp8,fp8,0,0.015872000406185787
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,128,64,2,128,0,1,float16,float16,0,0.020138667275508244
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,128,64,2,128,0,1,float16,fp8,0,0.020479999482631683
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,128,64,2,128,0,1,fp8,fp8,0,0.016554666062196095
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,128,64,4,128,0,1,float16,float16,0,0.019797333826621372
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,128,64,4,128,0,1,float16,fp8,0,0.0194560003777345
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,128,64,4,128,0,1,fp8,fp8,0,0.016042667130629223
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,128,64,8,128,0,1,float16,float16,0,0.019797333826621372
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,128,64,8,128,0,1,float16,fp8,0,0.019797333826621372
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,128,64,8,128,0,1,fp8,fp8,0,0.01672533278663953
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,128,64,64,128,0,1,float16,float16,0,0.01570133368174235
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,128,64,64,128,0,1,float16,fp8,0,0.015360000232855478
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,128,64,64,128,0,1,fp8,fp8,0,0.012800000607967377
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,128,64,1,128,0,1,float16,float16,0,0.014677333335081736
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,128,64,1,128,0,1,float16,fp8,0,0.015018666783968607
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,128,64,1,128,0,1,fp8,fp8,0,0.012800000607967377
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,128,64,2,128,0,1,float16,float16,0,0.015018666783968607
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,128,64,2,128,0,1,float16,fp8,0,0.015018666783968607
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,128,64,2,128,0,1,fp8,fp8,0,0.012629333883523941
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,128,64,4,128,0,1,float16,float16,0,0.014677333335081736
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,128,64,4,128,0,1,float16,fp8,0,0.014677333335081736
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,128,64,4,128,0,1,fp8,fp8,0,0.012458667159080505
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,128,64,8,128,0,1,float16,float16,0,0.014677333335081736
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,128,64,8,128,0,1,float16,fp8,0,0.014848000059525171
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,128,64,8,128,0,1,fp8,fp8,0,0.012800000607967377
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,64,64,1,128,0,1,float16,float16,0,1.702229340871175
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,64,64,1,128,0,1,float16,fp8,0,1.6938667297363281
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,64,64,1,128,0,1,fp8,fp8,0,1.1245226860046387
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,64,64,2,128,0,1,float16,fp8,0,1.709397315979004
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,64,64,2,128,0,1,float16,float16,0,1.7249280611673992
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,64,64,2,128,0,1,fp8,fp8,0,1.1775999863942463
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,64,64,4,128,0,1,float16,float16,0,1.8092373212178547
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,64,64,4,128,0,1,float16,fp8,0,1.7949013710021973
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,64,64,8,128,0,1,float16,float16,0,1.9761494000752766
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,64,64,4,128,0,1,fp8,fp8,0,1.2788053353627522
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,64,64,8,128,0,1,float16,fp8,0,1.9415040016174316
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,64,64,8,128,0,1,fp8,fp8,0,1.435306708017985
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,64,64,64,128,0,1,float16,float16,0,2.1833386421203613
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,64,64,1,128,0,1,float16,float16,0,0.8116906483968099
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,64,64,64,128,0,1,float16,fp8,0,2.057216008504232
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,64,64,64,128,0,1,fp8,fp8,0,1.624234676361084
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,64,64,1,128,0,1,float16,fp8,0,0.8111786842346191
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,64,64,1,128,0,1,fp8,fp8,0,0.5208746592203776
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,64,64,2,128,0,1,float16,float16,0,0.8297812938690186
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,64,64,2,128,0,1,float16,fp8,0,0.8250026702880859
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,64,64,2,128,0,1,fp8,fp8,0,0.5437440077463785
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,64,64,4,128,0,1,float16,float16,0,0.8717652956644694
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,64,64,4,128,0,1,fp8,fp8,0,0.5881173213322958
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,64,64,4,128,0,1,float16,fp8,0,0.8656213283538818
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,64,64,8,128,0,1,float16,float16,0,0.956928014755249
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,64,64,8,128,0,1,float16,fp8,0,0.9384960333506266
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,64,64,8,128,0,1,fp8,fp8,0,0.672426700592041
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,64,64,64,128,0,1,float16,float16,0,1.057792027791341
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,64,64,64,128,0,1,float16,fp8,0,0.9994239807128906
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,64,64,64,128,0,1,fp8,fp8,0,0.7726079622904459
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,64,64,1,128,0,1,float16,float16,0,0.339626669883728
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,64,64,1,128,0,1,float16,fp8,0,0.3333119948705037
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,64,64,1,128,0,1,fp8,fp8,0,0.2300586700439453
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,64,64,2,128,0,1,float16,float16,0,0.34867199261983234
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,64,64,2,128,0,1,float16,fp8,0,0.34628268082936603
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,64,64,2,128,0,1,fp8,fp8,0,0.23381332556406656
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,64,64,4,128,0,1,float16,float16,0,0.3536213239034017
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,64,64,4,128,0,1,float16,fp8,0,0.36932265758514404
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,64,64,4,128,0,1,fp8,fp8,0,0.23534933725992838
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,64,64,8,128,0,1,float16,float16,0,0.4010666608810425
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,64,64,8,128,0,1,float16,fp8,0,0.39202133814493817
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,64,64,8,128,0,1,fp8,fp8,0,0.26743467648824054
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,64,64,64,128,0,1,float16,float16,0,0.4546560049057007
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,64,64,64,128,0,1,float16,fp8,0,0.40482131640116376
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,64,64,64,128,0,1,fp8,fp8,0,0.32255999247233075
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,64,64,1,128,0,1,float16,float16,0,0.08038400113582611
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,64,64,1,128,0,1,float16,fp8,0,0.08089600006739299
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,64,64,1,128,0,1,fp8,fp8,0,0.06109866499900818
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,64,64,2,128,0,1,float16,float16,0,0.08260266482830048
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,64,64,2,128,0,1,float16,fp8,0,0.08157866696516673
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,64,64,2,128,0,1,fp8,fp8,0,0.06229333579540253
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,64,64,4,128,0,1,float16,float16,0,0.0820906658967336
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,64,64,4,128,0,1,float16,fp8,0,0.08277333279450734
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,64,64,4,128,0,1,fp8,fp8,0,0.06229333579540253
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,64,64,8,128,0,1,float16,float16,0,0.09079466263453166
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,64,64,8,128,0,1,fp8,fp8,0,0.06365866462389629
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,64,64,64,128,0,1,float16,float16,0,0.07560533285140991
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,64,64,8,128,0,1,float16,fp8,0,0.08738133311271667
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,64,64,64,128,0,1,float16,fp8,0,0.05870933334032694
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,64,64,64,128,0,1,fp8,fp8,0,0.041984001795450844
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,64,64,1,128,0,1,float16,float16,0,0.04454400142033895
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,64,64,1,128,0,1,float16,fp8,0,0.04471466441949209
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,64,64,1,128,0,1,fp8,fp8,0,0.03549866626660029
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,64,64,2,128,0,1,float16,float16,0,0.04454400142033895
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,64,64,2,128,0,1,float16,fp8,0,0.04386133452256521
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,64,64,2,128,0,1,fp8,fp8,0,0.03515733281771342
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,64,64,4,128,0,1,float16,float16,0,0.04420266548792521
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,64,64,4,128,0,1,float16,fp8,0,0.04386133452256521
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,64,64,4,128,0,1,fp8,fp8,0,0.03618133316437403
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,64,64,8,128,0,1,float16,float16,0,0.045226668318112694
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,64,64,8,128,0,1,float16,fp8,0,0.04471466441949209
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,64,64,8,128,0,1,fp8,fp8,0,0.03583999971548716
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,64,64,64,128,0,1,float16,float16,0,0.0315733328461647
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,64,64,64,128,0,1,fp8,fp8,0,0.025087999800841015
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,64,64,64,128,0,1,float16,fp8,0,0.030207999050617218
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,64,64,1,128,0,1,float16,float16,0,0.02679466704527537
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,64,64,1,128,0,1,float16,fp8,0,0.0264533335963885
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,64,64,1,128,0,1,fp8,fp8,0,0.022357332209746044
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,64,64,2,128,0,1,float16,float16,0,0.027136000494162243
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,64,64,2,128,0,1,float16,fp8,0,0.027306665976842243
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,64,64,2,128,0,1,fp8,fp8,0,0.022015998760859173
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,64,64,4,128,0,1,float16,float16,0,0.02679466704527537
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,64,64,4,128,0,1,float16,fp8,0,0.02679466704527537
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,64,64,4,128,0,1,fp8,fp8,0,0.02252800017595291
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,64,64,8,128,0,1,float16,float16,0,0.027136000494162243
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,64,64,8,128,0,1,float16,fp8,0,0.027306665976842243
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,64,64,8,128,0,1,fp8,fp8,0,0.022357332209746044
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,64,64,64,128,0,1,float16,float16,0,0.01877333347996076
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,64,64,64,128,0,1,float16,fp8,0,0.01826133330663045
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,64,64,64,128,0,1,fp8,fp8,0,0.015530666957298914
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,64,64,1,128,0,1,float16,float16,0,0.0170666662355264
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,64,64,1,128,0,1,float16,fp8,0,0.017407999684413273
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,64,64,1,128,0,1,fp8,fp8,0,0.0145066666106383
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,64,64,2,128,0,1,float16,float16,0,0.01757866640885671
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,64,64,2,128,0,1,float16,fp8,0,0.0170666662355264
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,64,64,2,128,0,1,fp8,fp8,0,0.014677333335081736
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,64,64,4,128,0,1,float16,float16,0,0.016895999511082966
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,64,64,4,128,0,1,float16,fp8,0,0.016895999511082966
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,64,64,8,128,0,1,float16,float16,0,0.016895999511082966
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,64,64,4,128,0,1,fp8,fp8,0,0.0145066666106383
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,64,64,8,128,0,1,float16,fp8,0,0.016895999511082966
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,64,64,8,128,0,1,fp8,fp8,0,0.014848000059525171
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,64,64,64,128,0,1,float16,float16,0,0.013653332988421122
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,64,64,64,128,0,1,float16,fp8,0,0.013141332815090815
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,64,64,64,128,0,1,fp8,fp8,0,0.011605333536863327
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,64,64,1,128,0,1,float16,float16,0,0.012970666090647379
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,64,64,1,128,0,1,float16,fp8,0,0.012970666090647379
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,64,64,1,128,0,1,fp8,fp8,0,0.011605333536863327
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,64,64,2,128,0,1,float16,float16,0,0.012629333883523941
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,64,64,2,128,0,1,float16,fp8,0,0.012800000607967377
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,64,64,2,128,0,1,fp8,fp8,0,0.011434666812419891
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,64,64,4,128,0,1,float16,float16,0,0.012458667159080505
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,64,64,4,128,0,1,float16,fp8,0,0.01228800043463707
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,64,64,4,128,0,1,fp8,fp8,0,0.011605333536863327
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,64,64,8,128,0,1,float16,float16,0,0.012629333883523941
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,64,64,8,128,0,1,float16,fp8,0,0.012629333883523941
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,64,64,8,128,0,1,fp8,fp8,0,0.011264000087976456
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,64,64,64,128,0,1,float16,float16,0,0.01109333336353302
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,64,64,64,128,0,1,float16,fp8,0,0.011264000087976456
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,64,64,64,128,0,1,fp8,fp8,0,0.009898666913310686
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,64,64,1,128,0,1,float16,float16,0,0.011264000087976456
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,64,64,1,128,0,1,float16,fp8,0,0.011605333536863327
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,64,64,1,128,0,1,fp8,fp8,0,0.009898666913310686
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,64,64,2,128,0,1,float16,float16,0,0.010922666639089584
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,64,64,2,128,0,1,float16,fp8,0,0.01109333336353302
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,64,64,2,128,0,1,fp8,fp8,0,0.009898666913310686
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,64,64,4,128,0,1,float16,fp8,0,0.010751999914646149
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,64,64,4,128,0,1,float16,float16,0,0.010751999914646149
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,64,64,4,128,0,1,fp8,fp8,0,0.00972800018886725
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,64,64,8,128,0,1,float16,float16,0,0.010922666639089584
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,64,64,8,128,0,1,float16,fp8,0,0.010922666639089584
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,64,64,8,128,0,1,fp8,fp8,0,0.009898666913310686
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,32,64,1,128,0,1,float16,fp8,0,0.7982079982757568
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,32,64,1,128,0,1,float16,float16,0,0.8009386857350668
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,32,64,1,128,0,1,fp8,fp8,0,0.5399893522262573
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,32,64,2,128,0,1,float16,float16,0,0.823637326558431
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,32,64,2,128,0,1,float16,fp8,0,0.8157866795857748
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,32,64,2,128,0,1,fp8,fp8,0,0.551423986752828
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,32,64,4,128,0,1,float16,float16,0,0.8702293237050375
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,32,64,4,128,0,1,float16,fp8,0,0.8605013688405355
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,32,64,4,128,0,1,fp8,fp8,0,0.5833386580149332
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,32,64,8,128,0,1,float16,float16,0,0.9598293304443359
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,32,64,8,128,0,1,float16,fp8,0,0.9405439694722494
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,32,64,8,128,0,1,fp8,fp8,0,0.6481920083363851
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,32,64,64,128,0,1,float16,float16,0,1.072981357574463
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,32,64,64,128,0,1,fp8,fp8,0,0.7572480042775472
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,32,64,64,128,0,1,float16,fp8,0,0.9922560056050619
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,32,64,1,128,0,1,float16,float16,0,0.3141973416010539
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,32,64,1,128,0,1,float16,fp8,0,0.3413333495457967
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,32,64,1,128,0,1,fp8,fp8,0,0.22408533096313477
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,32,64,2,128,0,1,float16,float16,0,0.32477867603302
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,32,64,2,128,0,1,fp8,fp8,0,0.23057067394256592
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,32,64,2,128,0,1,float16,fp8,0,0.32392533620198566
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,32,64,4,128,0,1,float16,float16,0,0.3490133285522461
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,32,64,4,128,0,1,float16,fp8,0,0.34508800506591797
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,32,64,8,128,0,1,float16,float16,0,0.40328534444173175
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,32,64,4,128,0,1,fp8,fp8,0,0.24627200762430826
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,32,64,8,128,0,1,float16,fp8,0,0.39031465848286945
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,32,64,8,128,0,1,fp8,fp8,0,0.2855253418286641
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,32,64,64,128,0,1,float16,float16,0,0.46506667137145996
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,32,64,64,128,0,1,float16,fp8,0,0.43161598841349286
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,32,64,64,128,0,1,fp8,fp8,0,0.31726932525634766
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,32,64,1,128,0,1,float16,float16,0,0.08635733524958293
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,32,64,1,128,0,1,float16,fp8,0,0.0846506655216217
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,32,64,2,128,0,1,float16,float16,0,0.08755200107892354
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,32,64,1,128,0,1,fp8,fp8,0,0.06809600194295247
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,32,64,2,128,0,1,float16,fp8,0,0.08686932921409607
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,32,64,2,128,0,1,fp8,fp8,0,0.06877866884072621
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,32,64,4,128,0,1,float16,float16,0,0.08721066514650981
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,32,64,4,128,0,1,float16,fp8,0,0.0865280032157898
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,32,64,4,128,0,1,fp8,fp8,0,0.06946133573849995
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,32,64,8,128,0,1,float16,float16,0,0.09489066402117412
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,32,64,8,128,0,1,float16,fp8,0,0.09079466263453166
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,32,64,8,128,0,1,fp8,fp8,0,0.06963199873765309
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,32,64,64,128,0,1,float16,float16,0,0.07850666840871175
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,32,64,64,128,0,1,float16,fp8,0,0.06092800199985504
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,32,64,1,128,0,1,float16,float16,0,0.045909335215886436
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,32,64,64,128,0,1,fp8,fp8,0,0.04488533238569895
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,32,64,1,128,0,1,float16,fp8,0,0.04642133414745331
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,32,64,1,128,0,1,fp8,fp8,0,0.038912000755469
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,32,64,2,128,0,1,float16,float16,0,0.04659200211366018
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,32,64,2,128,0,1,float16,fp8,0,0.04659200211366018
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,32,64,2,128,0,1,fp8,fp8,0,0.03874133278926214
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,32,64,4,128,0,1,float16,float16,0,0.04642133414745331
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,32,64,4,128,0,1,float16,fp8,0,0.04625066618124644
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,32,64,4,128,0,1,fp8,fp8,0,0.03908266623814901
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,32,64,8,128,0,1,float16,float16,0,0.04761599997679392
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,32,64,8,128,0,1,float16,fp8,0,0.04710400104522705
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,32,64,8,128,0,1,fp8,fp8,0,0.03976533313592275
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,32,64,64,128,0,1,float16,float16,0,0.032255999743938446
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,32,64,64,128,0,1,float16,fp8,0,0.030378667016824085
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,32,64,64,128,0,1,fp8,fp8,0,0.025941332181294758
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,32,64,1,128,0,1,float16,float16,0,0.027989332874615986
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,32,64,1,128,0,1,float16,fp8,0,0.028160000840822857
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,32,64,1,128,0,1,fp8,fp8,0,0.024234667420387268
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,32,64,2,128,0,1,float16,float16,0,0.02867199977238973
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,32,64,2,128,0,1,float16,fp8,0,0.02867199977238973
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,32,64,4,128,0,1,float16,float16,0,0.027989332874615986
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,32,64,2,128,0,1,fp8,fp8,0,0.0240639994541804
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,32,64,4,128,0,1,float16,fp8,0,0.028160000840822857
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,32,64,4,128,0,1,fp8,fp8,0,0.023893333971500397
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,32,64,8,128,0,1,float16,float16,0,0.028330666323502857
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,32,64,8,128,0,1,float16,fp8,0,0.028501334289709728
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,32,64,8,128,0,1,fp8,fp8,0,0.024746666351954143
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,32,64,64,128,0,1,float16,float16,0,0.019797333826621372
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,32,64,64,128,0,1,float16,fp8,0,0.01911466692884763
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,32,64,1,128,0,1,float16,fp8,0,0.018090666582187016
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,32,64,1,128,0,1,float16,float16,0,0.018090666582187016
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,32,64,64,128,0,1,fp8,fp8,0,0.01621333385507266
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,32,64,1,128,0,1,fp8,fp8,0,0.016042667130629223
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,32,64,2,128,0,1,float16,float16,0,0.018432000031073887
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,32,64,2,128,0,1,float16,fp8,0,0.01877333347996076
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,32,64,2,128,0,1,fp8,fp8,0,0.015872000406185787
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,32,64,4,128,0,1,float16,fp8,0,0.01757866640885671
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,32,64,4,128,0,1,float16,float16,0,0.017749333133300144
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,32,64,4,128,0,1,fp8,fp8,0,0.01570133368174235
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,32,64,8,128,0,1,float16,float16,0,0.018090666582187016
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,32,64,8,128,0,1,float16,fp8,0,0.018090666582187016
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,32,64,8,128,0,1,fp8,fp8,0,0.016384000579516094
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,32,64,64,128,0,1,float16,float16,0,0.013653332988421122
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,32,64,64,128,0,1,float16,fp8,0,0.012970666090647379
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,32,64,64,128,0,1,fp8,fp8,0,0.011605333536863327
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,32,64,1,128,0,1,float16,float16,0,0.012629333883523941
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,32,64,1,128,0,1,float16,fp8,0,0.012629333883523941
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,32,64,2,128,0,1,float16,float16,0,0.012629333883523941
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,32,64,2,128,0,1,float16,fp8,0,0.012970666090647379
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,32,64,1,128,0,1,fp8,fp8,0,0.011264000087976456
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,32,64,2,128,0,1,fp8,fp8,0,0.011776000261306763
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,32,64,4,128,0,1,float16,float16,0,0.012117333710193634
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,32,64,4,128,0,1,float16,fp8,0,0.012117333710193634
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,32,64,8,128,0,1,float16,float16,0,0.01228800043463707
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,32,64,4,128,0,1,fp8,fp8,0,0.011264000087976456
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,32,64,8,128,0,1,float16,fp8,0,0.012458667159080505
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,32,64,8,128,0,1,fp8,fp8,0,0.011605333536863327
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,32,64,64,128,0,1,float16,float16,0,0.010069333637754122
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,32,64,64,128,0,1,float16,fp8,0,0.010239999741315842
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,32,64,64,128,0,1,fp8,fp8,0,0.009216000015536943
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,32,64,1,128,0,1,float16,float16,0,0.010069333637754122
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,32,64,1,128,0,1,fp8,fp8,0,0.010239999741315842
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,32,64,1,128,0,1,float16,fp8,0,0.010410666465759277
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,32,64,2,128,0,1,float16,fp8,0,0.010069333637754122
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,32,64,2,128,0,1,float16,float16,0,0.009898666913310686
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,32,64,2,128,0,1,fp8,fp8,0,0.009557333464423815
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,32,64,4,128,0,1,float16,float16,0,0.00972800018886725
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,32,64,4,128,0,1,float16,fp8,0,0.009898666913310686
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,32,64,4,128,0,1,fp8,fp8,0,0.009045333291093508
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,32,64,8,128,0,1,float16,float16,0,0.00972800018886725
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,32,64,8,128,0,1,float16,fp8,0,0.00972800018886725
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,32,64,8,128,0,1,fp8,fp8,0,0.009216000015536943
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,32,64,64,128,0,1,float16,float16,0,0.008874666566650072
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,32,64,64,128,0,1,float16,fp8,0,0.009045333291093508
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,32,64,64,128,0,1,fp8,fp8,0,0.008703999842206636
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,32,64,1,128,0,1,float16,float16,0,0.009216000015536943
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,32,64,1,128,0,1,float16,fp8,0,0.009045333291093508
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,32,64,1,128,0,1,fp8,fp8,0,0.008703999842206636
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,32,64,2,128,0,1,float16,float16,0,0.008874666566650072
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,32,64,2,128,0,1,float16,fp8,0,0.009216000015536943
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,32,64,2,128,0,1,fp8,fp8,0,0.008703999842206636
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,32,64,4,128,0,1,float16,float16,0,0.009045333291093508
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,32,64,4,128,0,1,float16,fp8,0,0.00938666673998038
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,32,64,4,128,0,1,fp8,fp8,0,0.0085333331177632
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,32,64,8,128,0,1,float16,float16,0,0.008874666566650072
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,32,64,8,128,0,1,float16,fp8,0,0.009216000015536943
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,32,64,8,128,0,1,fp8,fp8,0,0.009045333291093508
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,16,64,1,128,0,1,float16,float16,0,0.32153600454330444
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,16,64,1,128,0,1,float16,fp8,0,0.31948800881703693
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,16,64,1,128,0,1,fp8,fp8,0,0.27357866366704303
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,16,64,2,128,0,1,float16,float16,0,0.3298986752827962
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,16,64,2,128,0,1,fp8,fp8,0,0.28945066531499225
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,16,64,2,128,0,1,float16,fp8,0,0.3293866713841756
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,16,64,4,128,0,1,float16,float16,0,0.35072000821431476
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,16,64,4,128,0,1,float16,fp8,0,0.34508800506591797
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,16,64,4,128,0,1,fp8,fp8,0,0.2974720001220703
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,16,64,8,128,0,1,float16,float16,0,0.39714133739471436
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,16,64,8,128,0,1,float16,fp8,0,0.3831466833750407
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,16,64,8,128,0,1,fp8,fp8,0,0.33501867453257245
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,16,64,64,128,0,1,float16,float16,0,0.45482667287190753
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,16,64,64,128,0,1,float16,fp8,0,0.41045331954956055
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,16,64,64,128,0,1,fp8,fp8,0,0.32921600341796875
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,16,64,1,128,0,1,float16,float16,0,0.11758933464686076
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,16,64,1,128,0,1,float16,fp8,0,0.11793067057927449
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,16,64,1,128,0,1,fp8,fp8,0,0.09284266829490662
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,16,64,2,128,0,1,float16,float16,0,0.11963733037312825
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,16,64,2,128,0,1,float16,fp8,0,0.1181013286113739
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,16,64,2,128,0,1,fp8,fp8,0,0.09335466225941975
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,16,64,4,128,0,1,float16,float16,0,0.12066133817036946
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,16,64,4,128,0,1,float16,fp8,0,0.11793067057927449
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,16,64,4,128,0,1,fp8,fp8,0,0.0942080020904541
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,16,64,8,128,0,1,float16,fp8,0,0.12151466806729634
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,16,64,8,128,0,1,float16,float16,0,0.12731732924779257
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,16,64,64,128,0,1,float16,float16,0,0.0942080020904541
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,16,64,8,128,0,1,fp8,fp8,0,0.09540266791979472
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,16,64,64,128,0,1,float16,fp8,0,0.07321600119272868
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,16,64,64,128,0,1,fp8,fp8,0,0.05563733478387197
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,16,64,1,128,0,1,float16,float16,0,0.06348800162474315
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,16,64,1,128,0,1,float16,fp8,0,0.06451199948787689
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,16,64,1,128,0,1,fp8,fp8,0,0.05085866649945577
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,16,64,2,128,0,1,float16,float16,0,0.06400000055631001
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,16,64,2,128,0,1,float16,fp8,0,0.06382933259010315
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,16,64,2,128,0,1,fp8,fp8,0,0.05085866649945577
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,16,64,4,128,0,1,float16,float16,0,0.06382933259010315
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,16,64,4,128,0,1,float16,fp8,0,0.06519466638565063
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,16,64,4,128,0,1,fp8,fp8,0,0.05120000243186951
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,16,64,8,128,0,1,float16,float16,0,0.06400000055631001
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,16,64,8,128,0,1,float16,fp8,0,0.06400000055631001
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,16,64,8,128,0,1,fp8,fp8,0,0.051370665431022644
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,16,64,64,128,0,1,float16,float16,0,0.039594667653242745
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,16,64,64,128,0,1,fp8,fp8,0,0.031744000812371574
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,16,64,64,128,0,1,float16,fp8,0,0.03788800040880839
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,16,64,1,128,0,1,float16,float16,0,0.0365226666132609
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,16,64,1,128,0,1,float16,fp8,0,0.0365226666132609
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,16,64,1,128,0,1,fp8,fp8,0,0.030207999050617218
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,16,64,2,128,0,1,float16,fp8,0,0.03669333209594091
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,16,64,2,128,0,1,float16,float16,0,0.0365226666132609
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,16,64,2,128,0,1,fp8,fp8,0,0.030207999050617218
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,16,64,4,128,0,1,float16,float16,0,0.03669333209594091
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,16,64,4,128,0,1,float16,fp8,0,0.03669333209594091
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,16,64,4,128,0,1,fp8,fp8,0,0.030207999050617218
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,16,64,8,128,0,1,float16,float16,0,0.0363520011305809
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,16,64,8,128,0,1,float16,fp8,0,0.04266666869322459
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,16,64,8,128,0,1,fp8,fp8,0,0.03054933249950409
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,16,64,64,128,0,1,float16,float16,0,0.023381332556406658
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,16,64,64,128,0,1,float16,fp8,0,0.023039999107519787
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,16,64,64,128,0,1,fp8,fp8,0,0.018944000204404194
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,16,64,1,128,0,1,float16,fp8,0,0.02252800017595291
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,16,64,1,128,0,1,float16,float16,0,0.02252800017595291
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,16,64,1,128,0,1,fp8,fp8,0,0.01877333347996076
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,16,64,2,128,0,1,float16,float16,0,0.02252800017595291
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,16,64,2,128,0,1,float16,fp8,0,0.022698665658632915
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,16,64,2,128,0,1,fp8,fp8,0,0.019626667102177937
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,16,64,4,128,0,1,float16,float16,0,0.022698665658632915
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,16,64,4,128,0,1,float16,fp8,0,0.02218666672706604
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,16,64,4,128,0,1,fp8,fp8,0,0.018944000204404194
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,16,64,8,128,0,1,float16,float16,0,0.02218666672706604
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,16,64,8,128,0,1,float16,fp8,0,0.02252800017595291
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,16,64,8,128,0,1,fp8,fp8,0,0.019285333653291065
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,16,64,64,128,0,1,float16,float16,0,0.015189333508412043
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,16,64,64,128,0,1,float16,fp8,0,0.014848000059525171
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,16,64,64,128,0,1,fp8,fp8,0,0.012800000607967377
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,16,64,1,128,0,1,float16,float16,0,0.0145066666106383
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,16,64,1,128,0,1,float16,fp8,0,0.014677333335081736
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,16,64,1,128,0,1,fp8,fp8,0,0.012629333883523941
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,16,64,2,128,0,1,float16,float16,0,0.0145066666106383
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,16,64,2,128,0,1,float16,fp8,0,0.015018666783968607
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,16,64,2,128,0,1,fp8,fp8,0,0.012800000607967377
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,16,64,4,128,0,1,float16,float16,0,0.0145066666106383
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,16,64,4,128,0,1,fp8,fp8,0,0.012458667159080505
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,16,64,4,128,0,1,float16,fp8,0,0.0145066666106383
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,16,64,8,128,0,1,float16,float16,0,0.0145066666106383
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,16,64,8,128,0,1,float16,fp8,0,0.014335999886194864
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,16,64,8,128,0,1,fp8,fp8,0,0.012800000607967377
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,16,64,64,128,0,1,float16,float16,0,0.010581333190202713
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,16,64,64,128,0,1,float16,fp8,0,0.010751999914646149
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,16,64,64,128,0,1,fp8,fp8,0,0.00972800018886725
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,16,64,1,128,0,1,float16,float16,0,0.01109333336353302
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,16,64,1,128,0,1,float16,fp8,0,0.010751999914646149
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,16,64,1,128,0,1,fp8,fp8,0,0.010239999741315842
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,16,64,2,128,0,1,float16,float16,0,0.010410666465759277
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,16,64,2,128,0,1,float16,fp8,0,0.010581333190202713
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,16,64,2,128,0,1,fp8,fp8,0,0.010069333637754122
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,16,64,4,128,0,1,float16,float16,0,0.010410666465759277
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,16,64,4,128,0,1,float16,fp8,0,0.010410666465759277
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,16,64,4,128,0,1,fp8,fp8,0,0.00938666673998038
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,16,64,8,128,0,1,float16,float16,0,0.010410666465759277
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,16,64,8,128,0,1,float16,fp8,0,0.010581333190202713
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,16,64,64,128,0,1,float16,float16,0,0.008874666566650072
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,16,64,64,128,0,1,float16,fp8,0,0.009216000015536943
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,16,64,8,128,0,1,fp8,fp8,0,0.009557333464423815
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,16,64,64,128,0,1,fp8,fp8,0,0.0085333331177632
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,16,64,1,128,0,1,float16,float16,0,0.00938666673998038
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,16,64,1,128,0,1,float16,fp8,0,0.009045333291093508
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,16,64,1,128,0,1,fp8,fp8,0,0.008874666566650072
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,16,64,2,128,0,1,float16,float16,0,0.009216000015536943
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,16,64,2,128,0,1,float16,fp8,0,0.009216000015536943
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,16,64,2,128,0,1,fp8,fp8,0,0.009045333291093508
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,16,64,4,128,0,1,float16,float16,0,0.009045333291093508
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,16,64,4,128,0,1,float16,fp8,0,0.009216000015536943
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,16,64,4,128,0,1,fp8,fp8,0,0.0085333331177632
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,16,64,8,128,0,1,float16,float16,0,0.008703999842206636
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,16,64,8,128,0,1,float16,fp8,0,0.009216000015536943
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,16,64,8,128,0,1,fp8,fp8,0,0.009216000015536943
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,16,64,64,128,0,1,float16,float16,0,0.008362666393319765
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,16,64,64,128,0,1,float16,fp8,0,0.0085333331177632
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,16,64,64,128,0,1,fp8,fp8,0,0.008362666393319765
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,16,64,1,128,0,1,float16,float16,0,0.008703999842206636
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,16,64,1,128,0,1,float16,fp8,0,0.009045333291093508
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,16,64,1,128,0,1,fp8,fp8,0,0.008021333565314611
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,16,64,2,128,0,1,float16,float16,0,0.009717333440979322
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,16,64,2,128,0,1,float16,fp8,0,0.008874666566650072
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,16,64,2,128,0,1,fp8,fp8,0,0.008192000289758047
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,16,64,4,128,0,1,float16,float16,0,0.0085333331177632
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,16,64,4,128,0,1,float16,fp8,0,0.009045333291093508
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,16,64,4,128,0,1,fp8,fp8,0,0.008362666393319765
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,16,64,8,128,0,1,float16,fp8,0,0.008703999842206636
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,16,64,8,128,0,1,float16,float16,0,0.0085333331177632
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,16,64,8,128,0,1,fp8,fp8,0,0.008874666566650072
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,16384,48,1,128,0,1,fp8,fp8,0,104.01638793945312
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,16384,48,2,128,0,1,fp8,fp8,0,102.09194946289062
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,16384,48,1,128,0,1,float16,fp8,0,170.7716267903646
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,16384,48,1,128,0,1,float16,float16,0,176.25394694010416
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,16384,48,2,128,0,1,float16,float16,0,172.47762044270834
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,16384,48,2,128,0,1,float16,fp8,0,174.1339314778646
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,16384,48,4,128,0,1,float16,float16,0,173.68558756510416
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,16384,48,4,128,0,1,float16,fp8,0,173.9723103841146
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,16384,48,4,128,0,1,fp8,fp8,0,102.77956136067708
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,16384,48,48,128,0,1,fp8,fp8,0,54.3284912109375
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,16384,48,1,128,0,1,float16,float16,0,87.39993286132812
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,16384,48,48,128,0,1,float16,float16,0,92.11153157552083
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,16384,48,48,128,0,1,float16,fp8,0,93.20055135091145
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,16384,48,8,128,0,1,fp8,fp8,0,105.10301717122395
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,16384,48,8,128,0,1,float16,float16,0,173.2696736653646
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,16384,48,1,128,0,1,fp8,fp8,0,49.68157958984375
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,16384,48,1,128,0,1,float16,fp8,0,86.52663167317708
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,16384,48,2,128,0,1,fp8,fp8,0,50.02905782063802
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,16384,48,8,128,0,1,float16,fp8,0,174.02555338541666
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,16384,48,4,128,0,1,fp8,fp8,0,50.534912109375
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,16384,48,2,128,0,1,float16,float16,0,87.43082682291667
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,16384,48,2,128,0,1,float16,fp8,0,88.57275390625
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,16384,48,4,128,0,1,float16,fp8,0,88.57838948567708
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,16384,48,4,128,0,1,float16,float16,0,90.5147705078125
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,16384,48,8,128,0,1,float16,float16,0,87.4052225748698
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,16384,48,48,128,0,1,fp8,fp8,0,26.865834554036457
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,16384,48,8,128,0,1,fp8,fp8,0,49.738067626953125
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,16384,48,48,128,0,1,float16,float16,0,43.88232421875
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,16384,48,48,128,0,1,float16,fp8,0,43.4346669514974
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,16384,48,1,128,0,1,float16,float16,0,41.54521687825521
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,16384,48,8,128,0,1,float16,fp8,0,90.17429606119792
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,16384,48,1,128,0,1,float16,fp8,0,42.59430440266927
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,16384,48,1,128,0,1,fp8,fp8,0,24.62481180826823
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,16384,48,2,128,0,1,fp8,fp8,0,24.413355509440105
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,16384,48,2,128,0,1,float16,float16,0,41.933311462402344
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,16384,48,2,128,0,1,float16,fp8,0,41.527295430501304
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,16384,48,4,128,0,1,fp8,fp8,0,25.19927469889323
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,16384,48,4,128,0,1,float16,float16,0,41.15234120686849
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,16384,48,4,128,0,1,float16,fp8,0,41.829376220703125
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,16384,48,8,128,0,1,float16,fp8,0,41.4023691813151
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,16384,48,8,128,0,1,float16,float16,0,42.83801778157552
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,16384,48,8,128,0,1,fp8,fp8,0,24.92518361409505
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,16384,48,48,128,0,1,float16,fp8,0,21.19918950398763
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,16384,48,48,128,0,1,float16,float16,0,21.621078491210938
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,16384,48,48,128,0,1,fp8,fp8,0,13.906602223714193
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,16384,48,1,128,0,1,float16,float16,0,20.427263895670574
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,16384,48,1,128,0,1,fp8,fp8,0,12.465493520100912
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,16384,48,2,128,0,1,fp8,fp8,0,12.515327453613281
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,16384,48,1,128,0,1,float16,fp8,0,20.846079508463543
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,16384,48,2,128,0,1,float16,float16,0,21.169493357340496
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,16384,48,2,128,0,1,float16,fp8,0,20.971691131591797
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,16384,48,4,128,0,1,float16,float16,0,20.93670399983724
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,16384,48,4,128,0,1,float16,fp8,0,21.370709737141926
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,16384,48,4,128,0,1,fp8,fp8,0,12.430848439534506
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,16384,48,8,128,0,1,fp8,fp8,0,12.369066874186197
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,16384,48,8,128,0,1,float16,float16,0,20.34107716878255
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,16384,48,8,128,0,1,float16,fp8,0,20.50167465209961
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,12288,48,1,128,0,1,fp8,fp8,0,58.97335306803385
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,12288,48,2,128,0,1,fp8,fp8,0,58.934956868489586
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,12288,48,1,128,0,1,float16,float16,0,101.38726806640625
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,12288,48,1,128,0,1,float16,fp8,0,99.70892333984375
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,12288,48,2,128,0,1,float16,fp8,0,99.14828491210938
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,12288,48,2,128,0,1,float16,float16,0,102.6107686360677
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,12288,48,4,128,0,1,float16,float16,0,98.72332763671875
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,12288,48,4,128,0,1,float16,fp8,0,99.92482503255208
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,12288,48,4,128,0,1,fp8,fp8,0,59.89802551269531
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,12288,48,48,128,0,1,fp8,fp8,0,31.62658182779948
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,12288,48,48,128,0,1,float16,float16,0,50.64140828450521
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,12288,48,48,128,0,1,float16,fp8,0,51.97550964355469
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,12288,48,8,128,0,1,fp8,fp8,0,58.945536295572914
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,12288,48,1,128,0,1,float16,float16,0,47.57964579264323
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,12288,48,8,128,0,1,float16,float16,0,101.41628011067708
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,12288,48,1,128,0,1,float16,fp8,0,47.98668924967448
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,12288,48,1,128,0,1,fp8,fp8,0,27.962880452473957
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,12288,48,8,128,0,1,float16,fp8,0,102.41604614257812
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,12288,48,2,128,0,1,fp8,fp8,0,28.41821797688802
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,12288,48,2,128,0,1,float16,float16,0,47.50421142578125
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,12288,48,2,128,0,1,float16,fp8,0,48.752298990885414
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,12288,48,4,128,0,1,fp8,fp8,0,29.02630360921224
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,12288,48,4,128,0,1,float16,float16,0,47.63306681315104
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,12288,48,4,128,0,1,float16,fp8,0,48.030721028645836
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,12288,48,8,128,0,1,fp8,fp8,0,29.089792887369793
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,12288,48,8,128,0,1,float16,float16,0,47.56463114420573
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,12288,48,48,128,0,1,float16,float16,0,25.477460225423176
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,12288,48,48,128,0,1,fp8,fp8,0,15.704575856526693
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,12288,48,8,128,0,1,float16,fp8,0,49.66638692220052
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,12288,48,48,128,0,1,float16,fp8,0,25.85668182373047
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,12288,48,1,128,0,1,fp8,fp8,0,14.17728042602539
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,12288,48,1,128,0,1,float16,float16,0,23.83172353108724
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,12288,48,1,128,0,1,float16,fp8,0,23.869099934895832
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,12288,48,2,128,0,1,fp8,fp8,0,14.55291748046875
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,12288,48,2,128,0,1,float16,float16,0,23.622825622558594
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,12288,48,2,128,0,1,float16,fp8,0,23.791956583658855
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,12288,48,4,128,0,1,fp8,fp8,0,14.397610982259115
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,12288,48,4,128,0,1,float16,fp8,0,24.4322992960612
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,12288,48,4,128,0,1,float16,float16,0,24.623275756835938
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,12288,48,8,128,0,1,float16,float16,0,24.301055908203125
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,12288,48,8,128,0,1,float16,fp8,0,23.722325642903645
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,12288,48,48,128,0,1,float16,float16,0,12.565675099690756
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,12288,48,8,128,0,1,fp8,fp8,0,14.652586619059244
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,12288,48,48,128,0,1,fp8,fp8,0,7.9965864817301435
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,12288,48,48,128,0,1,float16,fp8,0,12.685141245524088
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,12288,48,1,128,0,1,float16,fp8,0,12.431531270345053
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,12288,48,1,128,0,1,float16,float16,0,12.007935841878256
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,12288,48,1,128,0,1,fp8,fp8,0,6.861141204833984
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,12288,48,2,128,0,1,fp8,fp8,0,6.767957051595052
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,12288,48,2,128,0,1,float16,float16,0,11.94973882039388
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,12288,48,2,128,0,1,float16,fp8,0,12.04104487101237
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,12288,48,4,128,0,1,fp8,fp8,0,7.542954762776692
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,12288,48,4,128,0,1,float16,float16,0,12.13525390625
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,12288,48,4,128,0,1,float16,fp8,0,11.992064158121744
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,12288,48,8,128,0,1,float16,float16,0,12.178602854410807
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,12288,48,8,128,0,1,float16,fp8,0,12.06271998087565
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,12288,48,8,128,0,1,fp8,fp8,0,7.114751815795898
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,10240,48,1,128,0,1,fp8,fp8,0,41.76520538330078
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,10240,48,2,128,0,1,fp8,fp8,0,41.10540771484375
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,10240,48,1,128,0,1,float16,float16,0,69.38948059082031
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,10240,48,1,128,0,1,float16,fp8,0,69.51151021321614
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,10240,48,2,128,0,1,float16,float16,0,69.9666748046875
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,10240,48,2,128,0,1,float16,fp8,0,69.86547342936198
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,10240,48,4,128,0,1,float16,float16,0,70.07931518554688
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,10240,48,4,128,0,1,float16,fp8,0,70.82734680175781
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,10240,48,4,128,0,1,fp8,fp8,0,42.36322021484375
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,10240,48,48,128,0,1,fp8,fp8,0,22.525611877441406
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,10240,48,8,128,0,1,fp8,fp8,0,40.548352559407554
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,10240,48,48,128,0,1,float16,float16,0,36.64059702555338
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,10240,48,48,128,0,1,float16,fp8,0,36.31172180175781
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,10240,48,1,128,0,1,float16,float16,0,33.888938903808594
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,10240,48,8,128,0,1,float16,float16,0,72.50790405273438
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,10240,48,8,128,0,1,float16,fp8,0,70.21943664550781
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,10240,48,1,128,0,1,float16,fp8,0,34.813611348470054
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,10240,48,1,128,0,1,fp8,fp8,0,19.978069305419922
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,10240,48,2,128,0,1,fp8,fp8,0,20.129962921142578
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,10240,48,2,128,0,1,float16,float16,0,33.36823527018229
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,10240,48,2,128,0,1,float16,fp8,0,33.71246846516927
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,10240,48,4,128,0,1,float16,float16,0,34.18128967285156
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,10240,48,4,128,0,1,fp8,fp8,0,19.866282145182293
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,10240,48,4,128,0,1,float16,fp8,0,34.5152842203776
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,10240,48,8,128,0,1,fp8,fp8,0,20.49570083618164
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,10240,48,8,128,0,1,float16,float16,0,35.3262939453125
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,10240,48,8,128,0,1,float16,fp8,0,33.88159942626953
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,10240,48,48,128,0,1,float16,float16,0,17.726464589436848
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,10240,48,48,128,0,1,float16,fp8,0,17.707008361816406
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,10240,48,48,128,0,1,fp8,fp8,0,11.529045104980469
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,10240,48,1,128,0,1,float16,float16,0,16.696832021077473
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,10240,48,1,128,0,1,float16,fp8,0,16.88831965128581
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,10240,48,1,128,0,1,fp8,fp8,0,10.202112197875977
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,10240,48,2,128,0,1,fp8,fp8,0,10.292394638061523
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,10240,48,2,128,0,1,float16,float16,0,17.329322814941406
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,10240,48,2,128,0,1,float16,fp8,0,16.92194112141927
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,10240,48,4,128,0,1,float16,float16,0,16.852991739908855
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,10240,48,4,128,0,1,fp8,fp8,0,10.163199742635092
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,10240,48,4,128,0,1,float16,fp8,0,17.153536478678387
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,10240,48,8,128,0,1,float16,float16,0,17.307818094889324
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,10240,48,8,128,0,1,float16,fp8,0,17.018880208333332
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,10240,48,48,128,0,1,float16,float16,0,9.367210388183594
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,10240,48,8,128,0,1,fp8,fp8,0,10.122581481933594
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,10240,48,48,128,0,1,float16,fp8,0,9.009493509928385
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,10240,48,48,128,0,1,fp8,fp8,0,5.601280212402344
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,10240,48,1,128,0,1,float16,float16,0,8.578730901082357
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,10240,48,1,128,0,1,float16,fp8,0,8.6200319925944
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,10240,48,1,128,0,1,fp8,fp8,0,5.040639877319336
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,10240,48,2,128,0,1,fp8,fp8,0,4.513279914855957
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,10240,48,2,128,0,1,float16,float16,0,8.468138376871744
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,10240,48,2,128,0,1,float16,fp8,0,8.879786809285482
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,10240,48,4,128,0,1,float16,float16,0,8.429909388224283
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,10240,48,4,128,0,1,fp8,fp8,0,4.910421371459961
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,10240,48,4,128,0,1,float16,fp8,0,8.414037068684896
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,10240,48,8,128,0,1,float16,float16,0,8.647680282592773
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,10240,48,8,128,0,1,float16,fp8,0,8.190805435180664
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,10240,48,8,128,0,1,fp8,fp8,0,5.0594132741292315
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,8192,48,1,128,0,1,fp8,fp8,0,55.294291178385414
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,8192,48,2,128,0,1,fp8,fp8,0,56.06041463216146
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,8192,48,1,128,0,1,float16,float16,0,92.90547688802083
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,8192,48,1,128,0,1,float16,fp8,0,96.31676228841145
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,8192,48,2,128,0,1,float16,float16,0,93.94637044270833
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,8192,48,2,128,0,1,float16,fp8,0,93.8798116048177
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,8192,48,4,128,0,1,float16,fp8,0,94.22421264648438
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,8192,48,4,128,0,1,float16,float16,0,94.68809000651042
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,8192,48,4,128,0,1,fp8,fp8,0,55.94487508138021
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,8192,48,48,128,0,1,fp8,fp8,0,30.83349355061849
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,8192,48,48,128,0,1,float16,float16,0,48.14813741048177
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,8192,48,48,128,0,1,float16,fp8,0,48.49152119954427
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,8192,48,1,128,0,1,float16,float16,0,45.24151611328125
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,8192,48,8,128,0,1,fp8,fp8,0,59.50890604654948
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,8192,48,8,128,0,1,float16,float16,0,96.31538899739583
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,8192,48,1,128,0,1,float16,fp8,0,44.12945048014323
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,8192,48,1,128,0,1,fp8,fp8,0,26.4094721476237
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,8192,48,8,128,0,1,float16,fp8,0,93.70248413085938
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,8192,48,2,128,0,1,fp8,fp8,0,27.26690165201823
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,8192,48,2,128,0,1,float16,float16,0,44.103851318359375
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,8192,48,2,128,0,1,float16,fp8,0,44.300628662109375
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,8192,48,4,128,0,1,fp8,fp8,0,26.994517008463543
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,8192,48,4,128,0,1,float16,float16,0,46.009175618489586
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,8192,48,4,128,0,1,float16,fp8,0,44.99268086751302
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,8192,48,8,128,0,1,fp8,fp8,0,27.917653401692707
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,8192,48,8,128,0,1,float16,float16,0,44.30625915527344
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,8192,48,48,128,0,1,fp8,fp8,0,15.337130228678385
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,8192,48,48,128,0,1,float16,float16,0,23.403177897135418
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,8192,48,8,128,0,1,float16,fp8,0,44.78515116373698
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,8192,48,48,128,0,1,float16,fp8,0,23.960235595703125
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,8192,48,1,128,0,1,float16,float16,0,22.779390970865887
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,8192,48,1,128,0,1,float16,fp8,0,22.06293233235677
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,8192,48,1,128,0,1,fp8,fp8,0,13.52243169148763
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,8192,48,2,128,0,1,fp8,fp8,0,13.42361577351888
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,8192,48,2,128,0,1,float16,float16,0,21.727572123209637
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,8192,48,4,128,0,1,fp8,fp8,0,13.704021453857422
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,8192,48,2,128,0,1,float16,fp8,0,21.926912943522137
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,8192,48,4,128,0,1,float16,float16,0,21.863423665364582
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,8192,48,4,128,0,1,float16,fp8,0,21.814783732096355
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,8192,48,8,128,0,1,float16,float16,0,22.4182612101237
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,8192,48,8,128,0,1,fp8,fp8,0,13.392725626627604
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,8192,48,48,128,0,1,float16,float16,0,11.714218139648438
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,8192,48,8,128,0,1,float16,fp8,0,22.67528533935547
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,8192,48,48,128,0,1,fp8,fp8,0,7.646207809448242
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,8192,48,48,128,0,1,float16,fp8,0,12.218709309895834
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,8192,48,1,128,0,1,float16,float16,0,11.166549682617188
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,8192,48,1,128,0,1,float16,fp8,0,11.116373697916666
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,8192,48,1,128,0,1,fp8,fp8,0,6.55837885538737
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,8192,48,2,128,0,1,fp8,fp8,0,6.661631902058919
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,8192,48,2,128,0,1,float16,float16,0,11.139755249023438
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,8192,48,2,128,0,1,float16,fp8,0,11.192148844401041
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,8192,48,4,128,0,1,float16,float16,0,11.132245381673178
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,8192,48,4,128,0,1,fp8,fp8,0,6.51588249206543
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,8192,48,4,128,0,1,float16,fp8,0,11.511637369791666
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,8192,48,8,128,0,1,float16,float16,0,11.563519795735678
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,8192,48,8,128,0,1,fp8,fp8,0,6.581077575683594
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,8192,48,48,128,0,1,float16,float16,0,5.793621063232422
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,8192,48,8,128,0,1,float16,fp8,0,11.293525695800781
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,8192,48,48,128,0,1,fp8,fp8,0,3.78709348042806
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,8192,48,48,128,0,1,float16,fp8,0,5.804714838663737
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,8192,48,1,128,0,1,float16,float16,0,5.415765126546224
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,8192,48,1,128,0,1,fp8,fp8,0,2.986325263977051
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,8192,48,1,128,0,1,float16,fp8,0,5.556053161621094
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,8192,48,2,128,0,1,fp8,fp8,0,3.0938453674316406
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,8192,48,2,128,0,1,float16,float16,0,5.610495885213216
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,8192,48,2,128,0,1,float16,fp8,0,5.572437286376953
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,8192,48,4,128,0,1,float16,float16,0,5.387093226114909
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,8192,48,4,128,0,1,fp8,fp8,0,3.0011733373006186
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,8192,48,4,128,0,1,float16,fp8,0,5.4440962473551435
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,8192,48,8,128,0,1,float16,float16,0,5.570559819539388
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,8192,48,8,128,0,1,float16,fp8,0,5.5918935139973955
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,8192,48,8,128,0,1,fp8,fp8,0,3.2153600056966147
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,6144,48,1,128,0,1,fp8,fp8,0,31.91876220703125
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,6144,48,2,128,0,1,fp8,fp8,0,31.976959228515625
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,6144,48,1,128,0,1,float16,fp8,0,51.913726806640625
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,6144,48,1,128,0,1,float16,float16,0,53.18946329752604
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,6144,48,2,128,0,1,float16,fp8,0,51.6340077718099
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,6144,48,2,128,0,1,float16,float16,0,52.17485046386719
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,6144,48,4,128,0,1,float16,float16,0,54.061055501302086
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,6144,48,4,128,0,1,float16,fp8,0,52.8742421468099
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,6144,48,4,128,0,1,fp8,fp8,0,32.34355163574219
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,6144,48,48,128,0,1,fp8,fp8,0,19.27782440185547
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,6144,48,48,128,0,1,float16,float16,0,28.871681213378906
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,6144,48,48,128,0,1,float16,fp8,0,28.651690165201824
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,6144,48,8,128,0,1,fp8,fp8,0,33.860608418782554
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,6144,48,8,128,0,1,float16,float16,0,52.95138041178385
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,6144,48,1,128,0,1,float16,float16,0,25.902933756510418
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,6144,48,8,128,0,1,float16,fp8,0,52.26734924316406
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,6144,48,1,128,0,1,float16,fp8,0,25.72919464111328
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,6144,48,1,128,0,1,fp8,fp8,0,15.977642059326172
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,6144,48,2,128,0,1,fp8,fp8,0,15.762090047200521
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,6144,48,2,128,0,1,float16,float16,0,25.51500701904297
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,6144,48,2,128,0,1,float16,fp8,0,25.279146830240887
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,6144,48,4,128,0,1,float16,fp8,0,26.117632548014324
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,6144,48,4,128,0,1,float16,float16,0,25.76623026529948
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,6144,48,4,128,0,1,fp8,fp8,0,15.80407460530599
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,6144,48,8,128,0,1,float16,float16,0,25.91505177815755
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,6144,48,8,128,0,1,fp8,fp8,0,16.192340850830078
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,6144,48,8,128,0,1,float16,fp8,0,26.968233744303387
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,6144,48,48,128,0,1,float16,float16,0,14.402730305989584
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,6144,48,48,128,0,1,float16,fp8,0,14.213461558024088
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,6144,48,48,128,0,1,fp8,fp8,0,9.343829472859701
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,6144,48,1,128,0,1,float16,fp8,0,12.872875213623047
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,6144,48,1,128,0,1,float16,float16,0,12.903424580891928
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,6144,48,1,128,0,1,fp8,fp8,0,7.6549123128255205
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,6144,48,2,128,0,1,fp8,fp8,0,7.673856099446614
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,6144,48,2,128,0,1,float16,float16,0,12.829524993896484
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,6144,48,2,128,0,1,float16,fp8,0,13.285717010498047
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,6144,48,4,128,0,1,float16,float16,0,12.889087677001953
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,6144,48,4,128,0,1,fp8,fp8,0,7.555413564046224
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,6144,48,4,128,0,1,float16,fp8,0,12.84659194946289
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,6144,48,8,128,0,1,float16,float16,0,13.164202372233072
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,6144,48,8,128,0,1,float16,fp8,0,13.090816497802734
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,6144,48,8,128,0,1,fp8,fp8,0,7.670613606770833
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,6144,48,48,128,0,1,float16,float16,0,7.22705078125
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,6144,48,48,128,0,1,float16,fp8,0,7.1492265065511065
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,6144,48,48,128,0,1,fp8,fp8,0,4.627797444661458
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,6144,48,1,128,0,1,float16,float16,0,6.317056020100911
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,6144,48,1,128,0,1,float16,fp8,0,6.619647979736328
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,6144,48,1,128,0,1,fp8,fp8,0,3.5725653966267905
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,6144,48,2,128,0,1,float16,float16,0,6.04689089457194
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,6144,48,2,128,0,1,fp8,fp8,0,3.6857172648111978
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,6144,48,2,128,0,1,float16,fp8,0,5.844309488932292
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,6144,48,4,128,0,1,float16,float16,0,6.2248961130778
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,6144,48,4,128,0,1,float16,fp8,0,5.9917653401692705
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,6144,48,4,128,0,1,fp8,fp8,0,3.494741439819336
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,6144,48,8,128,0,1,float16,float16,0,6.523050944010417
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,6144,48,8,128,0,1,float16,fp8,0,6.201002756754558
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,6144,48,8,128,0,1,fp8,fp8,0,3.756373405456543
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,6144,48,48,128,0,1,float16,float16,0,3.5457706451416016
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,6144,48,48,128,0,1,float16,fp8,0,3.451391855875651
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,6144,48,48,128,0,1,fp8,fp8,0,2.3191893895467124
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,6144,48,1,128,0,1,float16,float16,0,2.859349250793457
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,6144,48,1,128,0,1,float16,fp8,0,2.8253866831461587
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,6144,48,1,128,0,1,fp8,fp8,0,1.7943894068400066
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,6144,48,2,128,0,1,float16,float16,0,2.913109461466471
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,6144,48,2,128,0,1,fp8,fp8,0,1.8199893633524578
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,6144,48,2,128,0,1,float16,fp8,0,2.918741226196289
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,6144,48,4,128,0,1,float16,float16,0,2.895359992980957
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,6144,48,4,128,0,1,float16,fp8,0,2.9682346979777017
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,6144,48,4,128,0,1,fp8,fp8,0,1.7669119834899902
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,6144,48,8,128,0,1,float16,float16,0,2.9893973668416343
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,6144,48,8,128,0,1,fp8,fp8,0,1.809066613515218
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,6144,48,8,128,0,1,float16,fp8,0,3.069610595703125
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,4096,48,1,128,0,1,fp8,fp8,0,31.40130106608073
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,4096,48,2,128,0,1,fp8,fp8,0,31.85083770751953
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,4096,48,1,128,0,1,float16,float16,0,51.08992004394531
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,4096,48,2,128,0,1,float16,fp8,0,50.13452657063802
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,4096,48,1,128,0,1,float16,fp8,0,50.15006001790365
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,4096,48,2,128,0,1,float16,float16,0,52.1876475016276
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,4096,48,4,128,0,1,float16,float16,0,50.41100565592448
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,4096,48,4,128,0,1,float16,fp8,0,51.175252278645836
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,4096,48,4,128,0,1,fp8,fp8,0,32.3068593343099
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,4096,48,48,128,0,1,fp8,fp8,0,19.991551717122395
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,4096,48,48,128,0,1,float16,float16,0,28.318890889485676
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,4096,48,48,128,0,1,float16,fp8,0,29.67688496907552
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,4096,48,8,128,0,1,fp8,fp8,0,33.225728352864586
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,4096,48,8,128,0,1,float16,float16,0,50.84979248046875
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,4096,48,1,128,0,1,float16,float16,0,24.403114318847656
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,4096,48,1,128,0,1,float16,fp8,0,23.921152750651043
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,4096,48,8,128,0,1,float16,fp8,0,52.1698964436849
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,4096,48,1,128,0,1,fp8,fp8,0,15.085226694742838
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,4096,48,2,128,0,1,fp8,fp8,0,15.216810862223307
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,4096,48,2,128,0,1,float16,float16,0,24.182271321614582
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,4096,48,2,128,0,1,float16,fp8,0,25.23187255859375
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,4096,48,4,128,0,1,float16,float16,0,24.295082092285156
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,4096,48,4,128,0,1,float16,fp8,0,24.86510976155599
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,4096,48,4,128,0,1,fp8,fp8,0,15.125162760416666
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,4096,48,8,128,0,1,float16,float16,0,25.22350819905599
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,4096,48,8,128,0,1,fp8,fp8,0,15.522815704345703
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,4096,48,48,128,0,1,fp8,fp8,0,9.8875732421875
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,4096,48,8,128,0,1,float16,fp8,0,24.748886108398438
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,4096,48,48,128,0,1,float16,float16,0,13.820587158203125
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,4096,48,48,128,0,1,float16,fp8,0,14.765397389729818
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,4096,48,1,128,0,1,float16,fp8,0,12.143274943033854
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,4096,48,1,128,0,1,float16,float16,0,12.24499257405599
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,4096,48,1,128,0,1,fp8,fp8,0,7.5675309499104815
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,4096,48,2,128,0,1,fp8,fp8,0,7.715328216552734
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,4096,48,2,128,0,1,float16,float16,0,12.170069376627604
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,4096,48,2,128,0,1,float16,fp8,0,12.318378448486328
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,4096,48,4,128,0,1,float16,float16,0,12.26205825805664
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,4096,48,4,128,0,1,fp8,fp8,0,7.615999857584636
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,4096,48,4,128,0,1,float16,fp8,0,12.709376017252604
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,4096,48,8,128,0,1,float16,float16,0,12.274859110514322
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,4096,48,8,128,0,1,float16,fp8,0,12.393131256103516
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,4096,48,8,128,0,1,fp8,fp8,0,7.675903956095378
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,4096,48,48,128,0,1,float16,float16,0,6.851413091023763
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,4096,48,48,128,0,1,fp8,fp8,0,4.909738540649414
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,4096,48,48,128,0,1,float16,fp8,0,6.894250869750977
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,4096,48,1,128,0,1,float16,float16,0,5.910357157389323
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,4096,48,1,128,0,1,float16,fp8,0,5.923157374064128
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,4096,48,1,128,0,1,fp8,fp8,0,3.3802239100138345
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,4096,48,2,128,0,1,float16,float16,0,5.928618748982747
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,4096,48,2,128,0,1,fp8,fp8,0,3.4157225290934243
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,4096,48,2,128,0,1,float16,fp8,0,5.964117050170898
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,4096,48,4,128,0,1,fp8,fp8,0,3.4778451919555664
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,4096,48,4,128,0,1,float16,float16,0,5.593088150024414
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,4096,48,4,128,0,1,float16,fp8,0,6.012245178222656
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,4096,48,8,128,0,1,float16,float16,0,6.194346745808919
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,4096,48,8,128,0,1,float16,fp8,0,6.145194371541341
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,4096,48,8,128,0,1,fp8,fp8,0,3.694250742594401
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,4096,48,48,128,0,1,float16,float16,0,3.4464426040649414
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,4096,48,48,128,0,1,float16,fp8,0,3.444565455118815
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,4096,48,48,128,0,1,fp8,fp8,0,2.379605293273926
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,4096,48,1,128,0,1,float16,float16,0,2.748586654663086
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,4096,48,1,128,0,1,fp8,fp8,0,1.675605297088623
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,4096,48,1,128,0,1,float16,fp8,0,2.6762240727742515
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,4096,48,2,128,0,1,float16,float16,0,2.8043947219848633
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,4096,48,2,128,0,1,float16,fp8,0,2.7709439595540366
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,4096,48,2,128,0,1,fp8,fp8,0,1.6517119407653809
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,4096,48,4,128,0,1,float16,float16,0,2.7910827000935874
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,4096,48,4,128,0,1,float16,fp8,0,2.76258118947347
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,4096,48,4,128,0,1,fp8,fp8,0,1.762986660003662
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,4096,48,8,128,0,1,float16,float16,0,2.909525235493978
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,4096,48,8,128,0,1,float16,fp8,0,2.838015874226888
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,4096,48,48,128,0,1,float16,float16,0,1.7708373069763184
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,4096,48,8,128,0,1,fp8,fp8,0,1.8298880259195964
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,4096,48,48,128,0,1,float16,fp8,0,1.691648006439209
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,4096,48,48,128,0,1,fp8,fp8,0,1.1593386332194011
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,4096,48,1,128,0,1,float16,float16,0,1.3431466420491536
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,4096,48,1,128,0,1,float16,fp8,0,1.3527040481567383
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,4096,48,1,128,0,1,fp8,fp8,0,0.8509439627329508
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,4096,48,2,128,0,1,float16,float16,0,1.3525333404541016
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,4096,48,2,128,0,1,float16,fp8,0,1.3834239641825359
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,4096,48,4,128,0,1,float16,float16,0,1.3315412998199463
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,4096,48,2,128,0,1,fp8,fp8,0,0.8775680065155029
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,4096,48,4,128,0,1,fp8,fp8,0,0.8572586377461752
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,4096,48,4,128,0,1,float16,fp8,0,1.3405866622924805
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,4096,48,8,128,0,1,float16,float16,0,1.368234634399414
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,4096,48,8,128,0,1,float16,fp8,0,1.3883733749389648
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,4096,48,8,128,0,1,fp8,fp8,0,0.8705706596374512
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,3072,48,1,128,0,1,fp8,fp8,0,19.283114115397137
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,3072,48,2,128,0,1,fp8,fp8,0,19.036671956380207
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,3072,48,1,128,0,1,float16,float16,0,28.921173095703125
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,3072,48,1,128,0,1,float16,fp8,0,28.74163309733073
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,3072,48,2,128,0,1,float16,float16,0,29.019137064615887
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,3072,48,2,128,0,1,float16,fp8,0,28.57489013671875
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,3072,48,4,128,0,1,float16,fp8,0,29.96428680419922
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,3072,48,4,128,0,1,float16,float16,0,30.74645233154297
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,3072,48,4,128,0,1,fp8,fp8,0,19.032576243082683
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,3072,48,48,128,0,1,fp8,fp8,0,12.984320322672525
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,3072,48,48,128,0,1,float16,float16,0,17.272661844889324
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,3072,48,48,128,0,1,float16,fp8,0,17.63737614949544
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,3072,48,1,128,0,1,float16,float16,0,14.648831685384115
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,3072,48,8,128,0,1,fp8,fp8,0,20.071765899658203
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,3072,48,8,128,0,1,float16,float16,0,29.909505208333332
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,3072,48,8,128,0,1,float16,fp8,0,30.297940572102863
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,3072,48,1,128,0,1,float16,fp8,0,14.506666819254557
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,3072,48,1,128,0,1,fp8,fp8,0,8.5840212504069
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,3072,48,2,128,0,1,fp8,fp8,0,8.769194920857748
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,3072,48,2,128,0,1,float16,float16,0,14.179840087890625
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,3072,48,2,128,0,1,float16,fp8,0,14.495914459228516
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,3072,48,4,128,0,1,float16,float16,0,14.352383931477865
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,3072,48,4,128,0,1,float16,fp8,0,14.346410115559896
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,3072,48,4,128,0,1,fp8,fp8,0,9.003007888793945
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,3072,48,8,128,0,1,fp8,fp8,0,9.509717305501303
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,3072,48,8,128,0,1,float16,fp8,0,14.579029083251953
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,3072,48,48,128,0,1,float16,float16,0,8.698709487915039
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,3072,48,48,128,0,1,fp8,fp8,0,6.377301534016927
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,3072,48,48,128,0,1,float16,fp8,0,8.84326426188151
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,3072,48,8,128,0,1,float16,float16,0,14.530731201171875
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,3072,48,1,128,0,1,float16,float16,0,7.217493057250977
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,3072,48,1,128,0,1,float16,fp8,0,7.277738571166992
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,3072,48,1,128,0,1,fp8,fp8,0,4.221269289652507
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,3072,48,2,128,0,1,fp8,fp8,0,4.245674769083659
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,3072,48,4,128,0,1,fp8,fp8,0,4.318378766377767
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,3072,48,2,128,0,1,float16,fp8,0,7.0306135813395185
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,3072,48,2,128,0,1,float16,float16,0,7.12004280090332
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,3072,48,4,128,0,1,float16,float16,0,7.165098826090495
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,3072,48,4,128,0,1,float16,fp8,0,7.221077601114909
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,3072,48,8,128,0,1,float16,float16,0,7.1446183522542315
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,3072,48,8,128,0,1,float16,fp8,0,7.390207926432292
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,3072,48,8,128,0,1,fp8,fp8,0,4.4166825612386065
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,3072,48,48,128,0,1,fp8,fp8,0,3.1066452662150064
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,3072,48,1,128,0,1,float16,float16,0,3.302058537801107
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,3072,48,48,128,0,1,float16,float16,0,4.366847991943359
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,3072,48,48,128,0,1,float16,fp8,0,4.300117174784343
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,3072,48,1,128,0,1,fp8,fp8,0,2.070528030395508
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,3072,48,1,128,0,1,float16,fp8,0,3.2861865361531577
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,3072,48,2,128,0,1,fp8,fp8,0,2.0461227099100747
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,3072,48,2,128,0,1,float16,float16,0,3.338752110799154
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,3072,48,2,128,0,1,float16,fp8,0,3.2448854446411133
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,3072,48,4,128,0,1,fp8,fp8,0,2.1222400665283203
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,3072,48,4,128,0,1,float16,float16,0,3.359744071960449
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,3072,48,4,128,0,1,float16,fp8,0,3.3332907358805337
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,3072,48,8,128,0,1,float16,float16,0,3.5075413386027017
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,3072,48,8,128,0,1,float16,fp8,0,3.4556585947672525
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,3072,48,48,128,0,1,float16,float16,0,2.1917014122009277
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,3072,48,48,128,0,1,fp8,fp8,0,1.520469347635905
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,3072,48,48,128,0,1,float16,fp8,0,2.113877296447754
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,3072,48,8,128,0,1,fp8,fp8,0,2.1925546328226724
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,3072,48,1,128,0,1,float16,float16,0,1.5450453758239746
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,3072,48,1,128,0,1,float16,fp8,0,1.5885653495788574
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,3072,48,1,128,0,1,fp8,fp8,0,1.012394666671753
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,3072,48,2,128,0,1,float16,float16,0,1.588223934173584
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,3072,48,2,128,0,1,fp8,fp8,0,0.9917439619700114
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,3072,48,2,128,0,1,float16,fp8,0,1.5950506528218586
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,3072,48,4,128,0,1,fp8,fp8,0,1.0134186744689941
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,3072,48,4,128,0,1,float16,float16,0,1.5832746823628743
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,3072,48,4,128,0,1,float16,fp8,0,1.564842700958252
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,3072,48,8,128,0,1,float16,float16,0,1.6812373797098796
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,3072,48,8,128,0,1,float16,fp8,0,1.6826027234395344
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,3072,48,8,128,0,1,fp8,fp8,0,1.099946657816569
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,3072,48,48,128,0,1,float16,float16,0,1.086634635925293
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,3072,48,48,128,0,1,fp8,fp8,0,0.741376002629598
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,3072,48,48,128,0,1,float16,fp8,0,1.0559146404266357
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,3072,48,1,128,0,1,float16,float16,0,0.7966720263163248
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,3072,48,1,128,0,1,float16,fp8,0,0.7987199624379476
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,3072,48,1,128,0,1,fp8,fp8,0,0.4939093192418416
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,3072,48,2,128,0,1,float16,float16,0,0.8347307046254476
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,3072,48,2,128,0,1,float16,fp8,0,0.8290987014770508
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,3072,48,2,128,0,1,fp8,fp8,0,0.4877653519312541
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,3072,48,4,128,0,1,float16,float16,0,0.8089599609375
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,3072,48,4,128,0,1,fp8,fp8,0,0.4928853511810303
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,3072,48,4,128,0,1,float16,fp8,0,0.8130559921264648
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,3072,48,8,128,0,1,float16,float16,0,0.8052053451538086
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,3072,48,8,128,0,1,float16,fp8,0,0.8209066390991211
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,3072,48,8,128,0,1,fp8,fp8,0,0.5181440114974976
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,2048,48,1,128,0,1,fp8,fp8,0,19.585194905598957
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,2048,48,2,128,0,1,fp8,fp8,0,20.232533772786457
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,2048,48,1,128,0,1,float16,float16,0,30.051157633463543
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,2048,48,1,128,0,1,float16,fp8,0,29.631487528483074
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,2048,48,2,128,0,1,float16,fp8,0,30.530731201171875
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,2048,48,2,128,0,1,float16,float16,0,29.92639923095703
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,2048,48,4,128,0,1,float16,float16,0,29.736788431803387
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,2048,48,4,128,0,1,float16,fp8,0,29.884244283040363
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,2048,48,4,128,0,1,fp8,fp8,0,20.359167734781902
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,2048,48,1,128,0,1,float16,float16,0,14.105941772460938
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,2048,48,48,128,0,1,fp8,fp8,0,14.728020985921225
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,2048,48,8,128,0,1,fp8,fp8,0,21.289471944173176
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,2048,48,48,128,0,1,float16,float16,0,18.952874501546223
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,2048,48,48,128,0,1,float16,fp8,0,18.673664093017578
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,2048,48,8,128,0,1,float16,float16,0,30.02624003092448
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,2048,48,8,128,0,1,float16,fp8,0,30.106282552083332
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,2048,48,1,128,0,1,fp8,fp8,0,8.816640218098959
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,2048,48,1,128,0,1,float16,fp8,0,14.114303588867188
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,2048,48,2,128,0,1,fp8,fp8,0,8.91153081258138
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,2048,48,2,128,0,1,float16,float16,0,13.930154164632162
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,2048,48,2,128,0,1,float16,fp8,0,14.270122528076172
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,2048,48,4,128,0,1,float16,float16,0,14.217216491699219
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,2048,48,4,128,0,1,fp8,fp8,0,9.397418975830078
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,2048,48,4,128,0,1,float16,fp8,0,14.209706624348959
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,2048,48,8,128,0,1,float16,float16,0,14.701738993326822
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,2048,48,8,128,0,1,float16,fp8,0,14.614186604817709
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,2048,48,8,128,0,1,fp8,fp8,0,9.902933120727539
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,2048,48,48,128,0,1,float16,float16,0,9.343488057454428
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,2048,48,1,128,0,1,float16,fp8,0,6.376106897989909
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,2048,48,1,128,0,1,float16,float16,0,7.097855885823567
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,2048,48,48,128,0,1,fp8,fp8,0,7.197866439819336
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,2048,48,48,128,0,1,float16,fp8,0,9.35799471537272
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,2048,48,1,128,0,1,fp8,fp8,0,4.3538773854573565
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,2048,48,2,128,0,1,fp8,fp8,0,4.329984029134114
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,2048,48,2,128,0,1,float16,float16,0,7.007232030232747
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,2048,48,2,128,0,1,float16,fp8,0,6.89356803894043
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,2048,48,4,128,0,1,fp8,fp8,0,4.362922668457031
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,2048,48,4,128,0,1,float16,float16,0,6.935381571451823
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,2048,48,4,128,0,1,float16,fp8,0,7.114240010579427
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,2048,48,8,128,0,1,float16,float16,0,6.920874913533528
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,2048,48,8,128,0,1,fp8,fp8,0,4.697429339090983
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,2048,48,48,128,0,1,float16,float16,0,4.638208071390788
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,2048,48,8,128,0,1,float16,fp8,0,6.987775802612305
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,2048,48,48,128,0,1,fp8,fp8,0,3.50600528717041
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,2048,48,48,128,0,1,float16,fp8,0,4.6484479904174805
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,2048,48,1,128,0,1,float16,float16,0,3.218261400858561
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,2048,48,1,128,0,1,fp8,fp8,0,2.065920035044352
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,2048,48,1,128,0,1,float16,fp8,0,3.285162607828776
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,2048,48,2,128,0,1,fp8,fp8,0,2.139135996500651
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,2048,48,2,128,0,1,float16,float16,0,3.2812372843424478
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,2048,48,2,128,0,1,float16,fp8,0,3.281749407450358
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,2048,48,4,128,0,1,fp8,fp8,0,2.159104029337565
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,2048,48,4,128,0,1,float16,fp8,0,3.371349334716797
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,2048,48,4,128,0,1,float16,float16,0,3.3201494216918945
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,2048,48,8,128,0,1,float16,float16,0,3.4669227600097656
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,2048,48,8,128,0,1,float16,fp8,0,3.505493481953939
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,2048,48,8,128,0,1,fp8,fp8,0,2.3282346725463867
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,2048,48,48,128,0,1,float16,float16,0,2.3128746350606284
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,2048,48,48,128,0,1,fp8,fp8,0,1.7179306348164876
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,2048,48,1,128,0,1,fp8,fp8,0,1.0106879870096843
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,2048,48,48,128,0,1,float16,fp8,0,2.2543360392252603
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,2048,48,1,128,0,1,float16,float16,0,1.5870292981465657
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,2048,48,1,128,0,1,float16,fp8,0,1.5566506385803223
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,2048,48,2,128,0,1,float16,float16,0,1.6235520044962566
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,2048,48,2,128,0,1,fp8,fp8,0,1.042944033940633
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,2048,48,2,128,0,1,float16,fp8,0,1.6201386451721191
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,2048,48,4,128,0,1,float16,fp8,0,1.6197973887125652
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,2048,48,4,128,0,1,fp8,fp8,0,1.0647892951965332
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,2048,48,4,128,0,1,float16,float16,0,1.6232105890909831
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,2048,48,8,128,0,1,float16,float16,0,1.7191252708435059
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,2048,48,8,128,0,1,float16,fp8,0,1.6783359845479329
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,2048,48,8,128,0,1,fp8,fp8,0,1.1397120157877605
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,2048,48,48,128,0,1,float16,float16,0,1.149781306584676
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,2048,48,48,128,0,1,fp8,fp8,0,0.8263680140177408
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,2048,48,48,128,0,1,float16,fp8,0,1.1224746704101562
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,2048,48,1,128,0,1,float16,float16,0,0.7403519948323568
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,2048,48,1,128,0,1,fp8,fp8,0,0.46967466672261554
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,2048,48,1,128,0,1,float16,fp8,0,0.7369386355082194
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,2048,48,2,128,0,1,float16,float16,0,0.741376002629598
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,2048,48,2,128,0,1,float16,fp8,0,0.7679999669392904
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,2048,48,4,128,0,1,float16,float16,0,0.7432533105214437
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,2048,48,4,128,0,1,fp8,fp8,0,0.4896426598230998
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,2048,48,2,128,0,1,fp8,fp8,0,0.47701334953308105
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,2048,48,4,128,0,1,float16,fp8,0,0.7606613636016846
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,2048,48,8,128,0,1,float16,float16,0,0.7864320278167725
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,2048,48,8,128,0,1,fp8,fp8,0,0.5120000044504801
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,2048,48,8,128,0,1,float16,fp8,0,0.7683413028717041
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,2048,48,48,128,0,1,float16,float16,0,0.49902931849161786
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,2048,48,48,128,0,1,float16,fp8,0,0.44236799081166583
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,2048,48,48,128,0,1,fp8,fp8,0,0.3742719888687134
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,2048,48,1,128,0,1,float16,float16,0,0.407039999961853
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,2048,48,1,128,0,1,float16,fp8,0,0.41437868277231854
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,2048,48,1,128,0,1,fp8,fp8,0,0.2558293342590332
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,2048,48,2,128,0,1,float16,float16,0,0.4078933397928874
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,2048,48,2,128,0,1,float16,fp8,0,0.4217173258463542
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,2048,48,2,128,0,1,fp8,fp8,0,0.2616320053736369
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,2048,48,4,128,0,1,float16,float16,0,0.411135991414388
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,2048,48,4,128,0,1,float16,fp8,0,0.40960001945495605
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,2048,48,4,128,0,1,fp8,fp8,0,0.26077866554260254
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,2048,48,8,128,0,1,float16,float16,0,0.42052265008290607
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,2048,48,8,128,0,1,float16,fp8,0,0.41659732659657794
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,2048,48,8,128,0,1,fp8,fp8,0,0.2578773299853007
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,1536,48,1,128,0,1,float16,float16,0,17.29297129313151
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,1536,48,1,128,0,1,float16,fp8,0,17.986901601155598
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,1536,48,1,128,0,1,fp8,fp8,0,11.82958984375
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,1536,48,2,128,0,1,fp8,fp8,0,12.355242411295572
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,1536,48,2,128,0,1,float16,float16,0,17.43684260050456
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,1536,48,2,128,0,1,float16,fp8,0,17.320618947347004
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,1536,48,4,128,0,1,float16,fp8,0,17.434112548828125
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,1536,48,4,128,0,1,float16,float16,0,18.389503479003906
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,1536,48,4,128,0,1,fp8,fp8,0,12.518229166666666
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,1536,48,1,128,0,1,float16,float16,0,8.749909083048502
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,1536,48,8,128,0,1,fp8,fp8,0,13.677567799886068
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,1536,48,48,128,0,1,fp8,fp8,0,9.83569081624349
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,1536,48,8,128,0,1,float16,float16,0,18.939050038655598
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,1536,48,8,128,0,1,float16,fp8,0,17.960447947184246
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,1536,48,48,128,0,1,float16,float16,0,12.285439809163412
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,1536,48,48,128,0,1,float16,fp8,0,12.372138977050781
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,1536,48,1,128,0,1,float16,fp8,0,8.310613632202148
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,1536,48,1,128,0,1,fp8,fp8,0,5.401599884033203
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,1536,48,2,128,0,1,float16,float16,0,8.506197611490885
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,1536,48,4,128,0,1,fp8,fp8,0,5.718186696370442
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,1536,48,2,128,0,1,fp8,fp8,0,5.56117312113444
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,1536,48,2,128,0,1,float16,fp8,0,8.425301233927408
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,1536,48,4,128,0,1,float16,float16,0,8.36300786336263
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,1536,48,4,128,0,1,float16,fp8,0,8.398165384928385
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,1536,48,8,128,0,1,float16,float16,0,8.579584121704102
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,1536,48,8,128,0,1,fp8,fp8,0,6.234794616699219
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,1536,48,48,128,0,1,float16,float16,0,6.074538548787435
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,1536,48,1,128,0,1,float16,float16,0,4.049920082092285
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,1536,48,1,128,0,1,float16,fp8,0,4.062037467956543
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,1536,48,8,128,0,1,float16,fp8,0,8.916309356689453
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,1536,48,48,128,0,1,float16,fp8,0,6.138197580973308
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,1536,48,48,128,0,1,fp8,fp8,0,4.846762657165527
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,1536,48,1,128,0,1,fp8,fp8,0,2.6036906242370605
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,1536,48,2,128,0,1,fp8,fp8,0,2.675029436747233
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,1536,48,4,128,0,1,fp8,fp8,0,2.820608139038086
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,1536,48,2,128,0,1,float16,float16,0,4.190208117167155
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,1536,48,2,128,0,1,float16,fp8,0,4.092927932739258
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,1536,48,4,128,0,1,float16,float16,0,4.180821418762207
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,1536,48,4,128,0,1,float16,fp8,0,4.200959841410319
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,1536,48,8,128,0,1,float16,float16,0,4.310698509216309
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,1536,48,8,128,0,1,float16,fp8,0,4.279637336730957
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,1536,48,8,128,0,1,fp8,fp8,0,2.9716478983561196
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,1536,48,48,128,0,1,fp8,fp8,0,2.3478612899780273
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,1536,48,1,128,0,1,float16,fp8,0,2.02513058980306
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,1536,48,48,128,0,1,float16,fp8,0,3.032917340596517
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,1536,48,1,128,0,1,fp8,fp8,0,1.3095253308614094
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,1536,48,1,128,0,1,float16,float16,0,1.977344036102295
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,1536,48,48,128,0,1,float16,float16,0,3.0740480422973633
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,1536,48,2,128,0,1,float16,float16,0,1.997312068939209
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,1536,48,2,128,0,1,float16,fp8,0,1.9949226379394531
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,1536,48,2,128,0,1,fp8,fp8,0,1.3240319887797039
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,1536,48,4,128,0,1,fp8,fp8,0,1.374037265777588
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,1536,48,4,128,0,1,float16,float16,0,2.0887893040974936
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,1536,48,4,128,0,1,float16,fp8,0,2.0865707397460938
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,1536,48,8,128,0,1,float16,float16,0,2.140501340230306
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,1536,48,8,128,0,1,fp8,fp8,0,1.4373547236124675
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,1536,48,8,128,0,1,float16,fp8,0,2.1562026341756186
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,1536,48,48,128,0,1,float16,float16,0,1.497770627339681
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,1536,48,1,128,0,1,float16,float16,0,0.9245013395945231
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,1536,48,48,128,0,1,float16,fp8,0,1.461077372233073
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,1536,48,48,128,0,1,fp8,fp8,0,1.113258679707845
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,1536,48,1,128,0,1,fp8,fp8,0,0.606549342473348
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,1536,48,1,128,0,1,float16,fp8,0,0.9255253473917643
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,1536,48,2,128,0,1,float16,float16,0,0.9441280364990234
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,1536,48,2,128,0,1,float16,fp8,0,0.9521493117014567
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,1536,48,2,128,0,1,fp8,fp8,0,0.629589319229126
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,1536,48,4,128,0,1,float16,float16,0,0.9683626492818197
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,1536,48,4,128,0,1,fp8,fp8,0,0.6464853286743164
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,1536,48,4,128,0,1,float16,fp8,0,0.9644373257954916
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,1536,48,8,128,0,1,float16,float16,0,1.0332159996032715
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,1536,48,8,128,0,1,float16,fp8,0,1.0308266480763753
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,1536,48,8,128,0,1,fp8,fp8,0,0.7031466960906982
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,1536,48,48,128,0,1,float16,float16,0,0.7304533322652181
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,1536,48,48,128,0,1,float16,fp8,0,0.6867626508076986
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,1536,48,48,128,0,1,fp8,fp8,0,0.5483520030975342
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,1536,48,1,128,0,1,float16,float16,0,0.4498773415883382
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,1536,48,1,128,0,1,float16,fp8,0,0.460970679918925
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,1536,48,1,128,0,1,fp8,fp8,0,0.2752853234608968
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,1536,48,2,128,0,1,float16,fp8,0,0.4604586760203044
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,1536,48,2,128,0,1,float16,float16,0,0.4773546854654948
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,1536,48,2,128,0,1,fp8,fp8,0,0.29047467311223346
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,1536,48,4,128,0,1,float16,float16,0,0.45943466822306317
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,1536,48,4,128,0,1,float16,fp8,0,0.45960533618927
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,1536,48,4,128,0,1,fp8,fp8,0,0.2800640066464742
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,1536,48,8,128,0,1,float16,float16,0,0.4666026830673218
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,1536,48,8,128,0,1,float16,fp8,0,0.4599466721216838
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,1536,48,8,128,0,1,fp8,fp8,0,0.29337600866953534
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,1536,48,48,128,0,1,float16,fp8,0,0.2788693308830261
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,1536,48,48,128,0,1,float16,float16,0,0.279039998849233
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,1536,48,48,128,0,1,fp8,fp8,0,0.19165867567062378
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,1536,48,1,128,0,1,float16,float16,0,0.24251733223597208
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,1536,48,1,128,0,1,float16,fp8,0,0.23654399315516153
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,1536,48,1,128,0,1,fp8,fp8,0,0.16486400365829468
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,1536,48,2,128,0,1,float16,float16,0,0.2539520064989726
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,1536,48,2,128,0,1,float16,fp8,0,0.2558293342590332
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,1536,48,2,128,0,1,fp8,fp8,0,0.16554666558901468
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,1536,48,4,128,0,1,float16,float16,0,0.2476373314857483
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,1536,48,4,128,0,1,float16,fp8,0,0.2481493353843689
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,1536,48,4,128,0,1,fp8,fp8,0,0.16076800227165222
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,1536,48,8,128,0,1,float16,float16,0,0.24678399165471396
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,1536,48,8,128,0,1,float16,fp8,0,0.25497599442799884
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,1536,48,8,128,0,1,fp8,fp8,0,0.16145066420237222
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,1024,48,1,128,0,1,float16,float16,0,17.518591562906902
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,1024,48,1,128,0,1,fp8,fp8,0,12.15948740641276
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,1024,48,1,128,0,1,float16,fp8,0,17.70939763387044
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,1024,48,2,128,0,1,fp8,fp8,0,12.452693939208984
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,1024,48,2,128,0,1,float16,float16,0,17.798826853434246
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,1024,48,2,128,0,1,float16,fp8,0,18.47313054402669
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,1024,48,4,128,0,1,float16,float16,0,18.08349863688151
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,1024,48,4,128,0,1,float16,fp8,0,17.8153813680013
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,1024,48,4,128,0,1,fp8,fp8,0,12.826794942220053
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,1024,48,1,128,0,1,float16,float16,0,8.397653579711914
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,1024,48,8,128,0,1,fp8,fp8,0,13.921621958414713
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,1024,48,48,128,0,1,float16,float16,0,13.720405578613281
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,1024,48,48,128,0,1,fp8,fp8,0,11.524096171061197
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,1024,48,48,128,0,1,float16,fp8,0,13.08364741007487
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,1024,48,8,128,0,1,float16,fp8,0,19.160746256510418
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,1024,48,8,128,0,1,float16,float16,0,18.993663787841797
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,1024,48,1,128,0,1,float16,fp8,0,8.372565587361654
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,1024,48,1,128,0,1,fp8,fp8,0,5.922645568847656
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,1024,48,2,128,0,1,float16,float16,0,8.656384150187174
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,1024,48,2,128,0,1,fp8,fp8,0,6.056277592976888
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,1024,48,2,128,0,1,float16,fp8,0,8.497151692708334
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,1024,48,4,128,0,1,float16,float16,0,8.885248184204102
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,1024,48,4,128,0,1,fp8,fp8,0,6.420991897583008
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,1024,48,4,128,0,1,float16,fp8,0,8.802474975585938
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,1024,48,8,128,0,1,float16,float16,0,9.117183685302734
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,1024,48,8,128,0,1,float16,fp8,0,9.166677474975586
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,1024,48,48,128,0,1,float16,float16,0,6.856362660725911
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,1024,48,8,128,0,1,fp8,fp8,0,6.853631973266602
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,1024,48,48,128,0,1,float16,fp8,0,6.4935251871744795
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,1024,48,1,128,0,1,float16,fp8,0,4.307626724243164
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,1024,48,48,128,0,1,fp8,fp8,0,5.703168233235677
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,1024,48,1,128,0,1,float16,float16,0,4.228437423706055
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,1024,48,1,128,0,1,fp8,fp8,0,2.8979199727376304
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,1024,48,2,128,0,1,float16,float16,0,4.279808044433594
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,1024,48,2,128,0,1,fp8,fp8,0,2.960725466410319
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,1024,48,2,128,0,1,float16,fp8,0,4.298922538757324
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,1024,48,4,128,0,1,fp8,fp8,0,3.065002759297689
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,1024,48,4,128,0,1,float16,float16,0,4.412757237752278
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,1024,48,4,128,0,1,float16,fp8,0,4.360191980997722
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,1024,48,8,128,0,1,float16,float16,0,4.586496035257976
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,1024,48,8,128,0,1,float16,fp8,0,4.43613878885905
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,1024,48,8,128,0,1,fp8,fp8,0,3.267754554748535
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,1024,48,48,128,0,1,float16,float16,0,3.389439900716146
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,1024,48,48,128,0,1,float16,fp8,0,3.224575996398926
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,1024,48,1,128,0,1,float16,float16,0,2.0640427271525064
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,1024,48,48,128,0,1,fp8,fp8,0,2.8195838928222656
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,1024,48,1,128,0,1,float16,fp8,0,2.0701866149902344
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,1024,48,1,128,0,1,fp8,fp8,0,1.4129494031270344
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,1024,48,2,128,0,1,float16,float16,0,2.066773255666097
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,1024,48,2,128,0,1,float16,fp8,0,2.080085277557373
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,1024,48,2,128,0,1,fp8,fp8,0,1.4392320315043132
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,1024,48,4,128,0,1,float16,float16,0,2.152789274851481
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,1024,48,4,128,0,1,float16,fp8,0,2.1295785903930664
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,1024,48,4,128,0,1,fp8,fp8,0,1.5259307225545247
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,1024,48,8,128,0,1,float16,float16,0,2.2785706520080566
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,1024,48,8,128,0,1,float16,fp8,0,2.2621866861979165
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,1024,48,8,128,0,1,fp8,fp8,0,1.5977813402811687
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,1024,48,48,128,0,1,float16,float16,0,1.6745813687642415
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,1024,48,1,128,0,1,float16,float16,0,0.9871359666188558
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,1024,48,48,128,0,1,fp8,fp8,0,1.3397332827250164
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,1024,48,1,128,0,1,float16,fp8,0,0.9907200336456299
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,1024,48,48,128,0,1,float16,fp8,0,1.6266239484151204
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,1024,48,1,128,0,1,fp8,fp8,0,0.6882987022399902
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,1024,48,2,128,0,1,float16,float16,0,1.0233173370361328
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,1024,48,2,128,0,1,float16,fp8,0,0.9975466728210449
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,1024,48,2,128,0,1,fp8,fp8,0,0.6976853211720785
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,1024,48,4,128,0,1,float16,float16,0,1.0548906326293945
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,1024,48,4,128,0,1,float16,fp8,0,1.0419200261433919
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,1024,48,4,128,0,1,fp8,fp8,0,0.7352320353190104
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,1024,48,8,128,0,1,float16,float16,0,1.1089920202891033
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,1024,48,8,128,0,1,float16,fp8,0,1.1098453203837078
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,1024,48,48,128,0,1,float16,float16,0,0.8227840264638265
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,1024,48,8,128,0,1,fp8,fp8,0,0.7917226950327555
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,1024,48,48,128,0,1,float16,fp8,0,0.7743146419525146
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,1024,48,48,128,0,1,fp8,fp8,0,0.6765226523081461
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,1024,48,1,128,0,1,float16,float16,0,0.43246932824452716
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,1024,48,1,128,0,1,float16,fp8,0,0.43673598766326904
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,1024,48,1,128,0,1,fp8,fp8,0,0.2810879945755005
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,1024,48,2,128,0,1,float16,float16,0,0.4479999939600627
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,1024,48,2,128,0,1,float16,fp8,0,0.4391253391901652
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,1024,48,2,128,0,1,fp8,fp8,0,0.2954240043958028
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,1024,48,4,128,0,1,float16,float16,0,0.44680531819661456
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,1024,48,4,128,0,1,float16,fp8,0,0.44390400250752765
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,1024,48,4,128,0,1,fp8,fp8,0,0.3176106611887614
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,1024,48,8,128,0,1,float16,float16,0,0.47598934173583984
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,1024,48,8,128,0,1,float16,fp8,0,0.463701327641805
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,1024,48,8,128,0,1,fp8,fp8,0,0.35891199111938477
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,1024,48,48,128,0,1,float16,float16,0,0.34508800506591797
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,1024,48,48,128,0,1,float16,fp8,0,0.30139732360839844
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,1024,48,48,128,0,1,fp8,fp8,0,0.28296534220377606
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,1024,48,1,128,0,1,float16,float16,0,0.21708800395329794
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,1024,48,1,128,0,1,float16,fp8,0,0.2208426594734192
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,1024,48,2,128,0,1,float16,float16,0,0.22869332631429037
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,1024,48,1,128,0,1,fp8,fp8,0,0.1358506679534912
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,1024,48,2,128,0,1,float16,fp8,0,0.2300586700439453
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,1024,48,2,128,0,1,fp8,fp8,0,0.1353386640548706
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,1024,48,4,128,0,1,float16,float16,0,0.22698666652043661
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,1024,48,4,128,0,1,float16,fp8,0,0.2193066676457723
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,1024,48,4,128,0,1,fp8,fp8,0,0.1358506679534912
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,1024,48,8,128,0,1,float16,float16,0,0.22613332668940225
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,1024,48,8,128,0,1,float16,fp8,0,0.2213546633720398
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,1024,48,48,128,0,1,float16,float16,0,0.13294933239618936
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,1024,48,8,128,0,1,fp8,fp8,0,0.13704533378283182
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,1024,48,48,128,0,1,float16,fp8,0,0.1293653349081675
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,1024,48,1,128,0,1,float16,float16,0,0.12151466806729634
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,1024,48,1,128,0,1,float16,fp8,0,0.12356266379356384
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,1024,48,48,128,0,1,fp8,fp8,0,0.08840533097585042
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,1024,48,1,128,0,1,fp8,fp8,0,0.08772266904513042
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,1024,48,2,128,0,1,float16,float16,0,0.13209600249926248
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,1024,48,2,128,0,1,float16,fp8,0,0.1288533310095469
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,1024,48,2,128,0,1,fp8,fp8,0,0.08413867155710857
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,1024,48,4,128,0,1,float16,float16,0,0.12270933389663696
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,1024,48,4,128,0,1,float16,fp8,0,0.12151466806729634
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,1024,48,4,128,0,1,fp8,fp8,0,0.08721066514650981
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,1024,48,8,128,0,1,fp8,fp8,0,0.08089600006739299
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,1024,48,8,128,0,1,float16,fp8,0,0.12219732999801636
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,1024,48,8,128,0,1,float16,float16,0,0.12356266379356384
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,512,48,1,128,0,1,fp8,fp8,0,9.411242802937826
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,512,48,1,128,0,1,float16,float16,0,12.812629699707031
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,512,48,1,128,0,1,float16,fp8,0,12.836692810058594
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,512,48,2,128,0,1,fp8,fp8,0,9.794560114542643
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,512,48,2,128,0,1,float16,float16,0,12.890965779622396
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,512,48,2,128,0,1,float16,fp8,0,12.99950917561849
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,512,48,4,128,0,1,float16,float16,0,13.27786636352539
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,512,48,4,128,0,1,float16,fp8,0,13.336405436197916
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,512,48,4,128,0,1,fp8,fp8,0,10.198869069417318
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,512,48,8,128,0,1,float16,fp8,0,14.055423736572266
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,512,48,8,128,0,1,float16,float16,0,14.285311381022135
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,512,48,8,128,0,1,fp8,fp8,0,11.357354482014975
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,512,48,48,128,0,1,float16,fp8,0,11.783509572347006
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,512,48,48,128,0,1,float16,float16,0,12.552703857421875
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,512,48,1,128,0,1,float16,float16,0,6.191786448160808
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,512,48,48,128,0,1,fp8,fp8,0,10.629802703857422
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,512,48,1,128,0,1,float16,fp8,0,6.063103993733724
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,512,48,1,128,0,1,fp8,fp8,0,4.634794553120931
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,512,48,2,128,0,1,float16,float16,0,6.3288319905598955
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,512,48,2,128,0,1,fp8,fp8,0,4.658517201741536
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,512,48,2,128,0,1,float16,fp8,0,6.3776429494222
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,512,48,4,128,0,1,float16,float16,0,6.430890401204427
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,512,48,4,128,0,1,fp8,fp8,0,4.933120091756185
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,512,48,4,128,0,1,float16,fp8,0,6.447445551554362
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,512,48,8,128,0,1,float16,float16,0,6.835541407267253
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,512,48,8,128,0,1,float16,fp8,0,6.854997634887695
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,512,48,8,128,0,1,fp8,fp8,0,5.474986394246419
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,512,48,1,128,0,1,float16,float16,0,3.0658559799194336
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,512,48,48,128,0,1,float16,float16,0,6.268757502237956
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,512,48,48,128,0,1,fp8,fp8,0,5.1988480885823565
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,512,48,48,128,0,1,float16,fp8,0,5.879637400309245
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,512,48,1,128,0,1,float16,fp8,0,3.077631950378418
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,512,48,1,128,0,1,fp8,fp8,0,2.2213973999023438
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,512,48,2,128,0,1,float16,float16,0,3.1667200724283853
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,512,48,2,128,0,1,float16,fp8,0,3.1441920598347983
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,512,48,2,128,0,1,fp8,fp8,0,2.240682601928711
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,512,48,4,128,0,1,float16,float16,0,3.213653246561686
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,512,48,4,128,0,1,fp8,fp8,0,2.3569067319234214
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,512,48,4,128,0,1,float16,fp8,0,3.186175982157389
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,512,48,8,128,0,1,float16,fp8,0,3.2680959701538086
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,512,48,8,128,0,1,float16,float16,0,3.4175999959309897
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,512,48,8,128,0,1,fp8,fp8,0,2.6564265886942544
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,512,48,48,128,0,1,float16,float16,0,3.120981216430664
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,512,48,48,128,0,1,float16,fp8,0,2.9644800821940103
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,512,48,1,128,0,1,float16,fp8,0,1.4755840301513672
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,512,48,48,128,0,1,fp8,fp8,0,2.5270613034566245
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,512,48,1,128,0,1,float16,float16,0,1.508522669474284
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,512,48,1,128,0,1,fp8,fp8,0,1.037823994954427
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,512,48,2,128,0,1,float16,float16,0,1.5262719790140789
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,512,48,2,128,0,1,float16,fp8,0,1.5320746103922527
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,512,48,2,128,0,1,fp8,fp8,0,1.0792960325876872
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,512,48,4,128,0,1,float16,float16,0,1.566208044687907
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,512,48,4,128,0,1,float16,fp8,0,1.5397547086079915
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,512,48,4,128,0,1,fp8,fp8,0,1.1298133532206218
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,512,48,8,128,0,1,float16,float16,0,1.6672426859537761
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,512,48,8,128,0,1,float16,fp8,0,1.6262826919555664
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,512,48,8,128,0,1,fp8,fp8,0,1.2475732962290447
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,512,48,48,128,0,1,float16,float16,0,1.5465812683105469
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,512,48,48,128,0,1,float16,fp8,0,1.4728533426920574
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,512,48,1,128,0,1,float16,float16,0,0.6956373055775961
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,512,48,1,128,0,1,float16,fp8,0,0.6917119820912679
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,512,48,48,128,0,1,fp8,fp8,0,1.2296533584594727
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,512,48,1,128,0,1,fp8,fp8,0,0.502613345781962
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,512,48,2,128,0,1,float16,float16,0,0.7152640024820963
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,512,48,2,128,0,1,float16,fp8,0,0.7294293244679769
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,512,48,2,128,0,1,fp8,fp8,0,0.5686613321304321
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,512,48,4,128,0,1,float16,float16,0,0.7534933090209961
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,512,48,4,128,0,1,fp8,fp8,0,0.5553493499755859
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,512,48,4,128,0,1,float16,fp8,0,0.7417173385620117
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,512,48,8,128,0,1,float16,float16,0,0.8108373483022054
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,512,48,8,128,0,1,float16,fp8,0,0.7898453076680502
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,512,48,8,128,0,1,fp8,fp8,0,0.6343679825464884
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,512,48,48,128,0,1,float16,float16,0,0.733354647954305
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,512,48,48,128,0,1,float16,fp8,0,0.6867626508076986
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,512,48,48,128,0,1,fp8,fp8,0,0.5708800156911215
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,512,48,1,128,0,1,float16,float16,0,0.2805759906768799
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,512,48,1,128,0,1,float16,fp8,0,0.27938133478164673
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,512,48,1,128,0,1,fp8,fp8,0,0.1919999917348226
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,512,48,2,128,0,1,float16,float16,0,0.2879146734873454
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,512,48,2,128,0,1,float16,fp8,0,0.291157325108846
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,512,48,2,128,0,1,fp8,fp8,0,0.20821332931518555
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,512,48,4,128,0,1,float16,float16,0,0.3022506634394328
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,512,48,4,128,0,1,float16,fp8,0,0.3002026677131653
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,512,48,4,128,0,1,fp8,fp8,0,0.23705599705378214
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,512,48,8,128,0,1,float16,float16,0,0.33536001046498615
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,512,48,8,128,0,1,float16,fp8,0,0.3293866713841756
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,512,48,8,128,0,1,fp8,fp8,0,0.2686293323834737
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,512,48,48,128,0,1,float16,float16,0,0.2585600018501282
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,512,48,48,128,0,1,float16,fp8,0,0.21282132466634116
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,512,48,48,128,0,1,fp8,fp8,0,0.23756800095240274
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,512,48,1,128,0,1,float16,float16,0,0.13994666934013367
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,512,48,1,128,0,1,float16,fp8,0,0.13738666971524557
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,512,48,1,128,0,1,fp8,fp8,0,0.09130666653315227
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,512,48,2,128,0,1,float16,float16,0,0.1454080045223236
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,512,48,2,128,0,1,float16,fp8,0,0.1397760013739268
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,512,48,2,128,0,1,fp8,fp8,0,0.09215999643007915
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,512,48,4,128,0,1,float16,fp8,0,0.14028799533843994
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,512,48,4,128,0,1,float16,float16,0,0.14421332875887552
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,512,48,4,128,0,1,fp8,fp8,0,0.09130666653315227
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,512,48,8,128,0,1,float16,float16,0,0.145578662554423
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,512,48,8,128,0,1,float16,fp8,0,0.14199466506640115
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,512,48,8,128,0,1,fp8,fp8,0,0.09130666653315227
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,512,48,48,128,0,1,float16,float16,0,0.08994133273760478
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,512,48,48,128,0,1,float16,fp8,0,0.08942932883898418
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,512,48,48,128,0,1,fp8,fp8,0,0.05905066430568695
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,512,48,1,128,0,1,float16,fp8,0,0.07987200220425923
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,512,48,1,128,0,1,float16,float16,0,0.07918933530648549
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,512,48,1,128,0,1,fp8,fp8,0,0.05341866612434387
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,512,48,2,128,0,1,float16,float16,0,0.08055466910203297
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,512,48,2,128,0,1,float16,fp8,0,0.08038400113582611
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,512,48,4,128,0,1,float16,float16,0,0.07867733140786488
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,512,48,2,128,0,1,fp8,fp8,0,0.053077335158983864
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,512,48,4,128,0,1,float16,fp8,0,0.0795306662718455
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,512,48,4,128,0,1,fp8,fp8,0,0.054272000988324486
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,512,48,8,128,0,1,float16,float16,0,0.0795306662718455
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,512,48,8,128,0,1,float16,fp8,0,0.07850666840871175
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,512,48,8,128,0,1,fp8,fp8,0,0.052906667192777
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,512,48,48,128,0,1,float16,float16,0,0.05017599960168203
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,512,48,48,128,0,1,float16,fp8,0,0.04915200173854828
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,512,48,48,128,0,1,fp8,fp8,0,0.03583999971548716
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,512,48,1,128,0,1,float16,float16,0,0.048469334840774536
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,512,48,1,128,0,1,float16,fp8,0,0.04863999783992767
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,512,48,2,128,0,1,float16,float16,0,0.04983466863632202
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,512,48,1,128,0,1,fp8,fp8,0,0.03379199902216593
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,512,48,2,128,0,1,float16,fp8,0,0.04898133377234141
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,512,48,4,128,0,1,float16,float16,0,0.04710400104522705
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,512,48,2,128,0,1,fp8,fp8,0,0.034815999368826546
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,512,48,4,128,0,1,float16,fp8,0,0.04727466901143392
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,512,48,4,128,0,1,fp8,fp8,0,0.03379199902216593
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,512,48,8,128,0,1,float16,float16,0,0.048298666874567665
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,512,48,8,128,0,1,float16,fp8,0,0.0481279989083608
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,512,48,8,128,0,1,fp8,fp8,0,0.03379199902216593
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,256,48,1,128,0,1,fp8,fp8,0,3.9569066365559897
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,256,48,1,128,0,1,float16,fp8,0,5.381290435791016
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,256,48,1,128,0,1,float16,float16,0,5.392896016438802
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,256,48,2,128,0,1,float16,float16,0,5.50382932027181
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,256,48,2,128,0,1,fp8,fp8,0,4.114261309305827
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,256,48,2,128,0,1,float16,fp8,0,5.47874132792155
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,256,48,4,128,0,1,float16,float16,0,5.84550412495931
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,256,48,4,128,0,1,float16,fp8,0,5.785770416259766
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,256,48,4,128,0,1,fp8,fp8,0,4.388010660807292
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,256,48,8,128,0,1,float16,float16,0,6.503082911173503
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,256,48,8,128,0,1,float16,fp8,0,6.359551747639974
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,256,48,8,128,0,1,fp8,fp8,0,4.972031911214192
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,256,48,48,128,0,1,float16,float16,0,6.252543767293294
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,256,48,48,128,0,1,float16,fp8,0,5.865983963012695
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,256,48,1,128,0,1,float16,float16,0,2.6117119789123535
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,256,48,48,128,0,1,fp8,fp8,0,5.174954732259114
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,256,48,1,128,0,1,float16,fp8,0,2.605397383371989
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,256,48,1,128,0,1,fp8,fp8,0,1.8742613792419434
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,256,48,2,128,0,1,float16,float16,0,2.8392105102539062
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,256,48,2,128,0,1,float16,fp8,0,2.8533760706583657
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,256,48,2,128,0,1,fp8,fp8,0,1.9653973579406738
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,256,48,4,128,0,1,float16,float16,0,2.872661272684733
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,256,48,4,128,0,1,fp8,fp8,0,2.095786730448405
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,256,48,4,128,0,1,float16,fp8,0,2.807978630065918
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,256,48,8,128,0,1,float16,float16,0,3.1476052602132163
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,256,48,8,128,0,1,float16,fp8,0,3.096405347188314
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,256,48,8,128,0,1,fp8,fp8,0,2.388480027516683
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,256,48,48,128,0,1,float16,float16,0,3.116373380025228
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,256,48,48,128,0,1,float16,fp8,0,2.9550933837890625
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,256,48,1,128,0,1,float16,float16,0,1.3670399983723958
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,256,48,48,128,0,1,fp8,fp8,0,2.5581226348876953
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,256,48,1,128,0,1,float16,fp8,0,1.3608959515889485
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,256,48,1,128,0,1,fp8,fp8,0,0.9408853054046631
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,256,48,2,128,0,1,float16,float16,0,1.3893973032633464
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,256,48,2,128,0,1,float16,fp8,0,1.293824036916097
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,256,48,2,128,0,1,fp8,fp8,0,1.0026666323343914
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,256,48,4,128,0,1,float16,fp8,0,1.3818880716959636
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,256,48,4,128,0,1,float16,float16,0,1.4895787239074707
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,256,48,4,128,0,1,fp8,fp8,0,1.002837340037028
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,256,48,8,128,0,1,float16,float16,0,1.5559679667154949
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,256,48,8,128,0,1,float16,fp8,0,1.5235412915547688
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,256,48,8,128,0,1,fp8,fp8,0,1.1289599736531575
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,256,48,48,128,0,1,float16,fp8,0,1.4660266240437825
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,256,48,48,128,0,1,float16,float16,0,1.5561386744181316
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,256,48,1,128,0,1,float16,float16,0,0.5756586790084839
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,256,48,1,128,0,1,float16,fp8,0,0.611840009689331
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,256,48,48,128,0,1,fp8,fp8,0,1.1892053286234539
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,256,48,1,128,0,1,fp8,fp8,0,0.4210346539815267
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,256,48,2,128,0,1,float16,float16,0,0.5954560041427612
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,256,48,2,128,0,1,float16,fp8,0,0.5872639815012614
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,256,48,2,128,0,1,fp8,fp8,0,0.4411733150482178
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,256,48,4,128,0,1,float16,float16,0,0.6927359898885092
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,256,48,4,128,0,1,float16,fp8,0,0.6347093184789022
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,256,48,4,128,0,1,fp8,fp8,0,0.4720640182495117
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,256,48,8,128,0,1,float16,float16,0,0.7340373198191324
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,256,48,8,128,0,1,fp8,fp8,0,0.5309439897537231
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,256,48,8,128,0,1,float16,fp8,0,0.716970682144165
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,256,48,48,128,0,1,float16,float16,0,0.7412052949269613
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,256,48,48,128,0,1,float16,fp8,0,0.6859093507130941
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,256,48,48,128,0,1,fp8,fp8,0,0.562175989151001
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,256,48,1,128,0,1,float16,float16,0,0.2121386726697286
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,256,48,1,128,0,1,float16,fp8,0,0.20889600118001303
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,256,48,1,128,0,1,fp8,fp8,0,0.14813866217931113
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,256,48,2,128,0,1,float16,float16,0,0.2172586719195048
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,256,48,2,128,0,1,fp8,fp8,0,0.1621333360671997
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,256,48,2,128,0,1,float16,fp8,0,0.21862399578094482
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,256,48,4,128,0,1,float16,float16,0,0.22937599817911783
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,256,48,4,128,0,1,float16,fp8,0,0.2249386707941691
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,256,48,4,128,0,1,fp8,fp8,0,0.1914880077044169
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,256,48,8,128,0,1,float16,float16,0,0.2653866608937581
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,256,48,8,128,0,1,float16,fp8,0,0.2513920068740845
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,256,48,48,128,0,1,float16,float16,0,0.2387626568476359
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,256,48,8,128,0,1,fp8,fp8,0,0.22681599855422974
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,256,48,48,128,0,1,float16,fp8,0,0.1807360053062439
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,256,48,48,128,0,1,fp8,fp8,0,0.20872533321380615
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,256,48,1,128,0,1,float16,float16,0,0.09437867005666097
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,256,48,1,128,0,1,float16,fp8,0,0.09471999605496724
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,256,48,2,128,0,1,float16,float16,0,0.09710933764775594
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,256,48,1,128,0,1,fp8,fp8,0,0.067071999112765
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,256,48,2,128,0,1,float16,fp8,0,0.09676800171534221
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,256,48,2,128,0,1,fp8,fp8,0,0.06877866884072621
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,256,48,4,128,0,1,float16,float16,0,0.09608532985051473
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,256,48,4,128,0,1,float16,fp8,0,0.09454933802286784
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,256,48,4,128,0,1,fp8,fp8,0,0.06741333504517873
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,256,48,8,128,0,1,float16,float16,0,0.09642666578292847
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,256,48,8,128,0,1,float16,fp8,0,0.09437867005666097
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,256,48,8,128,0,1,fp8,fp8,0,0.06809600194295247
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,256,48,48,128,0,1,float16,float16,0,0.06126933296521505
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,256,48,48,128,0,1,float16,fp8,0,0.05922133227189382
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,256,48,48,128,0,1,fp8,fp8,0,0.04437333345413208
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,256,48,1,128,0,1,float16,float16,0,0.05376000205675761
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,256,48,1,128,0,1,float16,fp8,0,0.053930665055910744
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,256,48,1,128,0,1,fp8,fp8,0,0.039594667653242745
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,256,48,2,128,0,1,float16,float16,0,0.05444266895453135
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,256,48,2,128,0,1,float16,fp8,0,0.05597866574923197
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,256,48,2,128,0,1,fp8,fp8,0,0.040618665516376495
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,256,48,4,128,0,1,float16,float16,0,0.05341866612434387
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,256,48,4,128,0,1,fp8,fp8,0,0.040106666584809623
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,256,48,4,128,0,1,float16,fp8,0,0.054272000988324486
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,256,48,8,128,0,1,float16,float16,0,0.05341866612434387
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,256,48,8,128,0,1,float16,fp8,0,0.05444266895453135
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,256,48,8,128,0,1,fp8,fp8,0,0.03976533313592275
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,256,48,48,128,0,1,float16,float16,0,0.03822933385769526
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,256,48,48,128,0,1,float16,fp8,0,0.0365226666132609
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,256,48,48,128,0,1,fp8,fp8,0,0.02867199977238973
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,256,48,1,128,0,1,float16,float16,0,0.03532800078392029
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,256,48,1,128,0,1,fp8,fp8,0,0.026965332527955372
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,256,48,1,128,0,1,float16,fp8,0,0.03498666733503342
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,256,48,2,128,0,1,float16,float16,0,0.03549866626660029
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,256,48,2,128,0,1,float16,fp8,0,0.03532800078392029
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,256,48,2,128,0,1,fp8,fp8,0,0.027477333943049114
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,256,48,4,128,0,1,float16,float16,0,0.03498666733503342
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,256,48,4,128,0,1,float16,fp8,0,0.03498666733503342
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,256,48,4,128,0,1,fp8,fp8,0,0.027136000494162243
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,256,48,8,128,0,1,float16,float16,0,0.03498666733503342
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,256,48,8,128,0,1,float16,fp8,0,0.03498666733503342
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,256,48,8,128,0,1,fp8,fp8,0,0.027136000494162243
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,256,48,48,128,0,1,float16,float16,0,0.0240639994541804
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,256,48,48,128,0,1,float16,fp8,0,0.023893333971500397
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,256,48,48,128,0,1,fp8,fp8,0,0.018944000204404194
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,256,48,1,128,0,1,float16,float16,0,0.023039999107519787
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,256,48,1,128,0,1,float16,fp8,0,0.023381332556406658
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,256,48,1,128,0,1,fp8,fp8,0,0.018090666582187016
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,256,48,2,128,0,1,float16,float16,0,0.023552000522613525
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,256,48,2,128,0,1,float16,fp8,0,0.02372266600529353
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,256,48,2,128,0,1,fp8,fp8,0,0.01826133330663045
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,256,48,4,128,0,1,float16,float16,0,0.023381332556406658
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,256,48,4,128,0,1,float16,fp8,0,0.023381332556406658
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,256,48,4,128,0,1,fp8,fp8,0,0.01826133330663045
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,256,48,8,128,0,1,float16,float16,0,0.023381332556406658
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,256,48,8,128,0,1,float16,fp8,0,0.023381332556406658
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,256,48,8,128,0,1,fp8,fp8,0,0.018090666582187016
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,128,48,1,128,0,1,float16,float16,0,2.608469327290853
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,128,48,1,128,0,1,float16,fp8,0,2.6033493677775064
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,128,48,1,128,0,1,fp8,fp8,0,1.8391040166219075
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,128,48,2,128,0,1,float16,float16,0,2.6668373743693032
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,128,48,2,128,0,1,float16,fp8,0,2.642261346181234
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,128,48,2,128,0,1,fp8,fp8,0,1.885525385538737
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,128,48,4,128,0,1,float16,float16,0,2.8342612584431968
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,128,48,4,128,0,1,float16,fp8,0,2.80729611714681
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,128,48,4,128,0,1,fp8,fp8,0,2.041855971018473
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,128,48,8,128,0,1,fp8,fp8,0,2.3746560414632163
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,128,48,8,128,0,1,float16,float16,0,3.1433385213216147
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,128,48,8,128,0,1,float16,fp8,0,3.082922617594401
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,128,48,48,128,0,1,float16,float16,0,3.145557403564453
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,128,48,48,128,0,1,float16,fp8,0,2.9818881352742515
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,128,48,48,128,0,1,fp8,fp8,0,2.562901337941488
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,128,48,1,128,0,1,float16,float16,0,1.2774399916330974
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,128,48,1,128,0,1,float16,fp8,0,1.2707839806874592
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,128,48,1,128,0,1,fp8,fp8,0,0.9060693581899008
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,128,48,2,128,0,1,float16,float16,0,1.394858678181966
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,128,48,2,128,0,1,float16,fp8,0,1.3914453188578289
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,128,48,2,128,0,1,fp8,fp8,0,0.892245372136434
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,128,48,4,128,0,1,float16,float16,0,1.3957120577494304
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,128,48,4,128,0,1,float16,fp8,0,1.3858133951822917
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,128,48,4,128,0,1,fp8,fp8,0,0.9615360101064047
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,128,48,8,128,0,1,float16,float16,0,1.5527253150939941
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,128,48,8,128,0,1,float16,fp8,0,1.5187625885009766
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,128,48,8,128,0,1,fp8,fp8,0,1.1093333562215169
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,128,48,48,128,0,1,float16,float16,0,1.5639893213907878
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,128,48,48,128,0,1,float16,fp8,0,1.4854826927185059
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,128,48,48,128,0,1,fp8,fp8,0,1.207466681798299
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,128,48,1,128,0,1,float16,float16,0,0.6196906566619873
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,128,48,1,128,0,1,float16,fp8,0,0.6217386722564697
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,128,48,1,128,0,1,fp8,fp8,0,0.4251306851704915
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,128,48,2,128,0,1,float16,float16,0,0.6038186550140381
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,128,48,2,128,0,1,float16,fp8,0,0.5990399916966757
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,128,48,2,128,0,1,fp8,fp8,0,0.44151465098063153
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,128,48,4,128,0,1,float16,float16,0,0.6886400381724039
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,128,48,4,128,0,1,fp8,fp8,0,0.44629331429799396
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,128,48,4,128,0,1,float16,fp8,0,0.6838613351186117
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,128,48,8,128,0,1,float16,float16,0,0.7379626433054606
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,128,48,8,128,0,1,fp8,fp8,0,0.5092693169911703
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,128,48,8,128,0,1,float16,fp8,0,0.7154346307118734
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,128,48,48,128,0,1,float16,float16,0,0.7403519948323568
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,128,48,48,128,0,1,float16,fp8,0,0.6941013336181641
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,128,48,48,128,0,1,fp8,fp8,0,0.5560319821039835
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,128,48,1,128,0,1,float16,float16,0,0.16947199900945029
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,128,48,1,128,0,1,float16,fp8,0,0.16793600718180338
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,128,48,1,128,0,1,fp8,fp8,0,0.11366400122642517
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,128,48,2,128,0,1,float16,float16,0,0.19336533546447754
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,128,48,2,128,0,1,float16,fp8,0,0.17629865805308023
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,128,48,2,128,0,1,fp8,fp8,0,0.13209600249926248
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,128,48,4,128,0,1,float16,float16,0,0.20411733786265054
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,128,48,4,128,0,1,float16,fp8,0,0.1960960030555725
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,128,48,4,128,0,1,fp8,fp8,0,0.16708266735076904
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,128,48,8,128,0,1,float16,float16,0,0.25548799832661945
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,128,48,8,128,0,1,float16,fp8,0,0.24166399240493774
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,128,48,8,128,0,1,fp8,fp8,0,0.20155733823776245
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,128,48,48,128,0,1,float16,float16,0,0.24149332443873087
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,128,48,48,128,0,1,float16,fp8,0,0.17749333381652832
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,128,48,48,128,0,1,fp8,fp8,0,0.1962666710217794
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,128,48,1,128,0,1,float16,float16,0,0.07714133461316426
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,128,48,1,128,0,1,float16,fp8,0,0.07441066702206929
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,128,48,1,128,0,1,fp8,fp8,0,0.053930665055910744
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,128,48,2,128,0,1,float16,fp8,0,0.0730453332265218
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,128,48,2,128,0,1,float16,float16,0,0.0766293356815974
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,128,48,2,128,0,1,fp8,fp8,0,0.05461333195368449
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,128,48,4,128,0,1,float16,float16,0,0.07355733215808868
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,128,48,4,128,0,1,float16,fp8,0,0.07355733215808868
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,128,48,4,128,0,1,fp8,fp8,0,0.054101333022117615
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,128,48,8,128,0,1,float16,float16,0,0.0769706666469574
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,128,48,8,128,0,1,float16,fp8,0,0.07509333391984303
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,128,48,8,128,0,1,fp8,fp8,0,0.054101333022117615
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,128,48,48,128,0,1,float16,fp8,0,0.04778666794300079
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,128,48,48,128,0,1,float16,float16,0,0.04915200173854828
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,128,48,48,128,0,1,fp8,fp8,0,0.03703466554482778
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,128,48,1,128,0,1,float16,float16,0,0.04113066693147024
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,128,48,1,128,0,1,float16,fp8,0,0.04095999896526337
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,128,48,1,128,0,1,fp8,fp8,0,0.03259733319282532
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,128,48,2,128,0,1,float16,float16,0,0.04232533276081085
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,128,48,2,128,0,1,float16,fp8,0,0.04147200038035711
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,128,48,2,128,0,1,fp8,fp8,0,0.03379199902216593
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,128,48,4,128,0,1,float16,float16,0,0.04181333382924398
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,128,48,4,128,0,1,float16,fp8,0,0.04147200038035711
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,128,48,4,128,0,1,fp8,fp8,0,0.03276800115903219
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,128,48,8,128,0,1,float16,float16,0,0.04113066693147024
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,128,48,8,128,0,1,float16,fp8,0,0.04113066693147024
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,128,48,8,128,0,1,fp8,fp8,0,0.03293866664171219
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,128,48,48,128,0,1,float16,float16,0,0.030037333567937214
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,128,48,1,128,0,1,float16,float16,0,0.027647999425729115
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,128,48,48,128,0,1,float16,fp8,0,0.029696000119050343
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,128,48,48,128,0,1,fp8,fp8,0,0.024746666351954143
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,128,48,1,128,0,1,float16,fp8,0,0.027647999425729115
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,128,48,1,128,0,1,fp8,fp8,0,0.02218666672706604
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,128,48,2,128,0,1,float16,float16,0,0.027477333943049114
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,128,48,2,128,0,1,float16,fp8,0,0.027989332874615986
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,128,48,2,128,0,1,fp8,fp8,0,0.02218666672706604
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,128,48,4,128,0,1,float16,float16,0,0.027477333943049114
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,128,48,4,128,0,1,float16,fp8,0,0.027647999425729115
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,128,48,8,128,0,1,float16,float16,0,0.027477333943049114
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,128,48,4,128,0,1,fp8,fp8,0,0.022357332209746044
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,128,48,8,128,0,1,float16,fp8,0,0.027306665976842243
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,128,48,8,128,0,1,fp8,fp8,0,0.02252800017595291
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,128,48,48,128,0,1,float16,float16,0,0.020138667275508244
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,128,48,48,128,0,1,float16,fp8,0,0.019626667102177937
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,128,48,48,128,0,1,fp8,fp8,0,0.01621333385507266
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,128,48,1,128,0,1,float16,float16,0,0.01877333347996076
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,128,48,1,128,0,1,float16,fp8,0,0.018602666755517323
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,128,48,1,128,0,1,fp8,fp8,0,0.01621333385507266
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,128,48,2,128,0,1,float16,float16,0,0.01877333347996076
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,128,48,2,128,0,1,fp8,fp8,0,0.016042667130629223
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,128,48,2,128,0,1,float16,fp8,0,0.01911466692884763
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,128,48,4,128,0,1,float16,float16,0,0.018944000204404194
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,128,48,4,128,0,1,float16,fp8,0,0.018944000204404194
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,128,48,4,128,0,1,fp8,fp8,0,0.015872000406185787
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,128,48,8,128,0,1,float16,float16,0,0.01877333347996076
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,128,48,8,128,0,1,float16,fp8,0,0.01911466692884763
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,128,48,8,128,0,1,fp8,fp8,0,0.015872000406185787
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,128,48,48,128,0,1,float16,float16,0,0.014335999886194864
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,128,48,48,128,0,1,float16,fp8,0,0.014165333161751429
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,128,48,48,128,0,1,fp8,fp8,0,0.011605333536863327
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,128,48,1,128,0,1,float16,float16,0,0.013653332988421122
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,128,48,1,128,0,1,float16,fp8,0,0.013823999712864557
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,128,48,1,128,0,1,fp8,fp8,0,0.011605333536863327
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,128,48,2,128,0,1,float16,float16,0,0.014165333161751429
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,128,48,2,128,0,1,float16,fp8,0,0.013994666437307993
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,128,48,2,128,0,1,fp8,fp8,0,0.012117333710193634
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,128,48,4,128,0,1,float16,float16,0,0.013823999712864557
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,128,48,4,128,0,1,float16,fp8,0,0.013823999712864557
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,128,48,4,128,0,1,fp8,fp8,0,0.011776000261306763
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,128,48,8,128,0,1,float16,float16,0,0.013823999712864557
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,128,48,8,128,0,1,float16,fp8,0,0.013823999712864557
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,128,48,8,128,0,1,fp8,fp8,0,0.011776000261306763
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,64,48,1,128,0,1,float16,float16,0,1.272320032119751
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,64,48,1,128,0,1,fp8,fp8,0,0.8543573220570883
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,64,48,1,128,0,1,float16,fp8,0,1.2721493244171143
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,64,48,2,128,0,1,float16,float16,0,1.3120853106180828
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,64,48,2,128,0,1,float16,fp8,0,1.3052586714426677
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,64,48,2,128,0,1,fp8,fp8,0,0.8948053518931071
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,64,48,4,128,0,1,float16,float16,0,1.3957120577494304
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,64,48,4,128,0,1,float16,fp8,0,1.3771093686421711
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,64,48,4,128,0,1,fp8,fp8,0,0.9842346509297689
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,64,48,8,128,0,1,float16,float16,0,1.5481173197428386
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,64,48,8,128,0,1,float16,fp8,0,1.5250773429870605
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,64,48,8,128,0,1,fp8,fp8,0,1.125205357869466
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,64,48,48,128,0,1,float16,float16,0,1.55187193552653
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,64,48,48,128,0,1,float16,fp8,0,1.4834346771240234
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,64,48,48,128,0,1,fp8,fp8,0,1.2192426522572835
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,64,48,1,128,0,1,float16,float16,0,0.5833386580149332
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,64,48,1,128,0,1,float16,fp8,0,0.5818026860555013
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,64,48,1,128,0,1,fp8,fp8,0,0.4145493507385254
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,64,48,2,128,0,1,float16,float16,0,0.624127984046936
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,64,48,2,128,0,1,float16,fp8,0,0.629589319229126
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,64,48,2,128,0,1,fp8,fp8,0,0.3979946772257487
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,64,48,4,128,0,1,float16,float16,0,0.6528000036875407
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,64,48,4,128,0,1,float16,fp8,0,0.641706665356954
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,64,48,4,128,0,1,fp8,fp8,0,0.43383467197418213
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,64,48,8,128,0,1,float16,float16,0,0.7439359823862711
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,64,48,8,128,0,1,float16,fp8,0,0.7224319775899252
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,64,48,8,128,0,1,fp8,fp8,0,0.5104639927546183
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,64,48,48,128,0,1,float16,float16,0,0.756223996480306
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,64,48,48,128,0,1,float16,fp8,0,0.6959786415100098
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,64,48,48,128,0,1,fp8,fp8,0,0.5725866556167603
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,64,48,1,128,0,1,float16,float16,0,0.18141865730285645
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,64,48,1,128,0,1,float16,fp8,0,0.1795413295427958
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,64,48,1,128,0,1,fp8,fp8,0,0.10359467069307964
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,64,48,2,128,0,1,float16,float16,0,0.17885865767796835
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,64,48,2,128,0,1,float16,fp8,0,0.17459199825922647
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,64,48,2,128,0,1,fp8,fp8,0,0.11776000261306763
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,64,48,4,128,0,1,float16,float16,0,0.21486934026082358
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,64,48,4,128,0,1,float16,fp8,0,0.21230934063593546
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,64,48,4,128,0,1,fp8,fp8,0,0.1565013329188029
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,64,48,8,128,0,1,float16,float16,0,0.2578773299853007
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,64,48,8,128,0,1,float16,fp8,0,0.24337067206700644
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,64,48,8,128,0,1,fp8,fp8,0,0.18995199600855509
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,64,48,48,128,0,1,float16,float16,0,0.24320000410079956
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,64,48,48,128,0,1,float16,fp8,0,0.19950934251149496
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,64,48,48,128,0,1,fp8,fp8,0,0.19114667177200317
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,64,48,1,128,0,1,float16,float16,0,0.061610668897628784
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,64,48,1,128,0,1,float16,fp8,0,0.06195199986298879
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,64,48,2,128,0,1,float16,float16,0,0.06229333579540253
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,64,48,1,128,0,1,fp8,fp8,0,0.048469334840774536
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,64,48,2,128,0,1,float16,fp8,0,0.0628053347269694
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,64,48,2,128,0,1,fp8,fp8,0,0.049322664737701416
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,64,48,4,128,0,1,float16,float16,0,0.06178133189678192
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,64,48,4,128,0,1,float16,fp8,0,0.06229333579540253
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,64,48,4,128,0,1,fp8,fp8,0,0.048469334840774536
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,64,48,8,128,0,1,float16,float16,0,0.06451199948787689
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,64,48,8,128,0,1,float16,fp8,0,0.06297599772612254
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,64,48,48,128,0,1,float16,float16,0,0.04351999859015147
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,64,48,8,128,0,1,fp8,fp8,0,0.04983466863632202
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,64,48,48,128,0,1,float16,fp8,0,0.04147200038035711
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,64,48,48,128,0,1,fp8,fp8,0,0.03276800115903219
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,64,48,1,128,0,1,float16,float16,0,0.03601066768169403
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,64,48,1,128,0,1,float16,fp8,0,0.03583999971548716
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,64,48,1,128,0,1,fp8,fp8,0,0.02867199977238973
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,64,48,2,128,0,1,float16,float16,0,0.03601066768169403
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,64,48,2,128,0,1,float16,fp8,0,0.03618133316437403
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,64,48,2,128,0,1,fp8,fp8,0,0.02918400118748347
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,64,48,4,128,0,1,float16,float16,0,0.03618133316437403
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,64,48,4,128,0,1,float16,fp8,0,0.03566933423280716
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,64,48,4,128,0,1,fp8,fp8,0,0.0290133332212766
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,64,48,8,128,0,1,float16,float16,0,0.03601066768169403
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,64,48,8,128,0,1,float16,fp8,0,0.03618133316437403
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,64,48,8,128,0,1,fp8,fp8,0,0.029696000119050343
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,64,48,48,128,0,1,float16,float16,0,0.027477333943049114
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,64,48,48,128,0,1,fp8,fp8,0,0.02252800017595291
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,64,48,48,128,0,1,float16,fp8,0,0.0266239990790685
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,64,48,1,128,0,1,float16,float16,0,0.023552000522613525
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,64,48,1,128,0,1,float16,fp8,0,0.024405332903067272
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,64,48,1,128,0,1,fp8,fp8,0,0.019797333826621372
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,64,48,2,128,0,1,float16,float16,0,0.024234667420387268
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,64,48,2,128,0,1,float16,fp8,0,0.0240639994541804
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,64,48,2,128,0,1,fp8,fp8,0,0.020479999482631683
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,64,48,4,128,0,1,float16,float16,0,0.024234667420387268
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,64,48,4,128,0,1,float16,fp8,0,0.024405332903067272
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,64,48,4,128,0,1,fp8,fp8,0,0.019968000551064808
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,64,48,8,128,0,1,float16,float16,0,0.024234667420387268
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,64,48,8,128,0,1,float16,fp8,0,0.0240639994541804
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,64,48,8,128,0,1,fp8,fp8,0,0.020309332758188248
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,64,48,48,128,0,1,float16,fp8,0,0.0170666662355264
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,64,48,48,128,0,1,float16,float16,0,0.0170666662355264
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,64,48,48,128,0,1,fp8,fp8,0,0.015018666783968607
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,64,48,1,128,0,1,float16,fp8,0,0.015530666957298914
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,64,48,1,128,0,1,float16,float16,0,0.01570133368174235
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,64,48,1,128,0,1,fp8,fp8,0,0.013653332988421122
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,64,48,2,128,0,1,float16,float16,0,0.01570133368174235
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,64,48,2,128,0,1,float16,fp8,0,0.016042667130629223
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,64,48,2,128,0,1,fp8,fp8,0,0.014677333335081736
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,64,48,4,128,0,1,float16,float16,0,0.015872000406185787
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,64,48,4,128,0,1,float16,fp8,0,0.016042667130629223
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,64,48,4,128,0,1,fp8,fp8,0,0.013823999712864557
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,64,48,8,128,0,1,float16,float16,0,0.016042667130629223
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,64,48,8,128,0,1,float16,fp8,0,0.015872000406185787
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,64,48,8,128,0,1,fp8,fp8,0,0.014165333161751429
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,64,48,48,128,0,1,float16,float16,0,0.012117333710193634
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,64,48,48,128,0,1,float16,fp8,0,0.012117333710193634
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,64,48,48,128,0,1,fp8,fp8,0,0.010581333190202713
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,64,48,1,128,0,1,float16,float16,0,0.011605333536863327
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,64,48,1,128,0,1,float16,fp8,0,0.011605333536863327
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,64,48,1,128,0,1,fp8,fp8,0,0.010581333190202713
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,64,48,2,128,0,1,float16,float16,0,0.011605333536863327
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,64,48,2,128,0,1,float16,fp8,0,0.011946666985750198
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,64,48,2,128,0,1,fp8,fp8,0,0.010410666465759277
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,64,48,4,128,0,1,float16,float16,0,0.011776000261306763
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,64,48,4,128,0,1,float16,fp8,0,0.011605333536863327
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,64,48,8,128,0,1,float16,float16,0,0.011605333536863327
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,64,48,4,128,0,1,fp8,fp8,0,0.010410666465759277
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,64,48,8,128,0,1,float16,fp8,0,0.011605333536863327
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,64,48,8,128,0,1,fp8,fp8,0,0.010410666465759277
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,64,48,48,128,0,1,float16,float16,0,0.010410666465759277
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,64,48,48,128,0,1,float16,fp8,0,0.010922666639089584
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,64,48,48,128,0,1,fp8,fp8,0,0.00972800018886725
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,64,48,1,128,0,1,float16,fp8,0,0.010751999914646149
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,64,48,1,128,0,1,float16,float16,0,0.010239999741315842
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,64,48,1,128,0,1,fp8,fp8,0,0.00972800018886725
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,64,48,2,128,0,1,float16,float16,0,0.010410666465759277
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,64,48,2,128,0,1,float16,fp8,0,0.010581333190202713
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,64,48,4,128,0,1,float16,float16,0,0.010410666465759277
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,64,48,2,128,0,1,fp8,fp8,0,0.009557333464423815
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,64,48,4,128,0,1,fp8,fp8,0,0.00972800018886725
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,64,48,8,128,0,1,float16,float16,0,0.010410666465759277
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,64,48,8,128,0,1,float16,fp8,0,0.010581333190202713
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,64,48,4,128,0,1,float16,fp8,0,0.010751999914646149
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,64,48,8,128,0,1,fp8,fp8,0,0.00972800018886725
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,32,48,1,128,0,1,float16,float16,0,0.5806080102920532
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,32,48,1,128,0,1,float16,fp8,0,0.5806080102920532
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,32,48,2,128,0,1,float16,float16,0,0.5990399916966757
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,32,48,2,128,0,1,float16,fp8,0,0.5983573198318481
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,32,48,2,128,0,1,fp8,fp8,0,0.4145493507385254
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,32,48,4,128,0,1,float16,float16,0,0.6539946794509888
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,32,48,1,128,0,1,fp8,fp8,0,0.4026026725769043
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,32,48,4,128,0,1,float16,fp8,0,0.641706665356954
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,32,48,4,128,0,1,fp8,fp8,0,0.4466346502304077
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,32,48,8,128,0,1,float16,float16,0,0.7389866511027018
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,32,48,8,128,0,1,float16,fp8,0,0.7202133337656657
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,32,48,8,128,0,1,fp8,fp8,0,0.5034666856129965
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,32,48,48,128,0,1,float16,float16,0,0.7587839762369791
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,32,48,48,128,0,1,float16,fp8,0,0.7002453009287516
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,32,48,1,128,0,1,float16,float16,0,0.1641813317934672
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,32,48,48,128,0,1,fp8,fp8,0,0.5582506656646729
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,32,48,1,128,0,1,float16,fp8,0,0.16230400403340658
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,32,48,1,128,0,1,fp8,fp8,0,0.11025066177050273
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,32,48,2,128,0,1,float16,float16,0,0.17885865767796835
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,32,48,2,128,0,1,float16,fp8,0,0.17544533809026083
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,32,48,2,128,0,1,fp8,fp8,0,0.13755733768145242
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,32,48,4,128,0,1,float16,float16,0,0.2058239976565043
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,32,48,4,128,0,1,float16,fp8,0,0.1986560026804606
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,32,48,8,128,0,1,float16,float16,0,0.258730669816335
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,32,48,4,128,0,1,fp8,fp8,0,0.16622933745384216
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,32,48,8,128,0,1,float16,fp8,0,0.2450773318608602
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,32,48,8,128,0,1,fp8,fp8,0,0.2230613430341085
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,32,48,48,128,0,1,float16,float16,0,0.2501973311106364
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,32,48,48,128,0,1,float16,fp8,0,0.1776640017827352
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,32,48,48,128,0,1,fp8,fp8,0,0.19524266322453818
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,32,48,1,128,0,1,float16,float16,0,0.06502399841944377
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,32,48,1,128,0,1,float16,fp8,0,0.06417066852251689
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,32,48,2,128,0,1,float16,float16,0,0.06690133114655812
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,32,48,1,128,0,1,fp8,fp8,0,0.05256533126036326
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,32,48,2,128,0,1,float16,fp8,0,0.06570666531721751
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,32,48,2,128,0,1,fp8,fp8,0,0.052906667192777
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,32,48,4,128,0,1,float16,float16,0,0.06587733328342438
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,32,48,4,128,0,1,float16,fp8,0,0.06570666531721751
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,32,48,4,128,0,1,fp8,fp8,0,0.053247998158137
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,32,48,8,128,0,1,float16,float16,0,0.06604800124963124
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,32,48,8,128,0,1,float16,fp8,0,0.067071999112765
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,32,48,48,128,0,1,float16,float16,0,0.043178667624791466
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,32,48,8,128,0,1,fp8,fp8,0,0.05358933409055074
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,32,48,48,128,0,1,float16,fp8,0,0.04130133241415024
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,32,48,48,128,0,1,fp8,fp8,0,0.034474665919939675
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,32,48,1,128,0,1,float16,float16,0,0.037717332442601524
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,32,48,1,128,0,1,float16,fp8,0,0.038058665891488395
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,32,48,1,128,0,1,fp8,fp8,0,0.031231999397277832
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,32,48,2,128,0,1,float16,float16,0,0.03857066730658213
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,32,48,2,128,0,1,float16,fp8,0,0.03874133278926214
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,32,48,4,128,0,1,float16,float16,0,0.03754666695992152
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,32,48,4,128,0,1,float16,fp8,0,0.03788800040880839
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,32,48,2,128,0,1,fp8,fp8,0,0.0315733328461647
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,32,48,4,128,0,1,fp8,fp8,0,0.031914666295051575
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,32,48,8,128,0,1,float16,float16,0,0.037717332442601524
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,32,48,8,128,0,1,float16,fp8,0,0.03754666695992152
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,32,48,8,128,0,1,fp8,fp8,0,0.031744000812371574
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,32,48,48,128,0,1,float16,float16,0,0.02611200014750163
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,32,48,48,128,0,1,float16,fp8,0,0.025429333249727886
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,32,48,48,128,0,1,fp8,fp8,0,0.02218666672706604
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,32,48,1,128,0,1,float16,fp8,0,0.023552000522613525
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,32,48,1,128,0,1,fp8,fp8,0,0.02065066620707512
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,32,48,1,128,0,1,float16,float16,0,0.023552000522613525
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,32,48,2,128,0,1,float16,float16,0,0.023893333971500397
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,32,48,2,128,0,1,float16,fp8,0,0.0240639994541804
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,32,48,2,128,0,1,fp8,fp8,0,0.020479999482631683
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,32,48,4,128,0,1,float16,float16,0,0.023893333971500397
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,32,48,4,128,0,1,float16,fp8,0,0.023893333971500397
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,32,48,4,128,0,1,fp8,fp8,0,0.020821332931518555
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,32,48,8,128,0,1,float16,float16,0,0.023893333971500397
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,32,48,8,128,0,1,float16,fp8,0,0.023893333971500397
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,32,48,8,128,0,1,fp8,fp8,0,0.020821332931518555
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,32,48,48,128,0,1,float16,float16,0,0.018090666582187016
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,32,48,48,128,0,1,float16,fp8,0,0.017749333133300144
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,32,48,48,128,0,1,fp8,fp8,0,0.015530666957298914
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,32,48,1,128,0,1,float16,float16,0,0.01672533278663953
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,32,48,1,128,0,1,float16,fp8,0,0.016895999511082966
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,32,48,1,128,0,1,fp8,fp8,0,0.015018666783968607
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,32,48,2,128,0,1,float16,fp8,0,0.0170666662355264
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,32,48,2,128,0,1,float16,float16,0,0.01672533278663953
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,32,48,2,128,0,1,fp8,fp8,0,0.015189333508412043
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,32,48,4,128,0,1,float16,float16,0,0.017237332959969837
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,32,48,4,128,0,1,float16,fp8,0,0.016895999511082966
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,32,48,4,128,0,1,fp8,fp8,0,0.015360000232855478
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,32,48,8,128,0,1,float16,float16,0,0.017237332959969837
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,32,48,8,128,0,1,float16,fp8,0,0.017407999684413273
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,32,48,8,128,0,1,fp8,fp8,0,0.015360000232855478
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,32,48,48,128,0,1,float16,fp8,0,0.011776000261306763
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,32,48,48,128,0,1,float16,float16,0,0.011776000261306763
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,32,48,48,128,0,1,fp8,fp8,0,0.010581333190202713
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,32,48,1,128,0,1,float16,float16,0,0.01109333336353302
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,32,48,1,128,0,1,float16,fp8,0,0.011264000087976456
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,32,48,1,128,0,1,fp8,fp8,0,0.010410666465759277
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,32,48,2,128,0,1,float16,float16,0,0.011264000087976456
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,32,48,2,128,0,1,float16,fp8,0,0.011434666812419891
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,32,48,2,128,0,1,fp8,fp8,0,0.010581333190202713
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,32,48,4,128,0,1,float16,fp8,0,0.011264000087976456
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,32,48,4,128,0,1,float16,float16,0,0.01109333336353302
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,32,48,4,128,0,1,fp8,fp8,0,0.010239999741315842
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,32,48,8,128,0,1,float16,float16,0,0.01109333336353302
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,32,48,8,128,0,1,float16,fp8,0,0.011434666812419891
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,32,48,8,128,0,1,fp8,fp8,0,0.010581333190202713
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,32,48,48,128,0,1,float16,float16,0,0.00938666673998038
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,32,48,48,128,0,1,float16,fp8,0,0.00972800018886725
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,32,48,48,128,0,1,fp8,fp8,0,0.009045333291093508
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,32,48,1,128,0,1,float16,float16,0,0.00938666673998038
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,32,48,1,128,0,1,float16,fp8,0,0.010751999914646149
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,32,48,1,128,0,1,fp8,fp8,0,0.009045333291093508
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,32,48,2,128,0,1,float16,float16,0,0.009557333464423815
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,32,48,2,128,0,1,float16,fp8,0,0.00972800018886725
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,32,48,2,128,0,1,fp8,fp8,0,0.008874666566650072
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,32,48,4,128,0,1,float16,float16,0,0.00938666673998038
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,32,48,4,128,0,1,float16,fp8,0,0.009557333464423815
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,32,48,8,128,0,1,float16,fp8,0,0.00972800018886725
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,32,48,8,128,0,1,float16,float16,0,0.009557333464423815
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,32,48,8,128,0,1,fp8,fp8,0,0.009045333291093508
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,32,48,4,128,0,1,fp8,fp8,0,0.009045333291093508
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,32,48,48,128,0,1,float16,float16,0,0.0085333331177632
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,32,48,48,128,0,1,float16,fp8,0,0.008874666566650072
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,32,48,48,128,0,1,fp8,fp8,0,0.008362666393319765
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,32,48,1,128,0,1,float16,float16,0,0.0085333331177632
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,32,48,1,128,0,1,float16,fp8,0,0.009045333291093508
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,32,48,1,128,0,1,fp8,fp8,0,0.0085333331177632
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,32,48,2,128,0,1,float16,float16,0,0.008703999842206636
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,32,48,2,128,0,1,fp8,fp8,0,0.008362666393319765
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,32,48,2,128,0,1,float16,fp8,0,0.009045333291093508
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,32,48,4,128,0,1,float16,float16,0,0.008703999842206636
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,32,48,4,128,0,1,float16,fp8,0,0.009045333291093508
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,32,48,4,128,0,1,fp8,fp8,0,0.008362666393319765
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,32,48,8,128,0,1,float16,float16,0,0.008874666566650072
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,32,48,8,128,0,1,float16,fp8,0,0.009216000015536943
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,32,48,8,128,0,1,fp8,fp8,0,0.0085333331177632
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,16,48,1,128,0,1,float16,float16,0,0.20087466637293497
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,16,48,1,128,0,1,float16,fp8,0,0.20053333044052124
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,16,48,1,128,0,1,fp8,fp8,0,0.1513813336690267
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,16,48,2,128,0,1,float16,float16,0,0.2106026609738668
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,16,48,2,128,0,1,float16,fp8,0,0.20821332931518555
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,16,48,2,128,0,1,fp8,fp8,0,0.18193066120147705
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,16,48,4,128,0,1,float16,float16,0,0.22801067431767783
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,16,48,4,128,0,1,float16,fp8,0,0.22357332706451416
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,16,48,4,128,0,1,fp8,fp8,0,0.20599466562271118
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,16,48,8,128,0,1,float16,fp8,0,0.25173334280649823
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,16,48,8,128,0,1,fp8,fp8,0,0.2384213407834371
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,16,48,8,128,0,1,float16,float16,0,0.2667520046234131
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,16,48,48,128,0,1,float16,float16,0,0.24593067169189453
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,16,48,48,128,0,1,float16,fp8,0,0.1802240014076233
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,16,48,48,128,0,1,fp8,fp8,0,0.21316266059875488
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,16,48,1,128,0,1,float16,float16,0,0.09045333663622539
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,16,48,1,128,0,1,float16,fp8,0,0.09113599856694539
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,16,48,1,128,0,1,fp8,fp8,0,0.07150933146476746
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,16,48,2,128,0,1,float16,float16,0,0.09233066439628601
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,16,48,2,128,0,1,fp8,fp8,0,0.0721919983625412
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,16,48,2,128,0,1,float16,fp8,0,0.09130666653315227
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,16,48,4,128,0,1,float16,float16,0,0.09130666653315227
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,16,48,4,128,0,1,float16,fp8,0,0.09113599856694539
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,16,48,4,128,0,1,fp8,fp8,0,0.07236266632874806
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,16,48,8,128,0,1,float16,float16,0,0.09233066439628601
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,16,48,8,128,0,1,float16,fp8,0,0.09181867043177287
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,16,48,48,128,0,1,float16,float16,0,0.05614933371543884
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,16,48,8,128,0,1,fp8,fp8,0,0.0730453332265218
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,16,48,48,128,0,1,float16,fp8,0,0.05273599922657013
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,16,48,48,128,0,1,fp8,fp8,0,0.042837331692377724
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,16,48,1,128,0,1,float16,float16,0,0.049322664737701416
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,16,48,1,128,0,1,float16,fp8,0,0.04949333270390829
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,16,48,1,128,0,1,fp8,fp8,0,0.04027733455101649
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,16,48,2,128,0,1,float16,float16,0,0.05000533163547516
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,16,48,2,128,0,1,float16,fp8,0,0.04966400067011515
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,16,48,2,128,0,1,fp8,fp8,0,0.040448000033696495
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,16,48,4,128,0,1,float16,float16,0,0.05017599960168203
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,16,48,4,128,0,1,float16,fp8,0,0.05017599960168203
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,16,48,4,128,0,1,fp8,fp8,0,0.040618665516376495
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,16,48,8,128,0,1,float16,float16,0,0.05000533163547516
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,16,48,8,128,0,1,float16,fp8,0,0.050517335534095764
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,16,48,8,128,0,1,fp8,fp8,0,0.04113066693147024
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,16,48,48,128,0,1,float16,float16,0,0.0314026673634847
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,16,48,48,128,0,1,float16,fp8,0,0.03089066594839096
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,16,48,1,128,0,1,float16,float16,0,0.029866665601730347
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,16,48,48,128,0,1,fp8,fp8,0,0.0266239990790685
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,16,48,1,128,0,1,float16,fp8,0,0.029696000119050343
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,16,48,1,128,0,1,fp8,fp8,0,0.025258667767047882
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,16,48,2,128,0,1,float16,float16,0,0.030378667016824085
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,16,48,2,128,0,1,float16,fp8,0,0.030378667016824085
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,16,48,2,128,0,1,fp8,fp8,0,0.025087999800841015
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,16,48,4,128,0,1,float16,float16,0,0.030037333567937214
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,16,48,4,128,0,1,float16,fp8,0,0.029866665601730347
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,16,48,8,128,0,1,float16,float16,0,0.029866665601730347
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,16,48,8,128,0,1,float16,fp8,0,0.029866665601730347
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,16,48,4,128,0,1,fp8,fp8,0,0.025258667767047882
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,16,48,8,128,0,1,fp8,fp8,0,0.025429333249727886
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,16,48,48,128,0,1,float16,float16,0,0.019797333826621372
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,16,48,48,128,0,1,float16,fp8,0,0.019626667102177937
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,16,48,48,128,0,1,fp8,fp8,0,0.017237332959969837
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,16,48,1,128,0,1,float16,float16,0,0.01911466692884763
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,16,48,1,128,0,1,float16,fp8,0,0.01911466692884763
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,16,48,1,128,0,1,fp8,fp8,0,0.01672533278663953
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,16,48,2,128,0,1,float16,float16,0,0.019285333653291065
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,16,48,2,128,0,1,float16,fp8,0,0.019285333653291065
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,16,48,4,128,0,1,float16,float16,0,0.01911466692884763
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,16,48,2,128,0,1,fp8,fp8,0,0.01621333385507266
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,16,48,4,128,0,1,float16,fp8,0,0.0194560003777345
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,16,48,4,128,0,1,fp8,fp8,0,0.0170666662355264
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,16,48,8,128,0,1,float16,float16,0,0.01911466692884763
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,16,48,8,128,0,1,float16,fp8,0,0.0194560003777345
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,16,48,48,128,0,1,float16,float16,0,0.013823999712864557
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,16,48,48,128,0,1,float16,fp8,0,0.013823999712864557
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,16,48,8,128,0,1,fp8,fp8,0,0.016895999511082966
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,16,48,48,128,0,1,fp8,fp8,0,0.012117333710193634
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,16,48,1,128,0,1,float16,float16,0,0.013482666263977686
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,16,48,1,128,0,1,float16,fp8,0,0.013823999712864557
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,16,48,1,128,0,1,fp8,fp8,0,0.012117333710193634
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,16,48,2,128,0,1,float16,float16,0,0.013823999712864557
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,16,48,2,128,0,1,float16,fp8,0,0.013823999712864557
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,16,48,2,128,0,1,fp8,fp8,0,0.011946666985750198
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,16,48,4,128,0,1,float16,fp8,0,0.013653332988421122
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,16,48,4,128,0,1,fp8,fp8,0,0.011776000261306763
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,16,48,4,128,0,1,float16,float16,0,0.013482666263977686
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,16,48,8,128,0,1,float16,float16,0,0.013482666263977686
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,16,48,8,128,0,1,float16,fp8,0,0.013823999712864557
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,16,48,8,128,0,1,fp8,fp8,0,0.012117333710193634
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,16,48,48,128,0,1,float16,float16,0,0.00972800018886725
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,16,48,48,128,0,1,float16,fp8,0,0.010069333637754122
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,16,48,48,128,0,1,fp8,fp8,0,0.00972800018886725
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,16,48,1,128,0,1,float16,float16,0,0.009898666913310686
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,16,48,1,128,0,1,float16,fp8,0,0.010239999741315842
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,16,48,1,128,0,1,fp8,fp8,0,0.00972800018886725
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,16,48,2,128,0,1,float16,float16,0,0.00972800018886725
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,16,48,2,128,0,1,float16,fp8,0,0.010239999741315842
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,16,48,4,128,0,1,float16,float16,0,0.009898666913310686
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,16,48,2,128,0,1,fp8,fp8,0,0.00938666673998038
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,16,48,4,128,0,1,float16,fp8,0,0.010239999741315842
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,16,48,4,128,0,1,fp8,fp8,0,0.009557333464423815
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,16,48,8,128,0,1,float16,float16,0,0.009898666913310686
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,16,48,8,128,0,1,float16,fp8,0,0.010239999741315842
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,16,48,48,128,0,1,float16,float16,0,0.0085333331177632
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,16,48,8,128,0,1,fp8,fp8,0,0.00938666673998038
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,16,48,48,128,0,1,float16,fp8,0,0.008703999842206636
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,16,48,48,128,0,1,fp8,fp8,0,0.008192000289758047
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,16,48,1,128,0,1,float16,float16,0,0.009045333291093508
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,16,48,1,128,0,1,float16,fp8,0,0.009045333291093508
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,16,48,1,128,0,1,fp8,fp8,0,0.0085333331177632
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,16,48,2,128,0,1,float16,float16,0,0.008874666566650072
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,16,48,2,128,0,1,float16,fp8,0,0.009045333291093508
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,16,48,2,128,0,1,fp8,fp8,0,0.008362666393319765
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,16,48,4,128,0,1,float16,float16,0,0.008703999842206636
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,16,48,4,128,0,1,float16,fp8,0,0.009045333291093508
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,16,48,4,128,0,1,fp8,fp8,0,0.008703999842206636
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,16,48,8,128,0,1,float16,float16,0,0.008703999842206636
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,16,48,8,128,0,1,fp8,fp8,0,0.008874666566650072
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,16,48,8,128,0,1,float16,fp8,0,0.009045333291093508
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,16,48,48,128,0,1,float16,float16,0,0.008362666393319765
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,16,48,48,128,0,1,float16,fp8,0,0.009045333291093508
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,16,48,48,128,0,1,fp8,fp8,0,0.008021333565314611
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,16,48,1,128,0,1,float16,float16,0,0.0085333331177632
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,16,48,1,128,0,1,float16,fp8,0,0.008703999842206636
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,16,48,1,128,0,1,fp8,fp8,0,0.008192000289758047
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,16,48,2,128,0,1,float16,float16,0,0.008703999842206636
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,16,48,2,128,0,1,float16,fp8,0,0.008703999842206636
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,16,48,2,128,0,1,fp8,fp8,0,0.008362666393319765
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,16,48,4,128,0,1,float16,float16,0,0.0085333331177632
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,16,48,4,128,0,1,float16,fp8,0,0.0085333331177632
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,16,48,4,128,0,1,fp8,fp8,0,0.009045333291093508
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,16,48,8,128,0,1,float16,float16,0,0.009557333464423815
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,16,48,8,128,0,1,float16,fp8,0,0.008703999842206636
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,16,48,8,128,0,1,fp8,fp8,0,0.008874666566650072
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,16384,40,1,128,0,1,fp8,fp8,0,87.9856669108073
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,16384,40,2,128,0,1,fp8,fp8,0,85.79890950520833
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,16384,40,2,128,0,1,float16,float16,0,142.40716552734375
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,16384,40,1,128,0,1,float16,fp8,0,141.8098347981771
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,16384,40,1,128,0,1,float16,float16,0,142.98129272460938
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,16384,40,2,128,0,1,float16,fp8,0,145.7245890299479
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,16384,40,4,128,0,1,float16,float16,0,146.6775919596354
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,16384,40,4,128,0,1,float16,fp8,0,144.38690185546875
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,16384,40,4,128,0,1,fp8,fp8,0,87.5333964029948
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,16384,40,40,128,0,1,fp8,fp8,0,45.461334228515625
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,16384,40,40,128,0,1,float16,float16,0,76.88072713216145
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,16384,40,1,128,0,1,float16,float16,0,71.30641174316406
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,16384,40,40,128,0,1,float16,fp8,0,76.8025614420573
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,16384,40,8,128,0,1,fp8,fp8,0,88.35054524739583
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,16384,40,8,128,0,1,float16,float16,0,143.22807820638022
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,16384,40,1,128,0,1,fp8,fp8,0,42.0667724609375
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,16384,40,1,128,0,1,float16,fp8,0,72.38434346516927
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,16384,40,2,128,0,1,fp8,fp8,0,41.95720418294271
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,16384,40,8,128,0,1,float16,fp8,0,145.50187174479166
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,16384,40,2,128,0,1,float16,fp8,0,71.41085815429688
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,16384,40,2,128,0,1,float16,float16,0,72.14506530761719
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,16384,40,4,128,0,1,fp8,fp8,0,42.37806955973307
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,16384,40,4,128,0,1,float16,float16,0,71.73956298828125
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,16384,40,4,128,0,1,float16,fp8,0,70.97173563639323
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,16384,40,8,128,0,1,float16,float16,0,72.14045715332031
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,16384,40,8,128,0,1,fp8,fp8,0,42.141014099121094
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,16384,40,40,128,0,1,fp8,fp8,0,22.84356180826823
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,16384,40,40,128,0,1,float16,float16,0,36.27776082356771
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,16384,40,40,128,0,1,float16,fp8,0,36.613972981770836
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,16384,40,1,128,0,1,float16,float16,0,34.4444580078125
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,16384,40,1,128,0,1,float16,fp8,0,34.113024393717446
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,16384,40,8,128,0,1,float16,fp8,0,73.56245422363281
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,16384,40,1,128,0,1,fp8,fp8,0,20.42999521891276
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,16384,40,2,128,0,1,fp8,fp8,0,20.005716959635418
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,16384,40,2,128,0,1,float16,float16,0,34.70643107096354
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,16384,40,2,128,0,1,float16,fp8,0,35.491668701171875
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,16384,40,4,128,0,1,fp8,fp8,0,20.46600596110026
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,16384,40,4,128,0,1,float16,float16,0,34.84825642903646
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,16384,40,4,128,0,1,float16,fp8,0,34.92471567789713
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,16384,40,8,128,0,1,float16,float16,0,35.444053649902344
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,16384,40,40,128,0,1,fp8,fp8,0,11.072341918945312
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,16384,40,8,128,0,1,float16,fp8,0,34.62126922607422
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,16384,40,40,128,0,1,float16,float16,0,18.54532241821289
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,16384,40,8,128,0,1,fp8,fp8,0,20.59008026123047
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,16384,40,40,128,0,1,float16,fp8,0,18.768042246500652
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,16384,40,1,128,0,1,fp8,fp8,0,10.497706731160482
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,16384,40,1,128,0,1,float16,float16,0,17.545387268066406
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,16384,40,1,128,0,1,float16,fp8,0,17.24962107340495
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,16384,40,2,128,0,1,fp8,fp8,0,10.512042363484701
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,16384,40,2,128,0,1,float16,float16,0,17.0427729288737
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,16384,40,2,128,0,1,float16,fp8,0,17.300479888916016
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,16384,40,4,128,0,1,float16,float16,0,17.233919779459637
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,16384,40,4,128,0,1,float16,fp8,0,17.665194193522137
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,16384,40,4,128,0,1,fp8,fp8,0,10.654207865397135
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,16384,40,8,128,0,1,float16,float16,0,17.545045216878254
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,16384,40,8,128,0,1,float16,fp8,0,17.57371775309245
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,16384,40,8,128,0,1,fp8,fp8,0,10.72162119547526
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,12288,40,1,128,0,1,fp8,fp8,0,47.841451009114586
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,12288,40,2,128,0,1,fp8,fp8,0,48.34952290852865
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,12288,40,1,128,0,1,float16,float16,0,83.3599141438802
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,12288,40,1,128,0,1,float16,fp8,0,84.23696899414062
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,12288,40,2,128,0,1,float16,float16,0,85.68986002604167
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,12288,40,2,128,0,1,float16,fp8,0,85.6258544921875
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,12288,40,4,128,0,1,float16,float16,0,85.33436075846355
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,12288,40,4,128,0,1,float16,fp8,0,84.39330037434895
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,12288,40,4,128,0,1,fp8,fp8,0,48.168619791666664
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,12288,40,40,128,0,1,fp8,fp8,0,26.021204630533855
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,12288,40,8,128,0,1,fp8,fp8,0,49.10455322265625
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,12288,40,40,128,0,1,float16,float16,0,43.372884114583336
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,12288,40,40,128,0,1,float16,fp8,0,42.59413401285807
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,12288,40,1,128,0,1,float16,float16,0,39.94982401529948
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,12288,40,8,128,0,1,float16,float16,0,85.56493123372395
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,12288,40,1,128,0,1,float16,fp8,0,39.89879353841146
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,12288,40,8,128,0,1,float16,fp8,0,83.99633280436198
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,12288,40,1,128,0,1,fp8,fp8,0,23.5153071085612
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,12288,40,2,128,0,1,fp8,fp8,0,24.188756306966145
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,12288,40,2,128,0,1,float16,float16,0,39.77830505371094
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,12288,40,2,128,0,1,float16,fp8,0,40.853502909342446
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,12288,40,4,128,0,1,fp8,fp8,0,23.436116536458332
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,12288,40,4,128,0,1,float16,float16,0,39.897430419921875
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,12288,40,4,128,0,1,float16,fp8,0,40.43366495768229
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,12288,40,8,128,0,1,fp8,fp8,0,24.600746154785156
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,12288,40,8,128,0,1,float16,float16,0,39.78154754638672
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,12288,40,40,128,0,1,fp8,fp8,0,12.997973124186197
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,12288,40,8,128,0,1,float16,fp8,0,40.29491170247396
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,12288,40,40,128,0,1,float16,float16,0,19.87976582845052
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,12288,40,40,128,0,1,float16,fp8,0,20.94523747762044
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,12288,40,1,128,0,1,float16,float16,0,19.650901794433594
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,12288,40,1,128,0,1,fp8,fp8,0,12.176896413167318
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,12288,40,1,128,0,1,float16,fp8,0,19.470677693684895
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,12288,40,2,128,0,1,fp8,fp8,0,12.034730275472006
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,12288,40,2,128,0,1,float16,float16,0,19.837610880533855
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,12288,40,2,128,0,1,float16,fp8,0,20.091562906901043
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,12288,40,4,128,0,1,fp8,fp8,0,11.949396769205729
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,12288,40,4,128,0,1,float16,float16,0,20.021589914957683
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,12288,40,4,128,0,1,float16,fp8,0,20.58308283487956
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,12288,40,8,128,0,1,fp8,fp8,0,12.249258677164713
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,12288,40,8,128,0,1,float16,float16,0,20.18713633219401
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,12288,40,40,128,0,1,fp8,fp8,0,6.526122411092122
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,12288,40,8,128,0,1,float16,fp8,0,20.336811065673828
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,12288,40,40,128,0,1,float16,float16,0,10.674858093261719
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,12288,40,40,128,0,1,float16,fp8,0,10.444458643595377
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,12288,40,1,128,0,1,float16,float16,0,10.101418813069662
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,12288,40,1,128,0,1,float16,fp8,0,10.30195172627767
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,12288,40,1,128,0,1,fp8,fp8,0,5.832533518473308
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,12288,40,2,128,0,1,fp8,fp8,0,5.446826934814453
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,12288,40,2,128,0,1,float16,float16,0,9.93348248799642
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,12288,40,2,128,0,1,float16,fp8,0,9.92904535929362
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,12288,40,4,128,0,1,fp8,fp8,0,5.957632064819336
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,12288,40,4,128,0,1,float16,float16,0,9.996970494588217
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,12288,40,4,128,0,1,float16,fp8,0,10.141013463338217
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,12288,40,8,128,0,1,float16,float16,0,10.401962916056315
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,12288,40,8,128,0,1,float16,fp8,0,10.208938598632812
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,12288,40,8,128,0,1,fp8,fp8,0,6.0967254638671875
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,10240,40,1,128,0,1,fp8,fp8,0,33.59607442220052
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,10240,40,2,128,0,1,fp8,fp8,0,34.95526377360026
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,10240,40,1,128,0,1,float16,fp8,0,56.8461659749349
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,10240,40,2,128,0,1,float16,float16,0,56.12083435058594
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,10240,40,1,128,0,1,float16,float16,0,57.505961100260414
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,10240,40,2,128,0,1,float16,fp8,0,56.33911641438802
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,10240,40,4,128,0,1,float16,float16,0,56.76731872558594
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,10240,40,4,128,0,1,float16,fp8,0,57.66809590657552
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,10240,40,4,128,0,1,fp8,fp8,0,34.129920959472656
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,10240,40,40,128,0,1,float16,float16,0,30.708394368489582
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,10240,40,40,128,0,1,float16,fp8,0,29.784576416015625
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,10240,40,8,128,0,1,fp8,fp8,0,34.668713887532554
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,10240,40,40,128,0,1,fp8,fp8,0,19.257855733235676
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,10240,40,8,128,0,1,float16,float16,0,59.519999186197914
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,10240,40,1,128,0,1,float16,float16,0,28.343124389648438
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,10240,40,1,128,0,1,float16,fp8,0,27.861162821451824
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,10240,40,8,128,0,1,float16,fp8,0,57.61365254720052
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,10240,40,1,128,0,1,fp8,fp8,0,16.950101216634113
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,10240,40,2,128,0,1,fp8,fp8,0,16.949418385823567
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,10240,40,2,128,0,1,float16,float16,0,27.926358540852863
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,10240,40,2,128,0,1,float16,fp8,0,27.760299682617188
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,10240,40,4,128,0,1,fp8,fp8,0,16.87927500406901
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,10240,40,4,128,0,1,float16,float16,0,29.239295959472656
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,10240,40,4,128,0,1,float16,fp8,0,28.322303771972656
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,10240,40,8,128,0,1,fp8,fp8,0,17.134591420491535
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,10240,40,8,128,0,1,float16,float16,0,28.249430338541668
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,10240,40,40,128,0,1,float16,float16,0,15.144789377848307
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,10240,40,8,128,0,1,float16,fp8,0,28.813995361328125
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,10240,40,40,128,0,1,float16,fp8,0,14.917973836263021
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,10240,40,40,128,0,1,fp8,fp8,0,9.567573547363281
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,10240,40,1,128,0,1,float16,float16,0,14.268074035644531
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,10240,40,1,128,0,1,float16,fp8,0,14.151167551676432
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,10240,40,1,128,0,1,fp8,fp8,0,8.120490392049154
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,10240,40,2,128,0,1,float16,float16,0,14.0852902730306
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,10240,40,2,128,0,1,fp8,fp8,0,8.791552225748697
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,10240,40,2,128,0,1,float16,fp8,0,13.938176472981771
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,10240,40,4,128,0,1,float16,float16,0,13.874516805013021
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,10240,40,4,128,0,1,fp8,fp8,0,8.794111887613932
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,10240,40,4,128,0,1,float16,fp8,0,14.291285196940104
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,10240,40,8,128,0,1,float16,float16,0,14.508373260498047
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,10240,40,8,128,0,1,float16,fp8,0,14.217727661132812
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,10240,40,8,128,0,1,fp8,fp8,0,8.437589645385742
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,10240,40,40,128,0,1,float16,float16,0,7.861248016357422
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,10240,40,40,128,0,1,fp8,fp8,0,4.553898811340332
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,10240,40,40,128,0,1,float16,fp8,0,7.4763946533203125
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,10240,40,1,128,0,1,float16,float16,0,7.156565348307292
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,10240,40,1,128,0,1,float16,fp8,0,7.148031870524089
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,10240,40,1,128,0,1,fp8,fp8,0,4.105216026306152
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,10240,40,2,128,0,1,fp8,fp8,0,4.087807973225911
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,10240,40,2,128,0,1,float16,float16,0,6.862848281860352
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,10240,40,2,128,0,1,float16,fp8,0,6.879231770833333
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,10240,40,4,128,0,1,float16,float16,0,6.822741190592448
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,10240,40,4,128,0,1,fp8,fp8,0,3.8601385752360025
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,10240,40,4,128,0,1,float16,fp8,0,7.18728510538737
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,10240,40,8,128,0,1,float16,float16,0,7.189674377441406
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,10240,40,8,128,0,1,fp8,fp8,0,4.155392011006673
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,10240,40,8,128,0,1,float16,fp8,0,6.934186935424805
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,8192,40,1,128,0,1,fp8,fp8,0,45.39818827311198
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,8192,40,2,128,0,1,fp8,fp8,0,45.739176432291664
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,8192,40,1,128,0,1,float16,fp8,0,77.69190470377605
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,8192,40,1,128,0,1,float16,float16,0,79.45420837402344
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,8192,40,2,128,0,1,float16,fp8,0,77.32463073730469
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,8192,40,2,128,0,1,float16,float16,0,79.94009399414062
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,8192,40,4,128,0,1,float16,float16,0,81.0810038248698
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,8192,40,4,128,0,1,float16,fp8,0,79.39413452148438
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,8192,40,4,128,0,1,fp8,fp8,0,46.22711690266927
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,8192,40,40,128,0,1,fp8,fp8,0,26.277206420898438
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,8192,40,40,128,0,1,float16,float16,0,40.107521057128906
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,8192,40,8,128,0,1,fp8,fp8,0,47.0674794514974
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,8192,40,40,128,0,1,float16,fp8,0,40.22681681315104
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,8192,40,1,128,0,1,float16,float16,0,37.14816029866537
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,8192,40,8,128,0,1,float16,float16,0,79.03846232096355
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,8192,40,1,128,0,1,float16,fp8,0,36.42658233642578
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,8192,40,8,128,0,1,float16,fp8,0,78.44897969563802
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,8192,40,1,128,0,1,fp8,fp8,0,22.230016072591145
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,8192,40,2,128,0,1,fp8,fp8,0,22.881451924641926
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,8192,40,2,128,0,1,float16,float16,0,36.855979919433594
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,8192,40,2,128,0,1,float16,fp8,0,36.791979471842446
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,8192,40,4,128,0,1,float16,float16,0,37.36200459798177
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,8192,40,4,128,0,1,fp8,fp8,0,22.097750345865887
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,8192,40,4,128,0,1,float16,fp8,0,37.05036926269531
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,8192,40,8,128,0,1,fp8,fp8,0,23.381675720214844
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,8192,40,8,128,0,1,float16,float16,0,37.26318868001302
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,8192,40,40,128,0,1,float16,float16,0,20.33117930094401
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,8192,40,40,128,0,1,float16,fp8,0,19.439957936604817
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,8192,40,8,128,0,1,float16,fp8,0,38.57083638509115
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,8192,40,40,128,0,1,fp8,fp8,0,12.774740854899088
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,8192,40,1,128,0,1,float16,float16,0,17.98980204264323
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,8192,40,1,128,0,1,float16,fp8,0,18.300757090250652
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,8192,40,1,128,0,1,fp8,fp8,0,11.242837270100912
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,8192,40,2,128,0,1,fp8,fp8,0,11.132245381673178
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,8192,40,2,128,0,1,float16,float16,0,18.221227010091145
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,8192,40,4,128,0,1,fp8,fp8,0,11.14572779337565
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,8192,40,2,128,0,1,float16,fp8,0,17.990314483642578
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,8192,40,4,128,0,1,float16,float16,0,18.630143483479817
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,8192,40,4,128,0,1,float16,fp8,0,18.458452860514324
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,8192,40,8,128,0,1,float16,float16,0,18.594132741292317
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,8192,40,8,128,0,1,float16,fp8,0,19.065685272216797
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,8192,40,8,128,0,1,fp8,fp8,0,11.473066965738932
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,8192,40,40,128,0,1,fp8,fp8,0,6.274218877156575
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,8192,40,40,128,0,1,float16,float16,0,9.546581268310547
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,8192,40,40,128,0,1,float16,fp8,0,9.970687866210938
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,8192,40,1,128,0,1,float16,float16,0,9.330005645751953
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,8192,40,1,128,0,1,float16,fp8,0,9.193471908569336
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,8192,40,1,128,0,1,fp8,fp8,0,5.400234858194987
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,8192,40,2,128,0,1,fp8,fp8,0,5.001728057861328
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,8192,40,2,128,0,1,float16,float16,0,9.21514638264974
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,8192,40,2,128,0,1,float16,fp8,0,9.148927688598633
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,8192,40,4,128,0,1,float16,float16,0,8.952149073282877
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,8192,40,4,128,0,1,fp8,fp8,0,5.085525194803874
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,8192,40,4,128,0,1,float16,fp8,0,9.164458592732748
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,8192,40,8,128,0,1,float16,float16,0,9.598634719848633
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,8192,40,8,128,0,1,float16,fp8,0,9.429503758748373
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,8192,40,8,128,0,1,fp8,fp8,0,5.468159993489583
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,8192,40,40,128,0,1,float16,float16,0,4.846933364868164
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,8192,40,40,128,0,1,float16,fp8,0,4.801706631978353
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,8192,40,40,128,0,1,fp8,fp8,0,3.092992146809896
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,8192,40,1,128,0,1,float16,float16,0,4.489386558532715
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,8192,40,1,128,0,1,float16,fp8,0,4.454741477966309
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,8192,40,1,128,0,1,fp8,fp8,0,2.518869400024414
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,8192,40,2,128,0,1,float16,float16,0,4.341589291890462
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,8192,40,2,128,0,1,fp8,fp8,0,2.5456639925638833
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,8192,40,2,128,0,1,float16,fp8,0,4.110677401224772
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,8192,40,4,128,0,1,float16,float16,0,4.505941390991211
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,8192,40,4,128,0,1,float16,fp8,0,4.279295921325684
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,8192,40,4,128,0,1,fp8,fp8,0,2.6014720598856607
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,8192,40,8,128,0,1,float16,float16,0,4.511914571126302
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,8192,40,8,128,0,1,float16,fp8,0,4.527445475260417
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,8192,40,8,128,0,1,fp8,fp8,0,2.643967946370443
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,6144,40,1,128,0,1,fp8,fp8,0,26.44104512532552
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,6144,40,2,128,0,1,fp8,fp8,0,26.393941243489582
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,6144,40,1,128,0,1,float16,fp8,0,43.404459635416664
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,6144,40,1,128,0,1,float16,float16,0,44.75289408365885
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,6144,40,2,128,0,1,float16,float16,0,43.0202891031901
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,6144,40,2,128,0,1,float16,fp8,0,43.904683430989586
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,6144,40,4,128,0,1,float16,float16,0,44.316162109375
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,6144,40,4,128,0,1,float16,fp8,0,43.3971201578776
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,6144,40,4,128,0,1,fp8,fp8,0,27.172693888346355
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,6144,40,40,128,0,1,float16,float16,0,23.696383158365887
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,6144,40,40,128,0,1,fp8,fp8,0,15.941802978515625
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,6144,40,8,128,0,1,fp8,fp8,0,28.322303771972656
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,6144,40,40,128,0,1,float16,fp8,0,24.122538248697918
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,6144,40,1,128,0,1,float16,float16,0,21.244244893391926
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,6144,40,8,128,0,1,float16,float16,0,43.44593302408854
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,6144,40,8,128,0,1,float16,fp8,0,43.89990234375
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,6144,40,1,128,0,1,float16,fp8,0,21.070507049560547
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,6144,40,1,128,0,1,fp8,fp8,0,12.8274777730306
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,6144,40,2,128,0,1,fp8,fp8,0,13.036715189615885
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,6144,40,2,128,0,1,float16,float16,0,20.94011688232422
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,6144,40,2,128,0,1,float16,fp8,0,21.56492869059245
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,6144,40,4,128,0,1,float16,float16,0,21.051904042561848
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,6144,40,4,128,0,1,float16,fp8,0,20.511573791503906
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,6144,40,4,128,0,1,fp8,fp8,0,13.179392496744791
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,6144,40,8,128,0,1,fp8,fp8,0,13.588651021321615
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,6144,40,8,128,0,1,float16,float16,0,20.742144266764324
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,6144,40,8,128,0,1,float16,fp8,0,22.01361083984375
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,6144,40,40,128,0,1,fp8,fp8,0,7.773866653442383
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,6144,40,40,128,0,1,float16,float16,0,12.02346674601237
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,6144,40,40,128,0,1,float16,fp8,0,12.020394643147787
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,6144,40,1,128,0,1,float16,float16,0,10.764288584391275
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,6144,40,1,128,0,1,float16,fp8,0,10.690559387207031
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,6144,40,1,128,0,1,fp8,fp8,0,6.173695882161458
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,6144,40,2,128,0,1,float16,float16,0,10.798591613769531
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,6144,40,2,128,0,1,fp8,fp8,0,6.1204477945963545
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,6144,40,2,128,0,1,float16,fp8,0,10.812416076660156
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,6144,40,4,128,0,1,fp8,fp8,0,6.141098658243815
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,6144,40,4,128,0,1,float16,float16,0,10.967381795247396
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,6144,40,4,128,0,1,float16,fp8,0,11.130879720052084
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,6144,40,8,128,0,1,float16,float16,0,10.846378326416016
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,6144,40,8,128,0,1,float16,fp8,0,11.06670888264974
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,6144,40,8,128,0,1,fp8,fp8,0,6.63705571492513
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,6144,40,40,128,0,1,float16,float16,0,5.750954945882161
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,6144,40,40,128,0,1,fp8,fp8,0,3.8702081044514975
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,6144,40,40,128,0,1,float16,fp8,0,5.720405578613281
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,6144,40,1,128,0,1,float16,float16,0,5.313877423604329
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,6144,40,1,128,0,1,float16,fp8,0,5.40603764851888
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,6144,40,1,128,0,1,fp8,fp8,0,2.863445281982422
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,6144,40,2,128,0,1,fp8,fp8,0,2.915328025817871
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,6144,40,2,128,0,1,float16,float16,0,5.277013460795085
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,6144,40,2,128,0,1,float16,fp8,0,5.233322779337565
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,6144,40,4,128,0,1,float16,float16,0,5.222570737202962
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,6144,40,4,128,0,1,float16,fp8,0,5.202602704366048
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,6144,40,4,128,0,1,fp8,fp8,0,3.021482785542806
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,6144,40,8,128,0,1,float16,float16,0,5.26250680287679
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,6144,40,8,128,0,1,float16,fp8,0,5.177002588907878
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,6144,40,8,128,0,1,fp8,fp8,0,3.1424853006998696
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,6144,40,40,128,0,1,float16,fp8,0,2.850133260091146
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,6144,40,40,128,0,1,float16,float16,0,2.978133201599121
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,6144,40,40,128,0,1,fp8,fp8,0,1.914197285970052
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,6144,40,1,128,0,1,float16,float16,0,2.370730717976888
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,6144,40,1,128,0,1,float16,fp8,0,2.429098606109619
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,6144,40,1,128,0,1,fp8,fp8,0,1.4540799458821614
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,6144,40,2,128,0,1,float16,float16,0,2.375509262084961
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,6144,40,2,128,0,1,fp8,fp8,0,1.4440107345581055
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,6144,40,2,128,0,1,float16,fp8,0,2.46562131245931
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,6144,40,4,128,0,1,float16,float16,0,2.473642667134603
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,6144,40,4,128,0,1,float16,fp8,0,2.4077653884887695
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,6144,40,4,128,0,1,fp8,fp8,0,1.4585173924763997
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,6144,40,8,128,0,1,float16,float16,0,2.563584009806315
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,6144,40,8,128,0,1,float16,fp8,0,2.5053866704305015
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,6144,40,8,128,0,1,fp8,fp8,0,1.5023786226908367
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,4096,40,1,128,0,1,fp8,fp8,0,26.600107828776043
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,4096,40,2,128,0,1,fp8,fp8,0,26.2835210164388
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,4096,40,1,128,0,1,float16,float16,0,41.82886505126953
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,4096,40,1,128,0,1,float16,fp8,0,41.0949961344401
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,4096,40,2,128,0,1,float16,float16,0,40.50841522216797
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,4096,40,2,128,0,1,float16,fp8,0,41.06734975179037
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,4096,40,4,128,0,1,float16,float16,0,43.2513682047526
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,4096,40,4,128,0,1,float16,fp8,0,41.87682088216146
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,4096,40,4,128,0,1,fp8,fp8,0,27.668479919433594
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,4096,40,40,128,0,1,float16,float16,0,23.200937906901043
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,4096,40,40,128,0,1,fp8,fp8,0,16.464725494384766
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,4096,40,40,128,0,1,float16,fp8,0,23.17943572998047
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,4096,40,1,128,0,1,float16,float16,0,20.057599385579426
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,4096,40,8,128,0,1,fp8,fp8,0,27.55823008219401
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,4096,40,8,128,0,1,float16,float16,0,42.906453450520836
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,4096,40,8,128,0,1,float16,fp8,0,42.559488932291664
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,4096,40,1,128,0,1,float16,fp8,0,19.876693725585938
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,4096,40,1,128,0,1,fp8,fp8,0,12.388010660807291
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,4096,40,2,128,0,1,fp8,fp8,0,12.505770365397135
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,4096,40,2,128,0,1,float16,float16,0,20.620970408121746
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,4096,40,2,128,0,1,float16,fp8,0,20.186965942382812
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,4096,40,4,128,0,1,float16,float16,0,20.30950419108073
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,4096,40,4,128,0,1,float16,fp8,0,20.789418538411457
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,4096,40,4,128,0,1,fp8,fp8,0,12.695210774739584
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,4096,40,8,128,0,1,fp8,fp8,0,13.08569590250651
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,4096,40,8,128,0,1,float16,float16,0,19.98950449625651
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,4096,40,8,128,0,1,float16,fp8,0,19.739477793375652
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,4096,40,40,128,0,1,float16,float16,0,11.61181894938151
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,4096,40,40,128,0,1,float16,fp8,0,11.292330423990885
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,4096,40,40,128,0,1,fp8,fp8,0,8.260778427124023
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,4096,40,1,128,0,1,float16,float16,0,10.21934954325358
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,4096,40,1,128,0,1,float16,fp8,0,10.068821589152018
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,4096,40,1,128,0,1,fp8,fp8,0,6.093823750813802
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,4096,40,2,128,0,1,fp8,fp8,0,6.069760004679362
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,4096,40,2,128,0,1,float16,float16,0,10.407594680786133
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,4096,40,2,128,0,1,float16,fp8,0,10.03212801615397
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,4096,40,4,128,0,1,float16,float16,0,10.362709045410156
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,4096,40,4,128,0,1,float16,fp8,0,10.240512212117514
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,4096,40,4,128,0,1,fp8,fp8,0,6.173354466756185
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,4096,40,8,128,0,1,float16,float16,0,10.194431940714518
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,4096,40,8,128,0,1,float16,fp8,0,10.105855941772461
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,4096,40,8,128,0,1,fp8,fp8,0,6.459221522013347
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,4096,40,40,128,0,1,float16,float16,0,5.938346862792969
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,4096,40,40,128,0,1,fp8,fp8,0,4.0306345621744795
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,4096,40,40,128,0,1,float16,fp8,0,5.714090983072917
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,4096,40,1,128,0,1,float16,float16,0,4.5856428146362305
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,4096,40,1,128,0,1,float16,fp8,0,4.809215863545735
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,4096,40,1,128,0,1,fp8,fp8,0,2.771967887878418
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,4096,40,2,128,0,1,float16,float16,0,4.738047917683919
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,4096,40,2,128,0,1,float16,fp8,0,4.943189303080241
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,4096,40,2,128,0,1,fp8,fp8,0,2.895872116088867
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,4096,40,4,128,0,1,fp8,fp8,0,2.9315414428710938
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,4096,40,4,128,0,1,float16,float16,0,4.8346452713012695
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,4096,40,4,128,0,1,float16,fp8,0,4.826794624328613
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,4096,40,8,128,0,1,float16,float16,0,5.024256070454915
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,4096,40,8,128,0,1,float16,fp8,0,4.922538757324219
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,4096,40,8,128,0,1,fp8,fp8,0,3.0272852579752603
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,4096,40,40,128,0,1,float16,float16,0,2.932565371195475
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,4096,40,40,128,0,1,float16,fp8,0,2.891434669494629
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,4096,40,40,128,0,1,fp8,fp8,0,1.9997013409932454
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,4096,40,1,128,0,1,float16,float16,0,2.199039936065674
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,4096,40,1,128,0,1,float16,fp8,0,2.1934080123901367
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,4096,40,1,128,0,1,fp8,fp8,0,1.4023680686950684
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,4096,40,2,128,0,1,float16,float16,0,2.229760011037191
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,4096,40,2,128,0,1,float16,fp8,0,2.2118399937947593
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,4096,40,2,128,0,1,fp8,fp8,0,1.4119253158569336
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,4096,40,4,128,0,1,float16,float16,0,2.3379626274108887
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,4096,40,4,128,0,1,fp8,fp8,0,1.437013308207194
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,4096,40,4,128,0,1,float16,fp8,0,2.280277411142985
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,4096,40,8,128,0,1,float16,float16,0,2.378922621409098
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,4096,40,8,128,0,1,float16,fp8,0,2.4268800417582193
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,4096,40,8,128,0,1,fp8,fp8,0,1.4955520629882812
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,4096,40,40,128,0,1,float16,float16,0,1.441962718963623
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,4096,40,40,128,0,1,float16,fp8,0,1.4254080454508464
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,4096,40,40,128,0,1,fp8,fp8,0,0.9922560056050619
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,4096,40,1,128,0,1,float16,float16,0,1.122986634572347
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,4096,40,1,128,0,1,fp8,fp8,0,0.6850559711456299
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,4096,40,1,128,0,1,float16,fp8,0,1.132373332977295
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,4096,40,2,128,0,1,float16,fp8,0,1.13373867670695
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,4096,40,2,128,0,1,float16,float16,0,1.1557546456654866
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,4096,40,2,128,0,1,fp8,fp8,0,0.6801066398620605
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,4096,40,4,128,0,1,float16,float16,0,1.152511994043986
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,4096,40,4,128,0,1,fp8,fp8,0,0.7009279727935791
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,4096,40,4,128,0,1,float16,fp8,0,1.1521706581115723
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,4096,40,8,128,0,1,float16,float16,0,1.1388586362202961
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,4096,40,8,128,0,1,float16,fp8,0,1.1513173580169678
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,4096,40,8,128,0,1,fp8,fp8,0,0.7306239604949951
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,3072,40,1,128,0,1,fp8,fp8,0,15.714132944742838
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,3072,40,2,128,0,1,fp8,fp8,0,15.355733235677084
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,3072,40,1,128,0,1,float16,float16,0,25.026901245117188
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,3072,40,1,128,0,1,float16,fp8,0,24.072532653808594
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,3072,40,2,128,0,1,float16,float16,0,24.21435801188151
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,3072,40,2,128,0,1,float16,fp8,0,24.759808858235676
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,3072,40,4,128,0,1,float16,float16,0,24.926719665527344
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,3072,40,4,128,0,1,float16,fp8,0,24.249343872070312
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,3072,40,4,128,0,1,fp8,fp8,0,16.09557342529297
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,3072,40,40,128,0,1,fp8,fp8,0,10.68987782796224
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,3072,40,40,128,0,1,float16,float16,0,14.2653439839681
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,3072,40,40,128,0,1,float16,fp8,0,14.558036804199219
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,3072,40,8,128,0,1,fp8,fp8,0,16.303787231445312
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,3072,40,1,128,0,1,float16,float16,0,11.856725056966146
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,3072,40,8,128,0,1,float16,float16,0,25.516031901041668
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,3072,40,8,128,0,1,float16,fp8,0,25.50050099690755
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,3072,40,1,128,0,1,float16,fp8,0,12.351317087809244
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,3072,40,1,128,0,1,fp8,fp8,0,7.438677469889323
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,3072,40,2,128,0,1,fp8,fp8,0,7.2741546630859375
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,3072,40,2,128,0,1,float16,float16,0,12.000938415527344
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,3072,40,4,128,0,1,fp8,fp8,0,7.464277267456055
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,3072,40,2,128,0,1,float16,fp8,0,12.168362935384115
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,3072,40,4,128,0,1,float16,float16,0,11.967146555582682
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,3072,40,4,128,0,1,float16,fp8,0,12.309844970703125
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,3072,40,8,128,0,1,fp8,fp8,0,7.631359736124675
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,3072,40,8,128,0,1,float16,float16,0,12.546048482259115
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,3072,40,40,128,0,1,fp8,fp8,0,5.3766829172770185
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,3072,40,8,128,0,1,float16,fp8,0,12.182528177897135
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,3072,40,1,128,0,1,float16,float16,0,5.363370895385742
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,3072,40,40,128,0,1,float16,float16,0,7.1975250244140625
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,3072,40,1,128,0,1,float16,fp8,0,5.779797236124675
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,3072,40,40,128,0,1,float16,fp8,0,7.284565607706706
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,3072,40,1,128,0,1,fp8,fp8,0,3.447807947794596
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,3072,40,2,128,0,1,fp8,fp8,0,3.539456049601237
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,3072,40,2,128,0,1,float16,float16,0,5.563050587972005
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,3072,40,4,128,0,1,fp8,fp8,0,3.570687929789225
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,3072,40,2,128,0,1,float16,fp8,0,5.948586781819661
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,3072,40,4,128,0,1,float16,float16,0,5.91001574198405
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,3072,40,4,128,0,1,float16,fp8,0,5.824853261311849
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,3072,40,8,128,0,1,float16,float16,0,6.122154871622722
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,3072,40,8,128,0,1,float16,fp8,0,5.9136002858479815
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,3072,40,40,128,0,1,fp8,fp8,0,2.634069283803304
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,3072,40,40,128,0,1,float16,float16,0,3.6732587814331055
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,3072,40,1,128,0,1,float16,float16,0,2.7007999420166016
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,3072,40,40,128,0,1,float16,fp8,0,3.5800746281941733
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,3072,40,8,128,0,1,fp8,fp8,0,3.803818702697754
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,3072,40,1,128,0,1,float16,fp8,0,2.6680320103963218
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,3072,40,1,128,0,1,fp8,fp8,0,1.7223679224650066
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,3072,40,2,128,0,1,float16,float16,0,2.7190612157185874
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,3072,40,2,128,0,1,fp8,fp8,0,1.7271466255187988
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,3072,40,2,128,0,1,float16,fp8,0,2.786304155985514
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,3072,40,4,128,0,1,fp8,fp8,0,1.7827839851379395
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,3072,40,4,128,0,1,float16,float16,0,2.783402760823568
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,3072,40,4,128,0,1,float16,fp8,0,2.7607040405273438
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,3072,40,8,128,0,1,float16,float16,0,2.8619092305501304
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,3072,40,8,128,0,1,float16,fp8,0,2.919253349304199
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,3072,40,8,128,0,1,fp8,fp8,0,1.8396159807840984
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,3072,40,40,128,0,1,float16,float16,0,1.8189652760823567
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,3072,40,1,128,0,1,float16,float16,0,1.287338654200236
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,3072,40,40,128,0,1,float16,fp8,0,1.798485279083252
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,3072,40,40,128,0,1,fp8,fp8,0,1.2781226634979248
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,3072,40,1,128,0,1,fp8,fp8,0,0.8152746359507242
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,3072,40,1,128,0,1,float16,fp8,0,1.2796586354573567
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,3072,40,2,128,0,1,float16,float16,0,1.3173759778340657
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,3072,40,2,128,0,1,float16,fp8,0,1.279146671295166
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,3072,40,2,128,0,1,fp8,fp8,0,0.8352426687876383
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,3072,40,4,128,0,1,float16,float16,0,1.313962697982788
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,3072,40,4,128,0,1,float16,fp8,0,1.3347840309143066
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,3072,40,4,128,0,1,fp8,fp8,0,0.8565759658813477
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,3072,40,8,128,0,1,float16,float16,0,1.369599978129069
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,3072,40,8,128,0,1,float16,fp8,0,1.3503146171569824
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,3072,40,8,128,0,1,fp8,fp8,0,0.9231359958648682
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,3072,40,40,128,0,1,float16,float16,0,0.8917333285013834
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,3072,40,40,128,0,1,float16,fp8,0,0.8606719970703125
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,3072,40,40,128,0,1,fp8,fp8,0,0.6034773190816244
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,3072,40,1,128,0,1,float16,float16,0,0.7115093072255453
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,3072,40,1,128,0,1,float16,fp8,0,0.6922240257263184
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,3072,40,1,128,0,1,fp8,fp8,0,0.41762133439381915
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,3072,40,2,128,0,1,float16,float16,0,0.6891520023345947
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,3072,40,2,128,0,1,float16,fp8,0,0.6993920008341471
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,3072,40,2,128,0,1,fp8,fp8,0,0.41915734608968097
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,3072,40,4,128,0,1,float16,float16,0,0.6985387007395426
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,3072,40,4,128,0,1,float16,fp8,0,0.6848853429158529
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,3072,40,4,128,0,1,fp8,fp8,0,0.43195732434590656
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,3072,40,8,128,0,1,float16,float16,0,0.6958080132802328
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,3072,40,8,128,0,1,fp8,fp8,0,0.4203519821166992
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,3072,40,8,128,0,1,float16,fp8,0,0.6848853429158529
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,2048,40,1,128,0,1,float16,float16,0,25.41602071126302
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,2048,40,1,128,0,1,fp8,fp8,0,16.58129119873047
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,2048,40,2,128,0,1,fp8,fp8,0,16.29320526123047
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,2048,40,1,128,0,1,float16,fp8,0,24.253952026367188
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,2048,40,2,128,0,1,float16,float16,0,24.584192911783855
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,2048,40,2,128,0,1,float16,fp8,0,25.325909932454426
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,2048,40,4,128,0,1,float16,fp8,0,24.64307149251302
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,2048,40,4,128,0,1,float16,float16,0,25.358678181966145
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,2048,40,4,128,0,1,fp8,fp8,0,17.61058171590169
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,2048,40,1,128,0,1,float16,float16,0,11.597482045491537
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,2048,40,40,128,0,1,fp8,fp8,0,11.958101908365885
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,2048,40,40,128,0,1,float16,float16,0,15.47656504313151
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,2048,40,40,128,0,1,float16,fp8,0,15.542442321777344
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,2048,40,8,128,0,1,fp8,fp8,0,17.67355728149414
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,2048,40,8,128,0,1,float16,float16,0,25.454762776692707
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,2048,40,8,128,0,1,float16,fp8,0,25.169408162434895
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,2048,40,1,128,0,1,float16,fp8,0,11.702442169189453
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,2048,40,1,128,0,1,fp8,fp8,0,7.346517562866211
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,2048,40,2,128,0,1,fp8,fp8,0,7.549610773722331
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,2048,40,2,128,0,1,float16,float16,0,11.541163126627604
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,2048,40,2,128,0,1,float16,fp8,0,11.550890604654947
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,2048,40,4,128,0,1,fp8,fp8,0,7.788373311360677
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,2048,40,4,128,0,1,float16,float16,0,11.739306131998697
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,2048,40,4,128,0,1,float16,fp8,0,12.411050160725912
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,2048,40,8,128,0,1,float16,float16,0,12.259498596191406
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,2048,40,40,128,0,1,float16,float16,0,7.712426503499349
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,2048,40,8,128,0,1,fp8,fp8,0,8.45960553487142
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,2048,40,8,128,0,1,float16,fp8,0,12.135424296061197
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,2048,40,40,128,0,1,fp8,fp8,0,5.969919840494792
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,2048,40,1,128,0,1,float16,float16,0,5.385728200276692
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,2048,40,40,128,0,1,float16,fp8,0,7.69979731241862
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,2048,40,1,128,0,1,float16,fp8,0,5.484373092651367
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,2048,40,1,128,0,1,fp8,fp8,0,3.603797276814779
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,2048,40,2,128,0,1,fp8,fp8,0,3.5705172220865884
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,2048,40,2,128,0,1,float16,float16,0,5.481642405192058
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,2048,40,4,128,0,1,fp8,fp8,0,3.7360639572143555
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,2048,40,4,128,0,1,float16,fp8,0,5.647359848022461
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,2048,40,2,128,0,1,float16,fp8,0,5.868714650472005
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,2048,40,4,128,0,1,float16,float16,0,5.583701451619466
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,2048,40,8,128,0,1,float16,float16,0,5.955584208170573
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,2048,40,8,128,0,1,fp8,fp8,0,3.9830185572306314
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,2048,40,40,128,0,1,fp8,fp8,0,2.9156694412231445
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,2048,40,40,128,0,1,float16,float16,0,3.8393173217773438
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,2048,40,1,128,0,1,float16,float16,0,2.750805219014486
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,2048,40,1,128,0,1,float16,fp8,0,2.6922667821248374
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,2048,40,40,128,0,1,float16,fp8,0,3.805525461832682
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,2048,40,8,128,0,1,float16,fp8,0,5.941930770874023
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,2048,40,1,128,0,1,fp8,fp8,0,1.7254400253295898
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,2048,40,2,128,0,1,fp8,fp8,0,1.7455786069234211
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,2048,40,2,128,0,1,float16,float16,0,2.717184066772461
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,2048,40,4,128,0,1,fp8,fp8,0,1.8075307210286458
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,2048,40,2,128,0,1,float16,fp8,0,2.7105280558268228
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,2048,40,4,128,0,1,float16,float16,0,2.8576428095499673
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,2048,40,4,128,0,1,float16,fp8,0,2.7980801264444985
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,2048,40,8,128,0,1,float16,fp8,0,2.926080067952474
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,2048,40,8,128,0,1,float16,float16,0,2.946218808492025
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,2048,40,8,128,0,1,fp8,fp8,0,1.9239253997802734
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,2048,40,40,128,0,1,float16,float16,0,1.919317404429118
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,2048,40,1,128,0,1,float16,float16,0,1.3115733464558919
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,2048,40,40,128,0,1,fp8,fp8,0,1.3987840016682942
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,2048,40,40,128,0,1,float16,fp8,0,1.8679466247558594
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,2048,40,1,128,0,1,float16,fp8,0,1.275391976038615
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,2048,40,1,128,0,1,fp8,fp8,0,0.856234629948934
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,2048,40,2,128,0,1,float16,float16,0,1.3216426372528076
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,2048,40,2,128,0,1,float16,fp8,0,1.2922879854838054
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,2048,40,2,128,0,1,fp8,fp8,0,0.8770559628804525
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,2048,40,4,128,0,1,float16,float16,0,1.3777920405069988
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,2048,40,4,128,0,1,fp8,fp8,0,0.8944640159606934
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,2048,40,4,128,0,1,float16,fp8,0,1.3259092966715496
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,2048,40,8,128,0,1,fp8,fp8,0,0.9714346726735433
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,2048,40,8,128,0,1,float16,float16,0,1.4315519332885742
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,2048,40,8,128,0,1,float16,fp8,0,1.4184106190999348
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,2048,40,40,128,0,1,float16,float16,0,0.9398612976074219
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,2048,40,40,128,0,1,float16,fp8,0,0.8983893394470215
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,2048,40,1,128,0,1,float16,float16,0,0.6224213441212972
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,2048,40,40,128,0,1,fp8,fp8,0,0.7113386789957682
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,2048,40,1,128,0,1,float16,fp8,0,0.6184959808985392
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,2048,40,1,128,0,1,fp8,fp8,0,0.39202133814493817
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,2048,40,2,128,0,1,float16,float16,0,0.6323200066884359
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,2048,40,2,128,0,1,float16,fp8,0,0.6316373348236084
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,2048,40,2,128,0,1,fp8,fp8,0,0.3843413194020589
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,2048,40,4,128,0,1,float16,float16,0,0.625493327776591
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,2048,40,4,128,0,1,float16,fp8,0,0.6256639957427979
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,2048,40,4,128,0,1,fp8,fp8,0,0.4128426710764567
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,2048,40,8,128,0,1,float16,float16,0,0.6297599871953329
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,2048,40,8,128,0,1,fp8,fp8,0,0.4307626485824585
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,2048,40,8,128,0,1,float16,fp8,0,0.6454613208770752
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,2048,40,40,128,0,1,float16,float16,0,0.37444265683492023
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,2048,40,40,128,0,1,float16,fp8,0,0.36266668637593585
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,2048,40,40,128,0,1,fp8,fp8,0,0.2921813329060872
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,2048,40,1,128,0,1,float16,float16,0,0.3333119948705037
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,2048,40,1,128,0,1,float16,fp8,0,0.35089067618052167
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,2048,40,1,128,0,1,fp8,fp8,0,0.21452800432840982
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,2048,40,2,128,0,1,fp8,fp8,0,0.21640533208847046
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,2048,40,2,128,0,1,float16,float16,0,0.35089067618052167
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,2048,40,2,128,0,1,float16,fp8,0,0.3474773168563843
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,2048,40,4,128,0,1,float16,float16,0,0.33894399801890057
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,2048,40,4,128,0,1,float16,fp8,0,0.3399680058161418
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,2048,40,4,128,0,1,fp8,fp8,0,0.2198186715443929
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,2048,40,8,128,0,1,float16,float16,0,0.3614720106124878
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,2048,40,8,128,0,1,float16,fp8,0,0.3428693215052287
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,2048,40,8,128,0,1,fp8,fp8,0,0.2182826598485311
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,1536,40,1,128,0,1,float16,float16,0,14.920703887939453
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,1536,40,1,128,0,1,float16,fp8,0,14.785706837972006
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,1536,40,1,128,0,1,fp8,fp8,0,9.718954722086588
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,1536,40,2,128,0,1,fp8,fp8,0,10.233856201171875
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,1536,40,2,128,0,1,float16,float16,0,14.702250162760416
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,1536,40,2,128,0,1,float16,fp8,0,14.579029083251953
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,1536,40,4,128,0,1,float16,float16,0,14.751232147216797
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,1536,40,4,128,0,1,float16,fp8,0,15.277056376139322
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,1536,40,4,128,0,1,fp8,fp8,0,10.816341400146484
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,1536,40,8,128,0,1,float16,float16,0,15.456255594889322
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,1536,40,1,128,0,1,float16,float16,0,7.214591979980469
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,1536,40,8,128,0,1,fp8,fp8,0,11.655850728352865
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,1536,40,40,128,0,1,fp8,fp8,0,8.027477264404297
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,1536,40,40,128,0,1,float16,float16,0,10.078378677368164
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,1536,40,40,128,0,1,float16,fp8,0,10.002602895100912
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,1536,40,8,128,0,1,float16,fp8,0,14.902100880940756
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,1536,40,1,128,0,1,float16,fp8,0,6.65275764465332
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,1536,40,1,128,0,1,fp8,fp8,0,4.576767921447754
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,1536,40,2,128,0,1,fp8,fp8,0,4.586154619852702
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,1536,40,2,128,0,1,float16,float16,0,7.331839879353841
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,1536,40,2,128,0,1,float16,fp8,0,7.053141276041667
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,1536,40,4,128,0,1,fp8,fp8,0,4.816896120707194
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,1536,40,4,128,0,1,float16,float16,0,7.012693405151367
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,1536,40,4,128,0,1,float16,fp8,0,6.9952850341796875
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,1536,40,8,128,0,1,float16,float16,0,7.240362803141276
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,1536,40,8,128,0,1,float16,fp8,0,7.481514612833659
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,1536,40,8,128,0,1,fp8,fp8,0,5.242538770039876
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,1536,40,40,128,0,1,fp8,fp8,0,4.001109441121419
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,1536,40,1,128,0,1,float16,float16,0,3.3896106084187827
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,1536,40,40,128,0,1,float16,float16,0,5.089621225992839
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,1536,40,1,128,0,1,float16,fp8,0,3.349162737528483
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,1536,40,40,128,0,1,float16,fp8,0,5.097471872965495
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,1536,40,1,128,0,1,fp8,fp8,0,2.2357333501180015
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,1536,40,2,128,0,1,float16,float16,0,3.4536107381184897
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,1536,40,2,128,0,1,fp8,fp8,0,2.244607925415039
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,1536,40,4,128,0,1,fp8,fp8,0,2.3325014114379883
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,1536,40,2,128,0,1,float16,fp8,0,3.365205446879069
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,1536,40,4,128,0,1,float16,float16,0,3.4950825373331704
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,1536,40,4,128,0,1,float16,fp8,0,3.532970746358236
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,1536,40,8,128,0,1,float16,float16,0,3.7299200693766275
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,1536,40,8,128,0,1,float16,fp8,0,3.672917366027832
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,1536,40,8,128,0,1,fp8,fp8,0,2.5603413581848145
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,1536,40,40,128,0,1,float16,float16,0,2.514944076538086
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,1536,40,1,128,0,1,float16,float16,0,1.6360106468200684
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,1536,40,40,128,0,1,fp8,fp8,0,1.934677282969157
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,1536,40,40,128,0,1,float16,fp8,0,2.4734719594319663
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,1536,40,1,128,0,1,fp8,fp8,0,1.0924373467763264
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,1536,40,1,128,0,1,float16,fp8,0,1.6453973452250164
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,1536,40,2,128,0,1,float16,float16,0,1.6735572814941406
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,1536,40,2,128,0,1,fp8,fp8,0,1.0920960108439128
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,1536,40,2,128,0,1,float16,fp8,0,1.6931840578715007
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,1536,40,4,128,0,1,float16,float16,0,1.7153706550598145
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,1536,40,4,128,0,1,fp8,fp8,0,1.1470506985982258
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,1536,40,4,128,0,1,float16,fp8,0,1.7018879254659016
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,1536,40,8,128,0,1,float16,float16,0,1.8360320727030437
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,1536,40,8,128,0,1,float16,fp8,0,1.8049707412719727
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,1536,40,8,128,0,1,fp8,fp8,0,1.2398933569590251
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,1536,40,40,128,0,1,float16,float16,0,1.2373332977294922
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,1536,40,40,128,0,1,float16,fp8,0,1.2136106491088867
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,1536,40,40,128,0,1,fp8,fp8,0,0.928938627243042
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,1536,40,1,128,0,1,float16,float16,0,0.740010658899943
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,1536,40,1,128,0,1,float16,fp8,0,0.736255963643392
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,1536,40,1,128,0,1,fp8,fp8,0,0.5075626770655314
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,1536,40,2,128,0,1,float16,float16,0,0.7621973355611166
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,1536,40,2,128,0,1,float16,fp8,0,0.7550293604532877
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,1536,40,2,128,0,1,fp8,fp8,0,0.5019306739171346
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,1536,40,4,128,0,1,fp8,fp8,0,0.5350399812062582
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,1536,40,4,128,0,1,float16,fp8,0,0.7785813013712565
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,1536,40,4,128,0,1,float16,float16,0,0.7951359748840332
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,1536,40,8,128,0,1,float16,float16,0,0.8401920000712076
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,1536,40,8,128,0,1,float16,fp8,0,0.8470186392466227
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,1536,40,8,128,0,1,fp8,fp8,0,0.5930666526158651
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,1536,40,40,128,0,1,float16,float16,0,0.5864106814066569
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,1536,40,40,128,0,1,float16,fp8,0,0.5471573273340861
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,1536,40,40,128,0,1,fp8,fp8,0,0.43263999621073407
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,1536,40,1,128,0,1,float16,float16,0,0.38741334279378253
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,1536,40,1,128,0,1,fp8,fp8,0,0.23500800132751465
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,1536,40,1,128,0,1,float16,fp8,0,0.3969706694285075
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,1536,40,2,128,0,1,float16,float16,0,0.39867734909057617
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,1536,40,2,128,0,1,float16,fp8,0,0.39150933424631756
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,1536,40,2,128,0,1,fp8,fp8,0,0.24302933613459268
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,1536,40,4,128,0,1,float16,float16,0,0.3867306709289551
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,1536,40,4,128,0,1,float16,fp8,0,0.38792534669240314
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,1536,40,4,128,0,1,fp8,fp8,0,0.23893332481384277
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,1536,40,8,128,0,1,float16,float16,0,0.38860801855723065
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,1536,40,8,128,0,1,fp8,fp8,0,0.250709335009257
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,1536,40,8,128,0,1,float16,fp8,0,0.4015786647796631
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,1536,40,40,128,0,1,float16,float16,0,0.23278933763504028
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,1536,40,40,128,0,1,float16,fp8,0,0.23688532908757529
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,1536,40,40,128,0,1,fp8,fp8,0,0.15718400478363037
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,1536,40,1,128,0,1,float16,float16,0,0.20514132579167685
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,1536,40,1,128,0,1,fp8,fp8,0,0.13994666934013367
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,1536,40,1,128,0,1,float16,fp8,0,0.20514132579167685
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,1536,40,2,128,0,1,float16,fp8,0,0.21333332856496176
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,1536,40,2,128,0,1,float16,float16,0,0.21606399615605673
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,1536,40,2,128,0,1,fp8,fp8,0,0.14011733730634054
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,1536,40,4,128,0,1,float16,float16,0,0.21128533283869425
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,1536,40,4,128,0,1,float16,fp8,0,0.2121386726697286
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,1536,40,4,128,0,1,fp8,fp8,0,0.13994666934013367
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,1536,40,8,128,0,1,float16,float16,0,0.21128533283869425
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,1536,40,8,128,0,1,float16,fp8,0,0.21128533283869425
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,1536,40,8,128,0,1,fp8,fp8,0,0.1430186629295349
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,1024,40,1,128,0,1,float16,float16,0,14.846635182698568
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,1024,40,1,128,0,1,float16,fp8,0,15.020032246907553
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,1024,40,1,128,0,1,fp8,fp8,0,10.02786127726237
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,1024,40,2,128,0,1,fp8,fp8,0,10.218495686848959
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,1024,40,2,128,0,1,float16,float16,0,14.457003275553385
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,1024,40,2,128,0,1,float16,fp8,0,14.717781066894531
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,1024,40,4,128,0,1,float16,float16,0,14.927701314290365
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,1024,40,4,128,0,1,float16,fp8,0,15.172096252441406
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,1024,40,4,128,0,1,fp8,fp8,0,11.175764719645182
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,1024,40,8,128,0,1,float16,float16,0,16.23534901936849
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,1024,40,1,128,0,1,float16,float16,0,7.162709554036458
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,1024,40,8,128,0,1,fp8,fp8,0,11.895637512207031
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,1024,40,8,128,0,1,float16,fp8,0,15.738367716471354
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,1024,40,40,128,0,1,float16,float16,0,11.203754425048828
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,1024,40,40,128,0,1,float16,fp8,0,10.704383850097656
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,1024,40,40,128,0,1,fp8,fp8,0,9.370965321858725
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,1024,40,1,128,0,1,float16,fp8,0,7.252309163411458
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,1024,40,1,128,0,1,fp8,fp8,0,5.029888153076172
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,1024,40,2,128,0,1,fp8,fp8,0,5.132970809936523
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,1024,40,2,128,0,1,float16,fp8,0,7.108608245849609
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,1024,40,2,128,0,1,float16,float16,0,7.29634157816569
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,1024,40,4,128,0,1,fp8,fp8,0,5.406208038330078
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,1024,40,4,128,0,1,float16,float16,0,7.494826634724935
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,1024,40,4,128,0,1,float16,fp8,0,7.391914367675781
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,1024,40,8,128,0,1,float16,float16,0,8.012117385864258
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,1024,40,8,128,0,1,float16,fp8,0,7.851861317952474
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,1024,40,1,128,0,1,float16,float16,0,3.4423465728759766
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,1024,40,8,128,0,1,fp8,fp8,0,5.872639973958333
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,1024,40,1,128,0,1,float16,fp8,0,3.522730509440104
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,1024,40,40,128,0,1,float16,float16,0,5.609813054402669
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,1024,40,40,128,0,1,fp8,fp8,0,4.616533279418945
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,1024,40,40,128,0,1,float16,fp8,0,5.398015975952148
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,1024,40,1,128,0,1,fp8,fp8,0,2.442922592163086
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,1024,40,2,128,0,1,float16,float16,0,3.5539627075195312
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,1024,40,2,128,0,1,float16,fp8,0,3.480064074198405
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,1024,40,2,128,0,1,fp8,fp8,0,2.5038506189982095
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,1024,40,4,128,0,1,fp8,fp8,0,2.621781349182129
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,1024,40,4,128,0,1,float16,float16,0,3.6312745412190757
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,1024,40,4,128,0,1,float16,fp8,0,3.6647253036499023
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,1024,40,8,128,0,1,float16,float16,0,3.9804585774739585
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,1024,40,8,128,0,1,float16,fp8,0,3.8954668045043945
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,1024,40,8,128,0,1,fp8,fp8,0,2.835455894470215
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,1024,40,1,128,0,1,float16,float16,0,1.7056427001953125
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,1024,40,1,128,0,1,float16,fp8,0,1.6855039596557617
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,1024,40,40,128,0,1,float16,float16,0,2.802687962849935
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,1024,40,40,128,0,1,fp8,fp8,0,2.3215786616007485
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,1024,40,40,128,0,1,float16,fp8,0,2.69380251566569
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,1024,40,1,128,0,1,fp8,fp8,0,1.167359987894694
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,1024,40,2,128,0,1,float16,float16,0,1.7739094098409016
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,1024,40,2,128,0,1,fp8,fp8,0,1.185109297434489
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,1024,40,2,128,0,1,float16,fp8,0,1.7460907300313313
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,1024,40,4,128,0,1,float16,float16,0,1.791317303975423
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,1024,40,4,128,0,1,fp8,fp8,0,1.2564480304718018
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,1024,40,4,128,0,1,float16,fp8,0,1.7810773849487305
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,1024,40,8,128,0,1,float16,float16,0,1.9276800155639648
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,1024,40,8,128,0,1,float16,fp8,0,1.9119787216186523
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,1024,40,8,128,0,1,fp8,fp8,0,1.3917867342631023
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,1024,40,40,128,0,1,float16,float16,0,1.407317320505778
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,1024,40,1,128,0,1,float16,float16,0,0.8046933015187582
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,1024,40,40,128,0,1,float16,fp8,0,1.3412693341573079
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,1024,40,1,128,0,1,float16,fp8,0,0.7944533030192057
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,1024,40,40,128,0,1,fp8,fp8,0,1.1083093484242756
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,1024,40,1,128,0,1,fp8,fp8,0,0.5626879930496216
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,1024,40,2,128,0,1,float16,float16,0,0.8330240249633789
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,1024,40,2,128,0,1,float16,fp8,0,0.8495786984761556
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,1024,40,2,128,0,1,fp8,fp8,0,0.571562647819519
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,1024,40,4,128,0,1,float16,float16,0,0.8596479892730713
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,1024,40,4,128,0,1,fp8,fp8,0,0.6106453339258829
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,1024,40,4,128,0,1,float16,fp8,0,0.8707413673400879
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,1024,40,8,128,0,1,fp8,fp8,0,0.6748159726460775
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,1024,40,8,128,0,1,float16,fp8,0,0.91921067237854
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,1024,40,8,128,0,1,float16,float16,0,0.9268906911214193
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,1024,40,40,128,0,1,float16,float16,0,0.6801066398620605
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,1024,40,40,128,0,1,float16,fp8,0,0.6278826793034872
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,1024,40,40,128,0,1,fp8,fp8,0,0.5396480162938436
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,1024,40,1,128,0,1,float16,float16,0,0.35703468322753906
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,1024,40,1,128,0,1,float16,fp8,0,0.36317865053812665
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,1024,40,1,128,0,1,fp8,fp8,0,0.22784000635147095
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,1024,40,2,128,0,1,float16,fp8,0,0.3520853519439697
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,1024,40,2,128,0,1,float16,float16,0,0.36471466223398846
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,1024,40,2,128,0,1,fp8,fp8,0,0.22681599855422974
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,1024,40,4,128,0,1,float16,float16,0,0.36113067468007404
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,1024,40,4,128,0,1,fp8,fp8,0,0.23398399353027344
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,1024,40,4,128,0,1,float16,fp8,0,0.36232535044352215
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,1024,40,8,128,0,1,float16,float16,0,0.3850239912668864
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,1024,40,8,128,0,1,float16,fp8,0,0.3848533233006795
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,1024,40,8,128,0,1,fp8,fp8,0,0.2734079957008362
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,1024,40,40,128,0,1,float16,float16,0,0.24012800057729086
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,1024,40,40,128,0,1,float16,fp8,0,0.21913599967956543
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,1024,40,40,128,0,1,fp8,fp8,0,0.2213546633720398
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,1024,40,1,128,0,1,float16,float16,0,0.18278400103251138
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,1024,40,1,128,0,1,float16,fp8,0,0.18039466937383017
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,1024,40,1,128,0,1,fp8,fp8,0,0.11571199695269267
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,1024,40,2,128,0,1,float16,float16,0,0.18210132916768393
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,1024,40,2,128,0,1,float16,fp8,0,0.1786880095799764
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,1024,40,2,128,0,1,fp8,fp8,0,0.11929600437482198
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,1024,40,4,128,0,1,float16,float16,0,0.19131733973821005
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,1024,40,4,128,0,1,fp8,fp8,0,0.1181013286113739
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,1024,40,4,128,0,1,float16,fp8,0,0.183296004931132
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,1024,40,8,128,0,1,float16,float16,0,0.19694934288660684
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,1024,40,8,128,0,1,float16,fp8,0,0.1868799924850464
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,1024,40,8,128,0,1,fp8,fp8,0,0.11673600474993388
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,1024,40,40,128,0,1,float16,float16,0,0.1160533328851064
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,1024,40,40,128,0,1,float16,fp8,0,0.11639466881752014
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,1024,40,40,128,0,1,fp8,fp8,0,0.07458133498827617
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,1024,40,1,128,0,1,float16,float16,0,0.1114453375339508
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,1024,40,1,128,0,1,float16,fp8,0,0.11127466956774394
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,1024,40,1,128,0,1,fp8,fp8,0,0.08106666803359985
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,1024,40,2,128,0,1,float16,float16,0,0.1114453375339508
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,1024,40,2,128,0,1,float16,fp8,0,0.11571199695269267
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,1024,40,2,128,0,1,fp8,fp8,0,0.07167999943097432
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,1024,40,4,128,0,1,float16,float16,0,0.11400533715883891
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,1024,40,4,128,0,1,float16,fp8,0,0.11349333326021831
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,1024,40,4,128,0,1,fp8,fp8,0,0.0795306662718455
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,1024,40,8,128,0,1,float16,float16,0,0.11110400160153706
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,1024,40,8,128,0,1,float16,fp8,0,0.11059199770291646
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,1024,40,8,128,0,1,fp8,fp8,0,0.07372800012429555
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,512,40,1,128,0,1,fp8,fp8,0,7.800490697224935
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,512,40,1,128,0,1,float16,float16,0,10.636117299397787
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,512,40,1,128,0,1,float16,fp8,0,10.448042551676432
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,512,40,2,128,0,1,fp8,fp8,0,7.969280242919922
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,512,40,2,128,0,1,float16,float16,0,10.877610524495443
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,512,40,2,128,0,1,float16,fp8,0,10.5797971089681
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,512,40,4,128,0,1,float16,float16,0,11.080362955729166
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,512,40,4,128,0,1,float16,fp8,0,11.258880615234375
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,512,40,4,128,0,1,fp8,fp8,0,8.822954813639322
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,512,40,8,128,0,1,float16,fp8,0,11.773099263509115
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,512,40,8,128,0,1,float16,float16,0,12.247552235921225
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,512,40,1,128,0,1,float16,float16,0,5.139967918395996
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,512,40,8,128,0,1,fp8,fp8,0,9.723391850789389
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,512,40,40,128,0,1,float16,fp8,0,9.712469100952148
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,512,40,40,128,0,1,fp8,fp8,0,8.573951721191406
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,512,40,40,128,0,1,float16,float16,0,10.306048075358072
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,512,40,1,128,0,1,float16,fp8,0,5.260800043741862
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,512,40,1,128,0,1,fp8,fp8,0,3.766613324483236
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,512,40,2,128,0,1,float16,float16,0,5.281279881795247
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,512,40,2,128,0,1,float16,fp8,0,5.2640425364176435
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,512,40,2,128,0,1,fp8,fp8,0,3.9275519053141275
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,512,40,4,128,0,1,fp8,fp8,0,4.283050537109375
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,512,40,4,128,0,1,float16,float16,0,5.559125264485677
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,512,40,4,128,0,1,float16,fp8,0,5.598549524943034
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,512,40,8,128,0,1,float16,float16,0,5.934421539306641
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,512,40,8,128,0,1,float16,fp8,0,5.747029622395833
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,512,40,8,128,0,1,fp8,fp8,0,4.740266799926758
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,512,40,40,128,0,1,float16,float16,0,5.177344004313151
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,512,40,40,128,0,1,float16,fp8,0,4.887551943461101
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,512,40,1,128,0,1,float16,fp8,0,2.5214293797810874
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,512,40,1,128,0,1,float16,float16,0,2.524127960205078
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,512,40,40,128,0,1,fp8,fp8,0,4.316159884134929
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,512,40,1,128,0,1,fp8,fp8,0,1.855829397837321
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,512,40,2,128,0,1,float16,float16,0,2.58133331934611
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,512,40,2,128,0,1,float16,fp8,0,2.6267306009928384
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,512,40,2,128,0,1,fp8,fp8,0,1.9391147295633953
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,512,40,4,128,0,1,fp8,fp8,0,2.021205266316732
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,512,40,4,128,0,1,float16,fp8,0,2.6166613896687827
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,512,40,4,128,0,1,float16,float16,0,2.7228161493937173
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,512,40,8,128,0,1,float16,float16,0,3.0146560668945312
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,512,40,8,128,0,1,float16,fp8,0,2.8402347564697266
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,512,40,8,128,0,1,fp8,fp8,0,2.2654293378194175
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,512,40,40,128,0,1,float16,float16,0,2.5871359507242837
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,512,40,40,128,0,1,float16,fp8,0,2.4702293078104653
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,512,40,1,128,0,1,float16,float16,0,1.2231679757436116
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,512,40,1,128,0,1,float16,fp8,0,1.2125866413116455
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,512,40,40,128,0,1,fp8,fp8,0,2.093397299448649
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,512,40,1,128,0,1,fp8,fp8,0,0.8763733704884847
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,512,40,2,128,0,1,float16,float16,0,1.257813294728597
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,512,40,2,128,0,1,float16,fp8,0,1.2318720022837322
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,512,40,2,128,0,1,fp8,fp8,0,0.9057280222574869
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,512,40,4,128,0,1,float16,float16,0,1.3153279622395833
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,512,40,4,128,0,1,float16,fp8,0,1.2881920337677002
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,512,40,4,128,0,1,fp8,fp8,0,0.9507839679718018
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,512,40,8,128,0,1,float16,float16,0,1.4201173782348633
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,512,40,8,128,0,1,float16,fp8,0,1.418922742207845
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,512,40,8,128,0,1,fp8,fp8,0,1.0699093341827393
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,512,40,40,128,0,1,float16,float16,0,1.2950186729431152
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,512,40,40,128,0,1,float16,fp8,0,1.2317012945810955
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,512,40,40,128,0,1,fp8,fp8,0,1.0089813073476155
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,512,40,1,128,0,1,float16,fp8,0,0.5556906859079996
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,512,40,1,128,0,1,float16,float16,0,0.558079997698466
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,512,40,1,128,0,1,fp8,fp8,0,0.4164266586303711
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,512,40,2,128,0,1,float16,float16,0,0.5886293252309164
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,512,40,2,128,0,1,float16,fp8,0,0.5918720165888468
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,512,40,2,128,0,1,fp8,fp8,0,0.4408320188522339
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,512,40,4,128,0,1,float16,float16,0,0.6186666488647461
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,512,40,4,128,0,1,fp8,fp8,0,0.47138134638468426
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,512,40,4,128,0,1,float16,fp8,0,0.6251519918441772
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,512,40,8,128,0,1,float16,float16,0,0.6784000396728516
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,512,40,8,128,0,1,float16,fp8,0,0.6606506506601969
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,512,40,8,128,0,1,fp8,fp8,0,0.5266773303349813
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,512,40,40,128,0,1,float16,float16,0,0.59170134862264
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,512,40,40,128,0,1,float16,fp8,0,0.5463039875030518
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,512,40,40,128,0,1,fp8,fp8,0,0.47018667062123615
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,512,40,1,128,0,1,float16,float16,0,0.22801067431767783
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,512,40,1,128,0,1,fp8,fp8,0,0.14728533228238425
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,512,40,1,128,0,1,float16,fp8,0,0.23227733373641968
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,512,40,2,128,0,1,float16,float16,0,0.2259626587231954
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,512,40,2,128,0,1,float16,fp8,0,0.237226665019989
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,512,40,2,128,0,1,fp8,fp8,0,0.1520639955997467
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,512,40,4,128,0,1,float16,float16,0,0.23176532983779907
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,512,40,4,128,0,1,float16,fp8,0,0.24268800020217896
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,512,40,4,128,0,1,fp8,fp8,0,0.15684266885121664
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,512,40,8,128,0,1,float16,float16,0,0.25804799795150757
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,512,40,8,128,0,1,fp8,fp8,0,0.2065066695213318
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,512,40,8,128,0,1,float16,fp8,0,0.2512213389078776
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,512,40,40,128,0,1,float16,float16,0,0.1771519978841146
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,512,40,40,128,0,1,float16,fp8,0,0.1462613344192505
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,512,40,40,128,0,1,fp8,fp8,0,0.18278400103251138
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,512,40,1,128,0,1,float16,fp8,0,0.11485866705576579
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,512,40,1,128,0,1,float16,float16,0,0.11502933502197266
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,512,40,1,128,0,1,fp8,fp8,0,0.08004266520341237
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,512,40,2,128,0,1,float16,float16,0,0.11571199695269267
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,512,40,2,128,0,1,float16,fp8,0,0.11451733112335205
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,512,40,2,128,0,1,fp8,fp8,0,0.08140799899895985
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,512,40,4,128,0,1,float16,float16,0,0.1153706709543864
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,512,40,4,128,0,1,float16,fp8,0,0.11400533715883891
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,512,40,4,128,0,1,fp8,fp8,0,0.07987200220425923
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,512,40,8,128,0,1,float16,float16,0,0.11793067057927449
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,512,40,8,128,0,1,fp8,fp8,0,0.08072533210118611
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,512,40,8,128,0,1,float16,fp8,0,0.11485866705576579
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,512,40,40,128,0,1,float16,float16,0,0.08004266520341237
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,512,40,40,128,0,1,float16,fp8,0,0.08226133386294048
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,512,40,40,128,0,1,fp8,fp8,0,0.04863999783992767
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,512,40,1,128,0,1,float16,float16,0,0.06843733290831248
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,512,40,1,128,0,1,float16,fp8,0,0.06877866884072621
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,512,40,2,128,0,1,float16,float16,0,0.06860800087451935
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,512,40,1,128,0,1,fp8,fp8,0,0.04795733094215393
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,512,40,2,128,0,1,float16,fp8,0,0.06997333467006683
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,512,40,2,128,0,1,fp8,fp8,0,0.048298666874567665
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,512,40,4,128,0,1,float16,float16,0,0.06946133573849995
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,512,40,4,128,0,1,float16,fp8,0,0.06980266670385997
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,512,40,4,128,0,1,fp8,fp8,0,0.0481279989083608
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,512,40,8,128,0,1,float16,float16,0,0.06877866884072621
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,512,40,8,128,0,1,float16,fp8,0,0.0682666649421056
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,512,40,8,128,0,1,fp8,fp8,0,0.0481279989083608
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,512,40,40,128,0,1,float16,float16,0,0.04266666869322459
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,512,40,40,128,0,1,float16,fp8,0,0.042837331692377724
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,512,40,40,128,0,1,fp8,fp8,0,0.03089066594839096
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,512,40,1,128,0,1,float16,float16,0,0.04147200038035711
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,512,40,1,128,0,1,float16,fp8,0,0.04249600072701772
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,512,40,1,128,0,1,fp8,fp8,0,0.03054933249950409
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,512,40,2,128,0,1,float16,float16,0,0.04215466479460398
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,512,40,2,128,0,1,float16,fp8,0,0.04249600072701772
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,512,40,2,128,0,1,fp8,fp8,0,0.029866665601730347
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,512,40,4,128,0,1,float16,float16,0,0.04181333382924398
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,512,40,4,128,0,1,float16,fp8,0,0.04181333382924398
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,512,40,4,128,0,1,fp8,fp8,0,0.029866665601730347
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,512,40,8,128,0,1,float16,float16,0,0.04130133241415024
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,512,40,8,128,0,1,fp8,fp8,0,0.029525332152843475
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,512,40,8,128,0,1,float16,fp8,0,0.04095999896526337
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,256,40,1,128,0,1,fp8,fp8,0,3.3141759236653647
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,256,40,1,128,0,1,float16,float16,0,4.510890642801921
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,256,40,1,128,0,1,float16,fp8,0,4.497749328613281
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,256,40,2,128,0,1,float16,float16,0,4.67029349009196
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,256,40,2,128,0,1,float16,fp8,0,4.619434674580892
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,256,40,2,128,0,1,fp8,fp8,0,3.4904747009277344
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,256,40,4,128,0,1,float16,fp8,0,4.985002517700195
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,256,40,4,128,0,1,float16,float16,0,5.041664123535156
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,256,40,4,128,0,1,fp8,fp8,0,3.800917307535807
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,256,40,8,128,0,1,float16,float16,0,5.641216278076172
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,256,40,8,128,0,1,float16,fp8,0,5.509802500406901
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,256,40,8,128,0,1,fp8,fp8,0,4.318719863891602
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,256,40,1,128,0,1,float16,float16,0,2.2918826738993325
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,256,40,40,128,0,1,float16,float16,0,5.179050763448079
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,256,40,40,128,0,1,float16,fp8,0,4.89250119527181
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,256,40,40,128,0,1,fp8,fp8,0,4.225536028544108
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,256,40,1,128,0,1,float16,fp8,0,2.313216050465902
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,256,40,1,128,0,1,fp8,fp8,0,1.5822505950927734
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,256,40,2,128,0,1,float16,float16,0,2.3649279276529946
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,256,40,2,128,0,1,float16,fp8,0,2.391040007273356
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,256,40,2,128,0,1,fp8,fp8,0,1.663658618927002
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,256,40,4,128,0,1,float16,float16,0,2.4734719594319663
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,256,40,4,128,0,1,fp8,fp8,0,1.8008747100830078
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,256,40,4,128,0,1,float16,fp8,0,2.387455940246582
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,256,40,8,128,0,1,float16,float16,0,2.741759936014811
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,256,40,8,128,0,1,float16,fp8,0,2.686293284098307
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,256,40,8,128,0,1,fp8,fp8,0,2.0787199338277182
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,256,40,40,128,0,1,float16,float16,0,2.621269385019938
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,256,40,40,128,0,1,float16,fp8,0,2.471423943837484
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,256,40,1,128,0,1,float16,float16,0,1.1318613688151042
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,256,40,1,128,0,1,float16,fp8,0,1.12281600634257
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,256,40,40,128,0,1,fp8,fp8,0,2.1050027211507163
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,256,40,1,128,0,1,fp8,fp8,0,0.746837298075358
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,256,40,2,128,0,1,float16,fp8,0,1.0898773670196533
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,256,40,2,128,0,1,float16,float16,0,1.1675306955973308
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,256,40,2,128,0,1,fp8,fp8,0,0.830293337504069
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,256,40,4,128,0,1,float16,float16,0,1.1786239941914876
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,256,40,4,128,0,1,fp8,fp8,0,0.8540159861246744
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,256,40,8,128,0,1,float16,float16,0,1.3443412780761719
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,256,40,4,128,0,1,float16,fp8,0,1.1694080034891765
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,256,40,8,128,0,1,float16,fp8,0,1.3148159980773926
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,256,40,8,128,0,1,fp8,fp8,0,0.9840640226999918
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,256,40,40,128,0,1,float16,float16,0,1.2934827009836833
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,256,40,40,128,0,1,float16,fp8,0,1.2134400208791096
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,256,40,40,128,0,1,fp8,fp8,0,0.9792853196461996
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,256,40,1,128,0,1,float16,float16,0,0.4522666533788045
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,256,40,1,128,0,1,float16,fp8,0,0.4795733292897542
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,256,40,1,128,0,1,fp8,fp8,0,0.3397973378499349
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,256,40,2,128,0,1,float16,float16,0,0.5114880005518595
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,256,40,2,128,0,1,float16,fp8,0,0.49937065442403156
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,256,40,2,128,0,1,fp8,fp8,0,0.3662506739298503
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,256,40,4,128,0,1,float16,float16,0,0.52292267481486
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,256,40,4,128,0,1,float16,fp8,0,0.5108053286870321
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,256,40,4,128,0,1,fp8,fp8,0,0.3938986857732137
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,256,40,8,128,0,1,float16,float16,0,0.6166186730066935
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,256,40,8,128,0,1,fp8,fp8,0,0.4551680088043213
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,256,40,40,128,0,1,float16,float16,0,0.5981866518656412
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,256,40,8,128,0,1,float16,fp8,0,0.5992106596628824
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,256,40,40,128,0,1,float16,fp8,0,0.5524479945500692
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,256,40,40,128,0,1,fp8,fp8,0,0.4519253174463908
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,256,40,1,128,0,1,float16,float16,0,0.15684266885121664
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,256,40,1,128,0,1,float16,fp8,0,0.1585493286450704
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,256,40,1,128,0,1,fp8,fp8,0,0.10478933652242024
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,256,40,2,128,0,1,float16,float16,0,0.1628159979979197
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,256,40,2,128,0,1,float16,fp8,0,0.15940266847610474
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,256,40,2,128,0,1,fp8,fp8,0,0.106495996316274
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,256,40,4,128,0,1,float16,float16,0,0.17151999473571777
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,256,40,4,128,0,1,float16,fp8,0,0.16861865917841592
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,256,40,4,128,0,1,fp8,fp8,0,0.10990933577219646
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,256,40,8,128,0,1,float16,float16,0,0.19780266284942627
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,256,40,8,128,0,1,float16,fp8,0,0.18961066007614136
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,256,40,8,128,0,1,fp8,fp8,0,0.17254400253295898
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,256,40,40,128,0,1,float16,float16,0,0.14523733655611673
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,256,40,40,128,0,1,fp8,fp8,0,0.15223466356595358
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,256,40,1,128,0,1,float16,fp8,0,0.08328533172607422
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,256,40,1,128,0,1,float16,float16,0,0.08362666765848796
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,256,40,40,128,0,1,float16,fp8,0,0.11622400085131328
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,256,40,1,128,0,1,fp8,fp8,0,0.059903999169667564
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,256,40,2,128,0,1,float16,float16,0,0.08106666803359985
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,256,40,2,128,0,1,float16,fp8,0,0.0820906658967336
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,256,40,2,128,0,1,fp8,fp8,0,0.05853866537412008
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,256,40,4,128,0,1,float16,fp8,0,0.08311466872692108
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,256,40,4,128,0,1,float16,float16,0,0.08618666728337605
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,256,40,4,128,0,1,fp8,fp8,0,0.059392000238100685
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,256,40,8,128,0,1,float16,fp8,0,0.08294400076071422
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,256,40,8,128,0,1,float16,float16,0,0.08499200145403545
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,256,40,8,128,0,1,fp8,fp8,0,0.05905066430568695
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,256,40,40,128,0,1,float16,float16,0,0.053247998158137
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,256,40,40,128,0,1,float16,fp8,0,0.05034666756788889
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,256,40,40,128,0,1,fp8,fp8,0,0.03669333209594091
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,256,40,1,128,0,1,float16,float16,0,0.04659200211366018
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,256,40,1,128,0,1,float16,fp8,0,0.04727466901143392
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,256,40,1,128,0,1,fp8,fp8,0,0.03618133316437403
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,256,40,2,128,0,1,float16,fp8,0,0.04744533201058706
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,256,40,2,128,0,1,float16,float16,0,0.04744533201058706
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,256,40,2,128,0,1,fp8,fp8,0,0.0365226666132609
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,256,40,4,128,0,1,float16,float16,0,0.04693333307902018
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,256,40,4,128,0,1,fp8,fp8,0,0.03703466554482778
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,256,40,8,128,0,1,float16,float16,0,0.04761599997679392
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,256,40,8,128,0,1,float16,fp8,0,0.04727466901143392
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,256,40,4,128,0,1,float16,fp8,0,0.04727466901143392
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,256,40,8,128,0,1,fp8,fp8,0,0.037205333511034645
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,256,40,40,128,0,1,float16,float16,0,0.03259733319282532
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,256,40,40,128,0,1,float16,fp8,0,0.03276800115903219
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,256,40,40,128,0,1,fp8,fp8,0,0.025087999800841015
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,256,40,1,128,0,1,float16,float16,0,0.03089066594839096
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,256,40,1,128,0,1,fp8,fp8,0,0.02457600086927414
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,256,40,1,128,0,1,float16,fp8,0,0.030720000465710957
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,256,40,2,128,0,1,float16,float16,0,0.031061333914597828
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,256,40,2,128,0,1,float16,fp8,0,0.031061333914597828
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,256,40,4,128,0,1,float16,float16,0,0.0314026673634847
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,256,40,2,128,0,1,fp8,fp8,0,0.02491733431816101
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,256,40,4,128,0,1,float16,fp8,0,0.031914666295051575
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,256,40,8,128,0,1,float16,float16,0,0.031061333914597828
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,256,40,4,128,0,1,fp8,fp8,0,0.02457600086927414
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,256,40,8,128,0,1,float16,fp8,0,0.0314026673634847
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,256,40,40,128,0,1,float16,float16,0,0.023039999107519787
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,256,40,8,128,0,1,fp8,fp8,0,0.024405332903067272
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,256,40,40,128,0,1,float16,fp8,0,0.023039999107519787
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,256,40,40,128,0,1,fp8,fp8,0,0.018432000031073887
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,256,40,1,128,0,1,float16,float16,0,0.022357332209746044
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,256,40,1,128,0,1,float16,fp8,0,0.02252800017595291
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,256,40,2,128,0,1,float16,float16,0,0.022357332209746044
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,256,40,1,128,0,1,fp8,fp8,0,0.017237332959969837
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,256,40,2,128,0,1,float16,fp8,0,0.02218666672706604
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,256,40,2,128,0,1,fp8,fp8,0,0.01757866640885671
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,256,40,4,128,0,1,float16,float16,0,0.02218666672706604
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,256,40,4,128,0,1,fp8,fp8,0,0.017237332959969837
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,256,40,4,128,0,1,float16,fp8,0,0.022698665658632915
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,256,40,8,128,0,1,float16,float16,0,0.02252800017595291
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,256,40,8,128,0,1,float16,fp8,0,0.022357332209746044
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,256,40,8,128,0,1,fp8,fp8,0,0.017237332959969837
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,128,40,1,128,0,1,float16,float16,0,2.179413318634033
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,128,40,1,128,0,1,fp8,fp8,0,1.4912853240966797
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,128,40,1,128,0,1,float16,fp8,0,2.1736106872558594
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,128,40,2,128,0,1,float16,float16,0,2.2444373766581216
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,128,40,2,128,0,1,fp8,fp8,0,1.5825920104980469
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,128,40,2,128,0,1,float16,fp8,0,2.2301012674967446
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,128,40,4,128,0,1,float16,float16,0,2.415616035461426
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,128,40,4,128,0,1,float16,fp8,0,2.3866027196248374
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,128,40,4,128,0,1,fp8,fp8,0,1.7704960505167644
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,128,40,8,128,0,1,float16,float16,0,2.7356160481770835
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,128,40,8,128,0,1,float16,fp8,0,2.678954760233561
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,128,40,8,128,0,1,fp8,fp8,0,2.041855971018473
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,128,40,1,128,0,1,float16,float16,0,1.0565973122914631
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,128,40,40,128,0,1,float16,float16,0,2.6236586570739746
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,128,40,40,128,0,1,fp8,fp8,0,2.12172794342041
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,128,40,40,128,0,1,float16,fp8,0,2.4802986780802407
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,128,40,1,128,0,1,float16,fp8,0,1.057792027791341
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,128,40,1,128,0,1,fp8,fp8,0,0.6987093289693197
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,128,40,2,128,0,1,float16,float16,0,1.0963626702626545
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,128,40,2,128,0,1,fp8,fp8,0,0.7386453151702881
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,128,40,2,128,0,1,float16,fp8,0,1.0902187029520671
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,128,40,4,128,0,1,float16,float16,0,1.1828906536102295
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,128,40,4,128,0,1,float16,fp8,0,1.1724800268809001
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,128,40,4,128,0,1,fp8,fp8,0,0.815445343653361
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,128,40,8,128,0,1,float16,float16,0,1.3504853248596191
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,128,40,8,128,0,1,float16,fp8,0,1.3120853106180828
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,128,40,8,128,0,1,fp8,fp8,0,0.9632426897684733
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,128,40,40,128,0,1,float16,float16,0,1.2962133089701335
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,128,40,40,128,0,1,float16,fp8,0,1.2264106273651123
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,128,40,40,128,0,1,fp8,fp8,0,1.001301368077596
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,128,40,1,128,0,1,float16,float16,0,0.4889599879582723
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,128,40,1,128,0,1,fp8,fp8,0,0.3380906581878662
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,128,40,1,128,0,1,float16,fp8,0,0.4891306559244792
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,128,40,2,128,0,1,float16,float16,0,0.5114880005518595
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,128,40,2,128,0,1,float16,fp8,0,0.5072213411331177
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,128,40,2,128,0,1,fp8,fp8,0,0.35891199111938477
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,128,40,4,128,0,1,float16,fp8,0,0.5585920015970866
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,128,40,4,128,0,1,fp8,fp8,0,0.37410132090250653
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,128,40,4,128,0,1,float16,float16,0,0.5732693274815878
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,128,40,8,128,0,1,float16,float16,0,0.6234453519185384
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,128,40,8,128,0,1,float16,fp8,0,0.6063786745071411
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,128,40,8,128,0,1,fp8,fp8,0,0.43775999546051025
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,128,40,40,128,0,1,float16,float16,0,0.6010880072911581
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,128,40,40,128,0,1,float16,fp8,0,0.5533013343811035
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,128,40,1,128,0,1,float16,float16,0,0.13482667009035745
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,128,40,40,128,0,1,fp8,fp8,0,0.4403200149536133
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,128,40,1,128,0,1,float16,fp8,0,0.13158399860064188
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,128,40,1,128,0,1,fp8,fp8,0,0.08430932958920796
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,128,40,2,128,0,1,float16,fp8,0,0.13175466656684875
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,128,40,2,128,0,1,float16,float16,0,0.12970667084058127
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,128,40,2,128,0,1,fp8,fp8,0,0.08567466338475545
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,128,40,4,128,0,1,float16,float16,0,0.14267733693122864
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,128,40,4,128,0,1,fp8,fp8,0,0.09130666653315227
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,128,40,4,128,0,1,float16,fp8,0,0.13704533378283182
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,128,40,8,128,0,1,float16,float16,0,0.17407999436060587
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,128,40,8,128,0,1,float16,fp8,0,0.16554666558901468
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,128,40,8,128,0,1,fp8,fp8,0,0.1518933375676473
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,128,40,40,128,0,1,float16,float16,0,0.13636266191800436
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,128,40,40,128,0,1,float16,fp8,0,0.09796266754468282
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,128,40,40,128,0,1,fp8,fp8,0,0.14523733655611673
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,128,40,1,128,0,1,float16,float16,0,0.06092800199985504
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,128,40,1,128,0,1,float16,fp8,0,0.06144000093142191
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,128,40,1,128,0,1,fp8,fp8,0,0.04710400104522705
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,128,40,2,128,0,1,float16,float16,0,0.06109866499900818
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,128,40,2,128,0,1,float16,fp8,0,0.06109866499900818
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,128,40,2,128,0,1,fp8,fp8,0,0.046762665112813316
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,128,40,4,128,0,1,float16,float16,0,0.06058666606744131
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,128,40,4,128,0,1,fp8,fp8,0,0.04744533201058706
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,128,40,4,128,0,1,float16,fp8,0,0.06144000093142191
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,128,40,8,128,0,1,float16,float16,0,0.0631466656923294
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,128,40,8,128,0,1,float16,fp8,0,0.0631466656923294
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,128,40,8,128,0,1,fp8,fp8,0,0.0481279989083608
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,128,40,40,128,0,1,float16,float16,0,0.04249600072701772
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,128,40,1,128,0,1,float16,float16,0,0.0365226666132609
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,128,40,40,128,0,1,float16,fp8,0,0.04027733455101649
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,128,40,40,128,0,1,fp8,fp8,0,0.031744000812371574
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,128,40,1,128,0,1,fp8,fp8,0,0.03054933249950409
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,128,40,1,128,0,1,float16,fp8,0,0.03669333209594091
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,128,40,2,128,0,1,float16,float16,0,0.03669333209594091
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,128,40,2,128,0,1,float16,fp8,0,0.037205333511034645
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,128,40,2,128,0,1,fp8,fp8,0,0.03089066594839096
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,128,40,4,128,0,1,float16,float16,0,0.037205333511034645
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,128,40,8,128,0,1,float16,float16,0,0.03754666695992152
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,128,40,4,128,0,1,fp8,fp8,0,0.032085334261258446
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,128,40,8,128,0,1,float16,fp8,0,0.037717332442601524
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,128,40,8,128,0,1,fp8,fp8,0,0.031744000812371574
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,128,40,4,128,0,1,float16,fp8,0,0.037717332442601524
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,128,40,40,128,0,1,float16,float16,0,0.027477333943049114
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,128,40,40,128,0,1,float16,fp8,0,0.027306665976842243
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,128,40,40,128,0,1,fp8,fp8,0,0.02184533327817917
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,128,40,1,128,0,1,float16,float16,0,0.025258667767047882
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,128,40,1,128,0,1,fp8,fp8,0,0.020992000897725422
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,128,40,1,128,0,1,float16,fp8,0,0.025258667767047882
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,128,40,2,128,0,1,float16,float16,0,0.025429333249727886
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,128,40,2,128,0,1,float16,fp8,0,0.025087999800841015
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,128,40,2,128,0,1,fp8,fp8,0,0.021503999829292297
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,128,40,4,128,0,1,float16,float16,0,0.025770666698614757
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,128,40,4,128,0,1,float16,fp8,0,0.0266239990790685
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,128,40,4,128,0,1,fp8,fp8,0,0.021162666380405426
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,128,40,8,128,0,1,float16,float16,0,0.025770666698614757
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,128,40,8,128,0,1,float16,fp8,0,0.025770666698614757
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,128,40,8,128,0,1,fp8,fp8,0,0.021503999829292297
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,128,40,40,128,0,1,float16,float16,0,0.019285333653291065
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,128,40,40,128,0,1,float16,fp8,0,0.018602666755517323
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,128,40,40,128,0,1,fp8,fp8,0,0.01570133368174235
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,128,40,1,128,0,1,float16,float16,0,0.01791999985774358
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,128,40,1,128,0,1,float16,fp8,0,0.018090666582187016
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,128,40,1,128,0,1,fp8,fp8,0,0.014848000059525171
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,128,40,2,128,0,1,float16,float16,0,0.01791999985774358
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,128,40,2,128,0,1,float16,fp8,0,0.01826133330663045
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,128,40,4,128,0,1,float16,float16,0,0.01791999985774358
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,128,40,2,128,0,1,fp8,fp8,0,0.015530666957298914
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,128,40,4,128,0,1,float16,fp8,0,0.01791999985774358
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,128,40,4,128,0,1,fp8,fp8,0,0.015189333508412043
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,128,40,8,128,0,1,float16,float16,0,0.018090666582187016
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,128,40,8,128,0,1,float16,fp8,0,0.034645333886146545
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,128,40,8,128,0,1,fp8,fp8,0,0.015018666783968607
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,128,40,40,128,0,1,float16,float16,0,0.013823999712864557
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,128,40,40,128,0,1,float16,fp8,0,0.013653332988421122
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,128,40,40,128,0,1,fp8,fp8,0,0.011776000261306763
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,128,40,1,128,0,1,float16,float16,0,0.013482666263977686
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,128,40,1,128,0,1,float16,fp8,0,0.013653332988421122
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,128,40,1,128,0,1,fp8,fp8,0,0.011605333536863327
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,128,40,2,128,0,1,float16,float16,0,0.013482666263977686
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,128,40,2,128,0,1,float16,fp8,0,0.013482666263977686
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,128,40,2,128,0,1,fp8,fp8,0,0.011605333536863327
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,128,40,4,128,0,1,float16,float16,0,0.013482666263977686
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,128,40,4,128,0,1,fp8,fp8,0,0.011605333536863327
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,128,40,4,128,0,1,float16,fp8,0,0.013653332988421122
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,128,40,8,128,0,1,float16,float16,0,0.013482666263977686
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,128,40,8,128,0,1,float16,fp8,0,0.013653332988421122
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,128,40,8,128,0,1,fp8,fp8,0,0.011605333536863327
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,64,40,1,128,0,1,float16,float16,0,1.0627413590749104
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,64,40,1,128,0,1,float16,fp8,0,1.055573304494222
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,64,40,1,128,0,1,fp8,fp8,0,0.7017813523610433
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,64,40,2,128,0,1,float16,float16,0,1.0977280139923096
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,64,40,2,128,0,1,float16,fp8,0,1.087829351425171
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,64,40,2,128,0,1,fp8,fp8,0,0.7389866511027018
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,64,40,4,128,0,1,float16,float16,0,1.18340269724528
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,64,40,4,128,0,1,float16,fp8,0,1.1704320112864177
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,64,40,4,128,0,1,fp8,fp8,0,0.8335359891255697
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,64,40,8,128,0,1,float16,float16,0,1.3492906888326008
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,64,40,8,128,0,1,float16,fp8,0,1.3137919902801514
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,64,40,8,128,0,1,fp8,fp8,0,0.9816746711730957
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,64,40,40,128,0,1,float16,float16,0,1.3076480229695637
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,64,40,1,128,0,1,float16,float16,0,0.4643839995066325
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,64,40,40,128,0,1,float16,fp8,0,1.233237346013387
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,64,40,40,128,0,1,fp8,fp8,0,1.00437331199646
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,64,40,1,128,0,1,float16,fp8,0,0.457045316696167
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,64,40,1,128,0,1,fp8,fp8,0,0.3058346708615621
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,64,40,2,128,0,1,float16,float16,0,0.48503466447194415
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,64,40,2,128,0,1,float16,fp8,0,0.47786664962768555
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,64,40,4,128,0,1,float16,float16,0,0.5369173288345337
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,64,40,2,128,0,1,fp8,fp8,0,0.32477867603302
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,64,40,4,128,0,1,fp8,fp8,0,0.3604480028152466
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,64,40,4,128,0,1,float16,fp8,0,0.527018666267395
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,64,40,8,128,0,1,float16,float16,0,0.6282240152359009
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,64,40,8,128,0,1,float16,fp8,0,0.6056960026423136
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,64,40,8,128,0,1,fp8,fp8,0,0.434005339940389
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,64,40,40,128,0,1,float16,float16,0,0.602453351020813
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,64,40,40,128,0,1,float16,fp8,0,0.553984006245931
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,64,40,1,128,0,1,float16,float16,0,0.11502933502197266
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,64,40,1,128,0,1,float16,fp8,0,0.12032000223795573
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,64,40,40,128,0,1,fp8,fp8,0,0.45158398151397705
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,64,40,1,128,0,1,fp8,fp8,0,0.08055466910203297
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,64,40,2,128,0,1,float16,float16,0,0.13414399822553
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,64,40,2,128,0,1,float16,fp8,0,0.12834133704503378
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,64,40,2,128,0,1,fp8,fp8,0,0.07918933530648549
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,64,40,4,128,0,1,float16,float16,0,0.13397333025932312
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,64,40,4,128,0,1,float16,fp8,0,0.13056000073750815
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,64,40,4,128,0,1,fp8,fp8,0,0.08277333279450734
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,64,40,8,128,0,1,float16,float16,0,0.1728853384653727
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,64,40,8,128,0,1,float16,fp8,0,0.1599146624406179
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,64,40,8,128,0,1,fp8,fp8,0,0.14882133404413858
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,64,40,40,128,0,1,float16,float16,0,0.13090133666992188
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,64,40,40,128,0,1,float16,fp8,0,0.0981333355108897
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,64,40,1,128,0,1,float16,float16,0,0.053077335158983864
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,64,40,40,128,0,1,fp8,fp8,0,0.13755733768145242
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,64,40,1,128,0,1,float16,fp8,0,0.052906667192777
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,64,40,1,128,0,1,fp8,fp8,0,0.04232533276081085
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,64,40,2,128,0,1,float16,float16,0,0.052906667192777
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,64,40,2,128,0,1,float16,fp8,0,0.05273599922657013
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,64,40,2,128,0,1,fp8,fp8,0,0.04232533276081085
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,64,40,4,128,0,1,float16,float16,0,0.053930665055910744
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,64,40,4,128,0,1,float16,fp8,0,0.053930665055910744
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,64,40,4,128,0,1,fp8,fp8,0,0.04232533276081085
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,64,40,8,128,0,1,float16,float16,0,0.05478399991989136
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,64,40,8,128,0,1,fp8,fp8,0,0.043007999658584595
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,64,40,8,128,0,1,float16,fp8,0,0.05478399991989136
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,64,40,40,128,0,1,float16,float16,0,0.03857066730658213
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,64,40,40,128,0,1,float16,fp8,0,0.037205333511034645
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,64,40,40,128,0,1,fp8,fp8,0,0.029525332152843475
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,64,40,1,128,0,1,float16,float16,0,0.03328000009059906
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,64,40,1,128,0,1,float16,fp8,0,0.03345066557327906
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,64,40,1,128,0,1,fp8,fp8,0,0.027989332874615986
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,64,40,2,128,0,1,float16,float16,0,0.03293866664171219
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,64,40,2,128,0,1,float16,fp8,0,0.0341333324710528
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,64,40,2,128,0,1,fp8,fp8,0,0.028330666323502857
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,64,40,4,128,0,1,float16,float16,0,0.03379199902216593
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,64,40,4,128,0,1,float16,fp8,0,0.0339626669883728
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,64,40,4,128,0,1,fp8,fp8,0,0.029525332152843475
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,64,40,8,128,0,1,float16,float16,0,0.03379199902216593
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,64,40,8,128,0,1,float16,fp8,0,0.0339626669883728
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,64,40,8,128,0,1,fp8,fp8,0,0.0290133332212766
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,64,40,40,128,0,1,fp8,fp8,0,0.019968000551064808
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,64,40,40,128,0,1,float16,float16,0,0.02491733431816101
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,64,40,40,128,0,1,float16,fp8,0,0.024405332903067272
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,64,40,1,128,0,1,float16,float16,0,0.022869333624839783
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,64,40,1,128,0,1,float16,fp8,0,0.022698665658632915
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,64,40,2,128,0,1,float16,float16,0,0.022869333624839783
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,64,40,1,128,0,1,fp8,fp8,0,0.01877333347996076
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,64,40,2,128,0,1,float16,fp8,0,0.022698665658632915
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,64,40,2,128,0,1,fp8,fp8,0,0.018944000204404194
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,64,40,4,128,0,1,float16,fp8,0,0.023039999107519787
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,64,40,4,128,0,1,fp8,fp8,0,0.018944000204404194
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,64,40,4,128,0,1,float16,float16,0,0.022869333624839783
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,64,40,8,128,0,1,float16,float16,0,0.023039999107519787
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,64,40,8,128,0,1,float16,fp8,0,0.023210667073726654
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,64,40,8,128,0,1,fp8,fp8,0,0.01911466692884763
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,64,40,40,128,0,1,float16,float16,0,0.016384000579516094
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,64,40,40,128,0,1,float16,fp8,0,0.01621333385507266
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,64,40,1,128,0,1,float16,float16,0,0.014848000059525171
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,64,40,40,128,0,1,fp8,fp8,0,0.014165333161751429
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,64,40,1,128,0,1,float16,fp8,0,0.015018666783968607
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,64,40,1,128,0,1,fp8,fp8,0,0.01331199953953425
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,64,40,2,128,0,1,float16,float16,0,0.015360000232855478
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,64,40,2,128,0,1,float16,fp8,0,0.01570133368174235
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,64,40,2,128,0,1,fp8,fp8,0,0.013823999712864557
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,64,40,4,128,0,1,float16,float16,0,0.015360000232855478
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,64,40,4,128,0,1,float16,fp8,0,0.015189333508412043
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,64,40,4,128,0,1,fp8,fp8,0,0.01331199953953425
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,64,40,8,128,0,1,float16,float16,0,0.015530666957298914
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,64,40,8,128,0,1,float16,fp8,0,0.015360000232855478
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,64,40,8,128,0,1,fp8,fp8,0,0.013482666263977686
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,64,40,40,128,0,1,float16,float16,0,0.011776000261306763
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,64,40,40,128,0,1,float16,fp8,0,0.011605333536863327
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,64,40,40,128,0,1,fp8,fp8,0,0.010410666465759277
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,64,40,1,128,0,1,float16,float16,0,0.011434666812419891
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,64,40,1,128,0,1,float16,fp8,0,0.011434666812419891
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,64,40,1,128,0,1,fp8,fp8,0,0.010410666465759277
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,64,40,2,128,0,1,float16,float16,0,0.01109333336353302
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,64,40,2,128,0,1,float16,fp8,0,0.011434666812419891
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,64,40,2,128,0,1,fp8,fp8,0,0.010239999741315842
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,64,40,4,128,0,1,float16,float16,0,0.011264000087976456
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,64,40,4,128,0,1,float16,fp8,0,0.011264000087976456
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,64,40,4,128,0,1,fp8,fp8,0,0.010239999741315842
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,64,40,8,128,0,1,float16,float16,0,0.011264000087976456
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,64,40,8,128,0,1,fp8,fp8,0,0.010410666465759277
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,64,40,8,128,0,1,float16,fp8,0,0.011605333536863327
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,64,40,40,128,0,1,float16,float16,0,0.010410666465759277
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,64,40,40,128,0,1,float16,fp8,0,0.010581333190202713
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,64,40,40,128,0,1,fp8,fp8,0,0.00938666673998038
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,64,40,1,128,0,1,float16,float16,0,0.010410666465759277
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,64,40,1,128,0,1,float16,fp8,0,0.010751999914646149
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,64,40,1,128,0,1,fp8,fp8,0,0.00938666673998038
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,64,40,2,128,0,1,float16,float16,0,0.010239999741315842
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,64,40,2,128,0,1,float16,fp8,0,0.010581333190202713
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,64,40,2,128,0,1,fp8,fp8,0,0.009557333464423815
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,64,40,4,128,0,1,float16,float16,0,0.010239999741315842
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,64,40,4,128,0,1,float16,fp8,0,0.010581333190202713
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,64,40,4,128,0,1,fp8,fp8,0,0.009557333464423815
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,64,40,8,128,0,1,float16,float16,0,0.010410666465759277
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,64,40,8,128,0,1,float16,fp8,0,0.010581333190202713
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,64,40,8,128,0,1,fp8,fp8,0,0.009557333464423815
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,32,40,1,128,0,1,float16,float16,0,0.45960533618927
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,32,40,1,128,0,1,float16,fp8,0,0.45875199635823566
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,32,40,1,128,0,1,fp8,fp8,0,0.3252906600634257
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,32,40,2,128,0,1,float16,float16,0,0.4867413441340129
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,32,40,2,128,0,1,fp8,fp8,0,0.33723731835683185
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,32,40,2,128,0,1,float16,fp8,0,0.4782079855600993
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,32,40,4,128,0,1,float16,float16,0,0.5353813171386719
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,32,40,4,128,0,1,float16,fp8,0,0.5277013381322225
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,32,40,8,128,0,1,float16,float16,0,0.6283946832021078
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,32,40,4,128,0,1,fp8,fp8,0,0.37546666463216144
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,32,40,8,128,0,1,fp8,fp8,0,0.43929600715637207
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,32,40,8,128,0,1,float16,fp8,0,0.6085973183314005
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,32,40,40,128,0,1,float16,float16,0,0.6067200104395548
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,32,40,40,128,0,1,float16,fp8,0,0.556714653968811
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,32,40,40,128,0,1,fp8,fp8,0,0.45448533693949383
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,32,40,1,128,0,1,float16,float16,0,0.11673600474993388
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,32,40,1,128,0,1,float16,fp8,0,0.11724799871444702
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,32,40,1,128,0,1,fp8,fp8,0,0.08533333738644917
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,32,40,2,128,0,1,float16,float16,0,0.12595199545224509
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,32,40,2,128,0,1,float16,fp8,0,0.11963733037312825
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,32,40,2,128,0,1,fp8,fp8,0,0.08959999680519104
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,32,40,4,128,0,1,float16,float16,0,0.1378986636797587
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,32,40,4,128,0,1,float16,fp8,0,0.1293653349081675
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,32,40,4,128,0,1,fp8,fp8,0,0.09745066364606221
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,32,40,8,128,0,1,float16,float16,0,0.17561600605646768
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,32,40,8,128,0,1,float16,fp8,0,0.15820800264676413
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,32,40,8,128,0,1,fp8,fp8,0,0.15769599874814352
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,32,40,40,128,0,1,float16,float16,0,0.13943466544151306
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,32,40,40,128,0,1,float16,fp8,0,0.09591466188430786
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,32,40,40,128,0,1,fp8,fp8,0,0.1423360009988149
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,32,40,1,128,0,1,float16,float16,0,0.05478399991989136
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,32,40,1,128,0,1,float16,fp8,0,0.0554666668176651
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,32,40,1,128,0,1,fp8,fp8,0,0.045909335215886436
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,32,40,2,128,0,1,float16,float16,0,0.05529599885145823
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,32,40,2,128,0,1,float16,fp8,0,0.05478399991989136
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,32,40,2,128,0,1,fp8,fp8,0,0.04607999821503957
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,32,40,4,128,0,1,float16,float16,0,0.05597866574923197
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,32,40,4,128,0,1,float16,fp8,0,0.05580799778302511
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,32,40,4,128,0,1,fp8,fp8,0,0.045909335215886436
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,32,40,8,128,0,1,float16,float16,0,0.057855998476346336
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,32,40,8,128,0,1,float16,fp8,0,0.05717333157857259
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,32,40,40,128,0,1,float16,float16,0,0.03839999934037527
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,32,40,8,128,0,1,fp8,fp8,0,0.04710400104522705
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,32,40,40,128,0,1,float16,fp8,0,0.03601066768169403
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,32,40,40,128,0,1,fp8,fp8,0,0.030720000465710957
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,32,40,1,128,0,1,float16,float16,0,0.0339626669883728
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,32,40,1,128,0,1,float16,fp8,0,0.03345066557327906
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,32,40,1,128,0,1,fp8,fp8,0,0.030207999050617218
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,32,40,2,128,0,1,float16,float16,0,0.0341333324710528
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,32,40,2,128,0,1,float16,fp8,0,0.0339626669883728
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,32,40,4,128,0,1,float16,float16,0,0.0339626669883728
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,32,40,2,128,0,1,fp8,fp8,0,0.029696000119050343
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,32,40,4,128,0,1,float16,fp8,0,0.0341333324710528
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,32,40,4,128,0,1,fp8,fp8,0,0.031231999397277832
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,32,40,8,128,0,1,float16,float16,0,0.0341333324710528
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,32,40,8,128,0,1,float16,fp8,0,0.034645333886146545
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,32,40,8,128,0,1,fp8,fp8,0,0.03089066594839096
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,32,40,40,128,0,1,float16,float16,0,0.024405332903067272
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,32,40,40,128,0,1,float16,fp8,0,0.023552000522613525
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,32,40,40,128,0,1,fp8,fp8,0,0.020821332931518555
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,32,40,1,128,0,1,float16,float16,0,0.02218666672706604
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,32,40,1,128,0,1,float16,fp8,0,0.02252800017595291
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,32,40,1,128,0,1,fp8,fp8,0,0.0194560003777345
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,32,40,2,128,0,1,float16,float16,0,0.022869333624839783
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,32,40,2,128,0,1,float16,fp8,0,0.022698665658632915
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,32,40,2,128,0,1,fp8,fp8,0,0.02145066608985265
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,32,40,4,128,0,1,float16,float16,0,0.022357332209746044
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,32,40,4,128,0,1,float16,fp8,0,0.022698665658632915
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,32,40,4,128,0,1,fp8,fp8,0,0.019968000551064808
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,32,40,8,128,0,1,float16,float16,0,0.023039999107519787
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,32,40,8,128,0,1,float16,fp8,0,0.022357332209746044
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,32,40,8,128,0,1,fp8,fp8,0,0.020309332758188248
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,32,40,40,128,0,1,float16,float16,0,0.017237332959969837
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,32,40,40,128,0,1,float16,fp8,0,0.016895999511082966
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,32,40,40,128,0,1,fp8,fp8,0,0.015018666783968607
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,32,40,1,128,0,1,float16,fp8,0,0.01621333385507266
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,32,40,1,128,0,1,float16,float16,0,0.016384000579516094
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,32,40,1,128,0,1,fp8,fp8,0,0.0145066666106383
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,32,40,2,128,0,1,float16,float16,0,0.01621333385507266
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,32,40,2,128,0,1,float16,fp8,0,0.016895999511082966
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,32,40,2,128,0,1,fp8,fp8,0,0.014848000059525171
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,32,40,4,128,0,1,float16,float16,0,0.016384000579516094
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,32,40,4,128,0,1,float16,fp8,0,0.016554666062196095
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,32,40,4,128,0,1,fp8,fp8,0,0.0145066666106383
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,32,40,8,128,0,1,fp8,fp8,0,0.013994666437307993
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,32,40,8,128,0,1,float16,fp8,0,0.016554666062196095
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,32,40,8,128,0,1,float16,float16,0,0.016554666062196095
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,32,40,40,128,0,1,float16,float16,0,0.011264000087976456
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,32,40,40,128,0,1,float16,fp8,0,0.010922666639089584
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,32,40,40,128,0,1,fp8,fp8,0,0.009898666913310686
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,32,40,1,128,0,1,float16,float16,0,0.010581333190202713
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,32,40,1,128,0,1,float16,fp8,0,0.01109333336353302
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,32,40,1,128,0,1,fp8,fp8,0,0.010069333637754122
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,32,40,2,128,0,1,fp8,fp8,0,0.010410666465759277
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,32,40,2,128,0,1,float16,float16,0,0.010922666639089584
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,32,40,2,128,0,1,float16,fp8,0,0.011434666812419891
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,32,40,4,128,0,1,float16,float16,0,0.010751999914646149
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,32,40,4,128,0,1,float16,fp8,0,0.010751999914646149
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,32,40,4,128,0,1,fp8,fp8,0,0.009898666913310686
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,32,40,8,128,0,1,float16,float16,0,0.010751999914646149
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,32,40,8,128,0,1,float16,fp8,0,0.010922666639089584
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,32,40,8,128,0,1,fp8,fp8,0,0.010410666465759277
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,32,40,40,128,0,1,float16,float16,0,0.009216000015536943
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,32,40,40,128,0,1,float16,fp8,0,0.00972800018886725
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,32,40,40,128,0,1,fp8,fp8,0,0.008874666566650072
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,32,40,1,128,0,1,float16,float16,0,0.009216000015536943
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,32,40,1,128,0,1,float16,fp8,0,0.009898666913310686
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,32,40,1,128,0,1,fp8,fp8,0,0.008874666566650072
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,32,40,2,128,0,1,float16,float16,0,0.009216000015536943
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,32,40,2,128,0,1,float16,fp8,0,0.009557333464423815
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,32,40,2,128,0,1,fp8,fp8,0,0.008874666566650072
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,32,40,4,128,0,1,float16,float16,0,0.009216000015536943
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,32,40,4,128,0,1,float16,fp8,0,0.00938666673998038
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,32,40,4,128,0,1,fp8,fp8,0,0.008703999842206636
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,32,40,8,128,0,1,float16,float16,0,0.00938666673998038
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,32,40,8,128,0,1,float16,fp8,0,0.009557333464423815
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,32,40,40,128,0,1,float16,float16,0,0.0085333331177632
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,32,40,8,128,0,1,fp8,fp8,0,0.00938666673998038
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,32,40,40,128,0,1,float16,fp8,0,0.008874666566650072
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,32,40,40,128,0,1,fp8,fp8,0,0.008362666393319765
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,32,40,1,128,0,1,float16,float16,0,0.008874666566650072
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,32,40,1,128,0,1,float16,fp8,0,0.008874666566650072
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,32,40,1,128,0,1,fp8,fp8,0,0.008362666393319765
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,32,40,2,128,0,1,float16,float16,0,0.008874666566650072
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,32,40,2,128,0,1,float16,fp8,0,0.008874666566650072
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,32,40,2,128,0,1,fp8,fp8,0,0.00938666673998038
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,32,40,4,128,0,1,float16,float16,0,0.008703999842206636
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,32,40,4,128,0,1,float16,fp8,0,0.009045333291093508
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,32,40,4,128,0,1,fp8,fp8,0,0.008362666393319765
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,32,40,8,128,0,1,float16,float16,0,0.008703999842206636
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,32,40,8,128,0,1,float16,fp8,0,0.009045333291093508
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,32,40,8,128,0,1,fp8,fp8,0,0.0085333331177632
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,16,40,1,128,0,1,float16,fp8,0,0.1544533371925354
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,16,40,1,128,0,1,float16,float16,0,0.15752533078193665
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,16,40,1,128,0,1,fp8,fp8,0,0.11639466881752014
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,16,40,2,128,0,1,float16,fp8,0,0.15803733468055725
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,16,40,2,128,0,1,float16,float16,0,0.1599146624406179
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,16,40,2,128,0,1,fp8,fp8,0,0.11844266454378764
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,16,40,4,128,0,1,float16,float16,0,0.17169066270192465
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,16,40,4,128,0,1,float16,fp8,0,0.16537599762280783
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,16,40,4,128,0,1,fp8,fp8,0,0.1262933313846588
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,16,40,8,128,0,1,float16,fp8,0,0.18653867642084757
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,16,40,8,128,0,1,float16,float16,0,0.19985065857569376
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,16,40,8,128,0,1,fp8,fp8,0,0.18773333231608072
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,16,40,40,128,0,1,float16,fp8,0,0.11315199732780457
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,16,40,40,128,0,1,float16,float16,0,0.14574933052062988
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,16,40,40,128,0,1,fp8,fp8,0,0.15291733543078104
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,16,40,1,128,0,1,float16,float16,0,0.07748266557852428
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,16,40,1,128,0,1,float16,fp8,0,0.0766293356815974
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,16,40,1,128,0,1,fp8,fp8,0,0.06126933296521505
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,16,40,2,128,0,1,float16,float16,0,0.077824001510938
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,16,40,2,128,0,1,fp8,fp8,0,0.06144000093142191
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,16,40,2,128,0,1,float16,fp8,0,0.07799466451009114
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,16,40,4,128,0,1,float16,float16,0,0.0773119976123174
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,16,40,4,128,0,1,float16,fp8,0,0.0773119976123174
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,16,40,8,128,0,1,fp8,fp8,0,0.06263466676076253
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,16,40,8,128,0,1,float16,fp8,0,0.07799466451009114
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,16,40,8,128,0,1,float16,float16,0,0.07833600044250488
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,16,40,4,128,0,1,fp8,fp8,0,0.06195199986298879
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,16,40,40,128,0,1,float16,float16,0,0.045909335215886436
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,16,40,40,128,0,1,float16,fp8,0,0.045226668318112694
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,16,40,40,128,0,1,fp8,fp8,0,0.03703466554482778
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,16,40,1,128,0,1,float16,float16,0,0.04351999859015147
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,16,40,1,128,0,1,float16,fp8,0,0.04351999859015147
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,16,40,1,128,0,1,fp8,fp8,0,0.036864000062147774
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,16,40,2,128,0,1,float16,float16,0,0.04369066655635834
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,16,40,2,128,0,1,fp8,fp8,0,0.037205333511034645
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,16,40,4,128,0,1,float16,float16,0,0.04505600035190582
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,16,40,2,128,0,1,float16,fp8,0,0.044031997521718345
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,16,40,4,128,0,1,float16,fp8,0,0.04420266548792521
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,16,40,4,128,0,1,fp8,fp8,0,0.038058665891488395
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,16,40,8,128,0,1,float16,float16,0,0.045226668318112694
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,16,40,8,128,0,1,float16,fp8,0,0.045226668318112694
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,16,40,8,128,0,1,fp8,fp8,0,0.038058665891488395
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,16,40,40,128,0,1,float16,float16,0,0.027477333943049114
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,16,40,40,128,0,1,float16,fp8,0,0.027306665976842243
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,16,40,1,128,0,1,float16,float16,0,0.0266239990790685
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,16,40,40,128,0,1,fp8,fp8,0,0.023210667073726654
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,16,40,1,128,0,1,float16,fp8,0,0.0264533335963885
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,16,40,1,128,0,1,fp8,fp8,0,0.023039999107519787
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,16,40,2,128,0,1,float16,float16,0,0.0264533335963885
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,16,40,2,128,0,1,float16,fp8,0,0.02679466704527537
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,16,40,2,128,0,1,fp8,fp8,0,0.023381332556406658
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,16,40,4,128,0,1,float16,float16,0,0.02679466704527537
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,16,40,4,128,0,1,float16,fp8,0,0.026965332527955372
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,16,40,4,128,0,1,fp8,fp8,0,0.023039999107519787
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,16,40,8,128,0,1,float16,float16,0,0.0266239990790685
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,16,40,8,128,0,1,float16,fp8,0,0.027136000494162243
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,16,40,8,128,0,1,fp8,fp8,0,0.023381332556406658
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,16,40,40,128,0,1,float16,float16,0,0.01877333347996076
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,16,40,40,128,0,1,float16,fp8,0,0.01826133330663045
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,16,40,40,128,0,1,fp8,fp8,0,0.016554666062196095
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,16,40,1,128,0,1,float16,float16,0,0.018090666582187016
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,16,40,1,128,0,1,float16,fp8,0,0.018090666582187016
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,16,40,1,128,0,1,fp8,fp8,0,0.015530666957298914
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,16,40,2,128,0,1,float16,float16,0,0.018090666582187016
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,16,40,2,128,0,1,float16,fp8,0,0.01826133330663045
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,16,40,4,128,0,1,float16,float16,0,0.018090666582187016
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,16,40,2,128,0,1,fp8,fp8,0,0.016042667130629223
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,16,40,4,128,0,1,float16,fp8,0,0.018602666755517323
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,16,40,4,128,0,1,fp8,fp8,0,0.015872000406185787
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,16,40,8,128,0,1,float16,fp8,0,0.018432000031073887
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,16,40,8,128,0,1,float16,float16,0,0.01826133330663045
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,16,40,8,128,0,1,fp8,fp8,0,0.015872000406185787
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,16,40,40,128,0,1,float16,float16,0,0.01331199953953425
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,16,40,40,128,0,1,float16,fp8,0,0.012800000607967377
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,16,40,1,128,0,1,float16,float16,0,0.013141332815090815
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,16,40,40,128,0,1,fp8,fp8,0,0.011605333536863327
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,16,40,1,128,0,1,float16,fp8,0,0.01331199953953425
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,16,40,1,128,0,1,fp8,fp8,0,0.011434666812419891
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,16,40,2,128,0,1,float16,fp8,0,0.01331199953953425
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,16,40,2,128,0,1,float16,float16,0,0.013141332815090815
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,16,40,2,128,0,1,fp8,fp8,0,0.011605333536863327
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,16,40,4,128,0,1,float16,float16,0,0.012970666090647379
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,16,40,4,128,0,1,float16,fp8,0,0.013482666263977686
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,16,40,4,128,0,1,fp8,fp8,0,0.011776000261306763
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,16,40,8,128,0,1,float16,float16,0,0.013141332815090815
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,16,40,8,128,0,1,float16,fp8,0,0.01331199953953425
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,16,40,8,128,0,1,fp8,fp8,0,0.011776000261306763
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,16,40,40,128,0,1,float16,fp8,0,0.009898666913310686
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,16,40,40,128,0,1,fp8,fp8,0,0.009216000015536943
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,16,40,1,128,0,1,float16,float16,0,0.009898666913310686
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,16,40,40,128,0,1,float16,float16,0,0.009557333464423815
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,16,40,1,128,0,1,float16,fp8,0,0.009898666913310686
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,16,40,1,128,0,1,fp8,fp8,0,0.009216000015536943
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,16,40,2,128,0,1,float16,float16,0,0.00972800018886725
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,16,40,2,128,0,1,float16,fp8,0,0.010069333637754122
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,16,40,4,128,0,1,float16,float16,0,0.009557333464423815
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,16,40,2,128,0,1,fp8,fp8,0,0.009216000015536943
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,16,40,4,128,0,1,float16,fp8,0,0.010069333637754122
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,16,40,8,128,0,1,float16,float16,0,0.009557333464423815
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,16,40,4,128,0,1,fp8,fp8,0,0.012117333710193634
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,16,40,8,128,0,1,float16,fp8,0,0.009898666913310686
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,16,40,8,128,0,1,fp8,fp8,0,0.009216000015536943
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,16,40,40,128,0,1,float16,float16,0,0.0085333331177632
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,16,40,40,128,0,1,float16,fp8,0,0.008703999842206636
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,16,40,40,128,0,1,fp8,fp8,0,0.009216000015536943
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,16,40,1,128,0,1,float16,float16,0,0.008703999842206636
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,16,40,1,128,0,1,float16,fp8,0,0.010053333515922228
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,16,40,1,128,0,1,fp8,fp8,0,0.008362666393319765
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,16,40,2,128,0,1,float16,float16,0,0.0085333331177632
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,16,40,2,128,0,1,float16,fp8,0,0.008874666566650072
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,16,40,2,128,0,1,fp8,fp8,0,0.008362666393319765
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,16,40,4,128,0,1,float16,float16,0,0.008703999842206636
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,16,40,4,128,0,1,float16,fp8,0,0.00972800018886725
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,16,40,4,128,0,1,fp8,fp8,0,0.008703999842206636
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,16,40,8,128,0,1,float16,float16,0,0.009045333291093508
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,16,40,8,128,0,1,float16,fp8,0,0.040106666584809623
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,16,40,8,128,0,1,fp8,fp8,0,0.008362666393319765
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,16,40,40,128,0,1,float16,float16,0,0.0085333331177632
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,16,40,40,128,0,1,float16,fp8,0,0.0085333331177632
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,16,40,40,128,0,1,fp8,fp8,0,0.008021333565314611
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,16,40,1,128,0,1,float16,float16,0,0.0085333331177632
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,16,40,1,128,0,1,float16,fp8,0,0.008874666566650072
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,16,40,1,128,0,1,fp8,fp8,0,0.009216000015536943
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,16,40,2,128,0,1,float16,fp8,0,0.008703999842206636
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,16,40,2,128,0,1,float16,float16,0,0.0085333331177632
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,16,40,2,128,0,1,fp8,fp8,0,0.008703999842206636
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,16,40,4,128,0,1,float16,float16,0,0.008362666393319765
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,16,40,4,128,0,1,float16,fp8,0,0.008703999842206636
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,16,40,4,128,0,1,fp8,fp8,0,0.008362666393319765
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,16,40,8,128,0,1,float16,float16,0,0.0085333331177632
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,16,40,8,128,0,1,float16,fp8,0,0.008703999842206636
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,16,40,8,128,0,1,fp8,fp8,0,0.008192000289758047
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,16384,32,1,128,0,1,fp8,fp8,0,69.70282491048177
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,16384,32,2,128,0,1,fp8,fp8,0,68.35012308756511
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,16384,32,1,128,0,1,float16,float16,0,116.9090576171875
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,16384,32,2,128,0,1,float16,float16,0,114.06387329101562
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,16384,32,2,128,0,1,float16,fp8,0,112.43810017903645
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,16384,32,1,128,0,1,float16,fp8,0,115.3213399251302
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,16384,32,4,128,0,1,float16,float16,0,113.03406778971355
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,16384,32,4,128,0,1,float16,fp8,0,115.71438598632812
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,16384,32,4,128,0,1,fp8,fp8,0,70.53465779622395
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,16384,32,32,128,0,1,fp8,fp8,0,35.09657541910807
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,16384,32,32,128,0,1,float16,float16,0,57.989288330078125
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,16384,32,32,128,0,1,float16,fp8,0,58.64226277669271
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,16384,32,1,128,0,1,float16,float16,0,56.15086873372396
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,16384,32,8,128,0,1,fp8,fp8,0,70.11703491210938
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,16384,32,8,128,0,1,float16,float16,0,114.21507771809895
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,16384,32,1,128,0,1,fp8,fp8,0,32.57787831624349
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,16384,32,1,128,0,1,float16,fp8,0,56.032938639322914
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,16384,32,2,128,0,1,fp8,fp8,0,33.07605234781901
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,16384,32,8,128,0,1,float16,fp8,0,117.40416463216145
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,16384,32,2,128,0,1,float16,float16,0,55.73700459798177
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,16384,32,2,128,0,1,float16,fp8,0,55.315287272135414
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,16384,32,4,128,0,1,fp8,fp8,0,32.48315684000651
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,16384,32,4,128,0,1,float16,float16,0,55.292928059895836
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,16384,32,4,128,0,1,float16,fp8,0,55.131134033203125
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,16384,32,32,128,0,1,fp8,fp8,0,17.288191477457683
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,16384,32,8,128,0,1,float16,float16,0,57.01495361328125
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,16384,32,8,128,0,1,fp8,fp8,0,34.36885325113932
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,16384,32,32,128,0,1,float16,float16,0,29.067776997884113
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,16384,32,32,128,0,1,float16,fp8,0,29.252609252929688
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,16384,32,8,128,0,1,float16,fp8,0,56.95061238606771
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,16384,32,1,128,0,1,float16,float16,0,27.62291208902995
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,16384,32,1,128,0,1,float16,fp8,0,27.34711456298828
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,16384,32,1,128,0,1,fp8,fp8,0,16.26862970987956
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,16384,32,2,128,0,1,fp8,fp8,0,16.505172729492188
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,16384,32,2,128,0,1,float16,float16,0,28.043263753255207
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,16384,32,2,128,0,1,float16,fp8,0,28.381014506022137
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,16384,32,4,128,0,1,fp8,fp8,0,16.486228942871094
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,16384,32,4,128,0,1,float16,float16,0,27.785898844401043
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,16384,32,4,128,0,1,float16,fp8,0,28.238848368326824
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,16384,32,8,128,0,1,float16,float16,0,27.734697977701824
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,16384,32,8,128,0,1,fp8,fp8,0,16.900948842366535
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,16384,32,8,128,0,1,float16,fp8,0,27.67974344889323
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,16384,32,32,128,0,1,fp8,fp8,0,8.907434463500977
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,16384,32,32,128,0,1,float16,float16,0,14.908757527669271
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,16384,32,32,128,0,1,float16,fp8,0,15.08181381225586
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,16384,32,1,128,0,1,float16,float16,0,14.058666229248047
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,16384,32,1,128,0,1,fp8,fp8,0,8.329216003417969
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,16384,32,1,128,0,1,float16,fp8,0,14.062763214111328
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,16384,32,2,128,0,1,fp8,fp8,0,8.382464090983072
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,16384,32,2,128,0,1,float16,float16,0,13.694464365641275
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,16384,32,2,128,0,1,float16,fp8,0,13.711359659830729
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,16384,32,4,128,0,1,float16,float16,0,14.082047780354818
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,16384,32,4,128,0,1,fp8,fp8,0,8.629077275594076
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,16384,32,4,128,0,1,float16,fp8,0,14.439083099365234
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,16384,32,8,128,0,1,float16,float16,0,14.035114288330078
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,16384,32,8,128,0,1,fp8,fp8,0,8.521898905436197
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,16384,32,8,128,0,1,float16,fp8,0,14.457003275553385
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,12288,32,1,128,0,1,fp8,fp8,0,39.473663330078125
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,12288,32,2,128,0,1,fp8,fp8,0,37.86205800374349
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,12288,32,1,128,0,1,float16,float16,0,65.80479939778645
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,12288,32,1,128,0,1,float16,fp8,0,65.1315205891927
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,12288,32,2,128,0,1,float16,float16,0,66.00687154134114
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,12288,32,2,128,0,1,float16,fp8,0,67.61830647786458
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,12288,32,4,128,0,1,float16,float16,0,65.80121358235677
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,12288,32,4,128,0,1,float16,fp8,0,66.70489501953125
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,12288,32,4,128,0,1,fp8,fp8,0,39.25316365559896
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,12288,32,32,128,0,1,fp8,fp8,0,20.491605122884113
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,12288,32,8,128,0,1,fp8,fp8,0,39.47724914550781
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,12288,32,32,128,0,1,float16,float16,0,34.25928497314453
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,12288,32,32,128,0,1,float16,fp8,0,34.55675760904948
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,12288,32,1,128,0,1,float16,float16,0,32.23193613688151
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,12288,32,8,128,0,1,float16,float16,0,66.03844197591145
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,12288,32,1,128,0,1,float16,fp8,0,32.10990905761719
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,12288,32,8,128,0,1,float16,fp8,0,66.06814066569011
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,12288,32,1,128,0,1,fp8,fp8,0,19.102378845214844
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,12288,32,2,128,0,1,fp8,fp8,0,19.45309829711914
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,12288,32,2,128,0,1,float16,float16,0,31.10980224609375
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,12288,32,2,128,0,1,float16,fp8,0,31.93053944905599
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,12288,32,4,128,0,1,float16,float16,0,32.35208638509115
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,12288,32,4,128,0,1,fp8,fp8,0,19.600213368733723
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,12288,32,4,128,0,1,float16,fp8,0,31.763455708821613
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,12288,32,8,128,0,1,float16,float16,0,31.902720133463543
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,12288,32,32,128,0,1,float16,float16,0,16.705877939860027
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,12288,32,32,128,0,1,float16,fp8,0,16.91101837158203
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,12288,32,8,128,0,1,fp8,fp8,0,20.314795176188152
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,12288,32,8,128,0,1,float16,fp8,0,32.12646484375
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,12288,32,32,128,0,1,fp8,fp8,0,10.169002532958984
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,12288,32,1,128,0,1,float16,float16,0,16.24098078409831
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,12288,32,1,128,0,1,float16,fp8,0,15.78769048055013
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,12288,32,1,128,0,1,fp8,fp8,0,9.825450897216797
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,12288,32,2,128,0,1,fp8,fp8,0,9.89849599202474
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,12288,32,2,128,0,1,float16,float16,0,15.803050994873047
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,12288,32,2,128,0,1,float16,fp8,0,16.308736165364582
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,12288,32,4,128,0,1,float16,float16,0,16.072021484375
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,12288,32,4,128,0,1,float16,fp8,0,15.92627207438151
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,12288,32,4,128,0,1,fp8,fp8,0,9.821184158325195
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,12288,32,8,128,0,1,float16,float16,0,16.01023991902669
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,12288,32,8,128,0,1,float16,fp8,0,15.88821283976237
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,12288,32,8,128,0,1,fp8,fp8,0,10.056533177693685
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,12288,32,32,128,0,1,float16,float16,0,8.43332290649414
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,12288,32,32,128,0,1,fp8,fp8,0,5.075285275777181
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,12288,32,32,128,0,1,float16,fp8,0,8.745471954345703
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,12288,32,1,128,0,1,float16,float16,0,7.9684263865153
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,12288,32,1,128,0,1,float16,fp8,0,7.9904429117838545
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,12288,32,1,128,0,1,fp8,fp8,0,4.250624020894368
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,12288,32,2,128,0,1,fp8,fp8,0,4.248234748840332
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,12288,32,2,128,0,1,float16,float16,0,8.083797454833984
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,12288,32,2,128,0,1,float16,fp8,0,8.365055720011393
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,12288,32,4,128,0,1,float16,float16,0,7.823189417521159
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,12288,32,4,128,0,1,float16,fp8,0,8.411477406819662
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,12288,32,4,128,0,1,fp8,fp8,0,4.409173329671224
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,12288,32,8,128,0,1,float16,float16,0,8.378880182902018
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,12288,32,8,128,0,1,float16,fp8,0,7.935658772786458
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,12288,32,8,128,0,1,fp8,fp8,0,4.706474622090657
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,10240,32,1,128,0,1,fp8,fp8,0,27.070292154947918
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,10240,32,2,128,0,1,fp8,fp8,0,27.774805704752605
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,10240,32,1,128,0,1,float16,float16,0,44.43852742513021
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,10240,32,1,128,0,1,float16,fp8,0,46.539774576822914
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,10240,32,2,128,0,1,float16,float16,0,45.18109639485677
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,10240,32,2,128,0,1,float16,fp8,0,44.759552001953125
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,10240,32,4,128,0,1,float16,float16,0,44.95752461751302
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,10240,32,4,128,0,1,float16,fp8,0,44.2251942952474
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,10240,32,4,128,0,1,fp8,fp8,0,27.36469268798828
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,10240,32,32,128,0,1,fp8,fp8,0,15.241386413574219
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,10240,32,32,128,0,1,float16,float16,0,23.870463053385418
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,10240,32,32,128,0,1,float16,fp8,0,24.042154947916668
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,10240,32,8,128,0,1,fp8,fp8,0,28.774742126464844
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,10240,32,8,128,0,1,float16,float16,0,46.31330362955729
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,10240,32,1,128,0,1,float16,float16,0,22.375254313151043
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,10240,32,8,128,0,1,float16,fp8,0,45.36883036295573
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,10240,32,1,128,0,1,float16,fp8,0,22.388394673665363
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,10240,32,1,128,0,1,fp8,fp8,0,13.410303751627604
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,10240,32,2,128,0,1,fp8,fp8,0,13.90933354695638
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,10240,32,2,128,0,1,float16,fp8,0,22.226603190104168
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,10240,32,2,128,0,1,float16,float16,0,22.55633036295573
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,10240,32,4,128,0,1,fp8,fp8,0,13.460992177327475
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,10240,32,4,128,0,1,float16,fp8,0,22.246571858723957
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,10240,32,4,128,0,1,float16,float16,0,22.8495356241862
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,10240,32,8,128,0,1,float16,float16,0,22.80175018310547
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,10240,32,8,128,0,1,fp8,fp8,0,14.082901000976562
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,10240,32,8,128,0,1,float16,fp8,0,22.52697499593099
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,10240,32,32,128,0,1,float16,float16,0,12.17245864868164
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,10240,32,32,128,0,1,float16,fp8,0,12.254890441894531
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,10240,32,32,128,0,1,fp8,fp8,0,7.470933278401692
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,10240,32,1,128,0,1,float16,float16,0,11.351552327473959
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,10240,32,1,128,0,1,float16,fp8,0,11.489791870117188
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,10240,32,1,128,0,1,fp8,fp8,0,6.416213353474935
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,10240,32,2,128,0,1,fp8,fp8,0,6.838101069132487
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,10240,32,2,128,0,1,float16,float16,0,11.733503977457682
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,10240,32,2,128,0,1,float16,fp8,0,11.290964762369791
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,10240,32,4,128,0,1,float16,float16,0,11.502250671386719
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,10240,32,4,128,0,1,float16,fp8,0,11.56164296468099
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,10240,32,4,128,0,1,fp8,fp8,0,6.413482666015625
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,10240,32,8,128,0,1,float16,float16,0,11.639979044596354
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,10240,32,8,128,0,1,float16,fp8,0,11.394218444824219
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,10240,32,8,128,0,1,fp8,fp8,0,6.901418685913086
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,10240,32,32,128,0,1,fp8,fp8,0,3.7258240381876626
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,10240,32,32,128,0,1,float16,fp8,0,5.9557545979817705
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,10240,32,32,128,0,1,float16,float16,0,6.14297612508138
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,10240,32,1,128,0,1,float16,float16,0,5.029717445373535
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,10240,32,1,128,0,1,float16,fp8,0,5.207381248474121
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,10240,32,1,128,0,1,fp8,fp8,0,3.142826716105143
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,10240,32,2,128,0,1,float16,float16,0,5.59547742207845
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,10240,32,2,128,0,1,fp8,fp8,0,3.1313918431599936
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,10240,32,2,128,0,1,float16,fp8,0,5.675519943237305
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,10240,32,4,128,0,1,fp8,fp8,0,3.215872128804525
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,10240,32,4,128,0,1,float16,float16,0,5.217621485392253
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,10240,32,4,128,0,1,float16,fp8,0,5.753344217936198
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,10240,32,8,128,0,1,float16,float16,0,5.64241091410319
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,10240,32,8,128,0,1,float16,fp8,0,5.670229593912761
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,10240,32,8,128,0,1,fp8,fp8,0,3.147775967915853
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,8192,32,1,128,0,1,fp8,fp8,0,36.37230936686198
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,8192,32,2,128,0,1,fp8,fp8,0,36.336639404296875
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,8192,32,1,128,0,1,float16,float16,0,61.10003153483073
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,8192,32,1,128,0,1,float16,fp8,0,61.11539204915365
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,8192,32,2,128,0,1,float16,float16,0,60.59366353352865
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,8192,32,2,128,0,1,float16,fp8,0,61.92606099446615
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,8192,32,4,128,0,1,float16,float16,0,61.59923299153646
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,8192,32,4,128,0,1,float16,fp8,0,60.2067616780599
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,8192,32,4,128,0,1,fp8,fp8,0,37.105150858561196
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,8192,32,32,128,0,1,fp8,fp8,0,20.342442830403645
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,8192,32,32,128,0,1,float16,float16,0,31.95647939046224
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,8192,32,1,128,0,1,float16,float16,0,29.357396443684895
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,8192,32,8,128,0,1,fp8,fp8,0,38.20390319824219
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,8192,32,32,128,0,1,float16,fp8,0,32.853162129720054
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,8192,32,8,128,0,1,float16,float16,0,61.15925089518229
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,8192,32,1,128,0,1,float16,fp8,0,29.351425170898438
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,8192,32,8,128,0,1,float16,fp8,0,62.68262227376302
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,8192,32,1,128,0,1,fp8,fp8,0,17.898666381835938
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,8192,32,2,128,0,1,fp8,fp8,0,17.700010935465496
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,8192,32,2,128,0,1,float16,float16,0,29.388628641764324
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,8192,32,2,128,0,1,float16,fp8,0,29.45757802327474
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,8192,32,4,128,0,1,float16,float16,0,30.258346557617188
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,8192,32,4,128,0,1,float16,fp8,0,29.184000651041668
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,8192,32,4,128,0,1,fp8,fp8,0,17.745066324869793
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,8192,32,8,128,0,1,fp8,fp8,0,18.542420705159504
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,8192,32,8,128,0,1,float16,float16,0,30.223360697428387
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,8192,32,8,128,0,1,float16,fp8,0,30.401878356933594
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,8192,32,32,128,0,1,float16,float16,0,15.82916259765625
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,8192,32,32,128,0,1,fp8,fp8,0,9.945770899454752
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,8192,32,32,128,0,1,float16,fp8,0,16.07321548461914
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,8192,32,1,128,0,1,float16,float16,0,14.547968546549479
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,8192,32,1,128,0,1,float16,fp8,0,14.728703816731771
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,8192,32,1,128,0,1,fp8,fp8,0,8.867839813232422
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,8192,32,2,128,0,1,fp8,fp8,0,9.112746556599935
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,8192,32,2,128,0,1,float16,float16,0,14.782975514729818
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,8192,32,4,128,0,1,fp8,fp8,0,9.116501490275065
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,8192,32,2,128,0,1,float16,fp8,0,14.857898712158203
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,8192,32,4,128,0,1,float16,float16,0,15.058090209960938
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,8192,32,4,128,0,1,float16,fp8,0,14.748672485351562
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,8192,32,8,128,0,1,float16,float16,0,14.983851114908854
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,8192,32,8,128,0,1,fp8,fp8,0,8.99072011311849
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,8192,32,32,128,0,1,fp8,fp8,0,5.065557479858398
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,8192,32,8,128,0,1,float16,fp8,0,14.98248545328776
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,8192,32,32,128,0,1,float16,float16,0,8.223061243693033
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,8192,32,32,128,0,1,float16,fp8,0,7.543978373209636
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,8192,32,1,128,0,1,float16,float16,0,7.226709365844727
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,8192,32,1,128,0,1,float16,fp8,0,7.122090657552083
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,8192,32,1,128,0,1,fp8,fp8,0,4.146346728006999
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,8192,32,2,128,0,1,fp8,fp8,0,3.9736318588256836
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,8192,32,2,128,0,1,float16,float16,0,7.428266525268555
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,8192,32,2,128,0,1,float16,fp8,0,7.023786544799805
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,8192,32,4,128,0,1,fp8,fp8,0,4.400981267293294
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,8192,32,4,128,0,1,float16,fp8,0,7.129088083902995
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,8192,32,4,128,0,1,float16,float16,0,7.639210383097331
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,8192,32,8,128,0,1,float16,float16,0,7.318016052246094
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,8192,32,8,128,0,1,float16,fp8,0,7.285077412923177
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,8192,32,8,128,0,1,fp8,fp8,0,4.15556271870931
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,8192,32,32,128,0,1,float16,float16,0,3.859285354614258
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,8192,32,32,128,0,1,float16,fp8,0,3.917311986287435
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,8192,32,32,128,0,1,fp8,fp8,0,2.5311573346455893
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,8192,32,1,128,0,1,fp8,fp8,0,1.9672746658325195
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,8192,32,1,128,0,1,float16,float16,0,3.3423360188802085
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,8192,32,1,128,0,1,float16,fp8,0,3.4450772603352866
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,8192,32,2,128,0,1,float16,float16,0,3.259221394856771
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,8192,32,2,128,0,1,fp8,fp8,0,1.9792213439941406
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,8192,32,2,128,0,1,float16,fp8,0,3.3390932083129883
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,8192,32,4,128,0,1,fp8,fp8,0,2.012159983317057
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,8192,32,4,128,0,1,float16,float16,0,3.538090705871582
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,8192,32,4,128,0,1,float16,fp8,0,3.4367148081461587
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,8192,32,8,128,0,1,float16,float16,0,3.4978132247924805
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,8192,32,8,128,0,1,float16,fp8,0,3.537407875061035
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,8192,32,8,128,0,1,fp8,fp8,0,2.1352106730143228
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,6144,32,1,128,0,1,fp8,fp8,0,20.377941131591797
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,6144,32,2,128,0,1,fp8,fp8,0,19.785216013590496
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,6144,32,1,128,0,1,float16,float16,0,34.62826792399088
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,6144,32,1,128,0,1,float16,fp8,0,34.47466786702474
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,6144,32,2,128,0,1,float16,float16,0,34.899454752604164
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,6144,32,2,128,0,1,float16,fp8,0,35.534507751464844
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,6144,32,4,128,0,1,float16,float16,0,35.0745595296224
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,6144,32,4,128,0,1,float16,fp8,0,34.4432627360026
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,6144,32,4,128,0,1,fp8,fp8,0,20.20522689819336
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,6144,32,32,128,0,1,fp8,fp8,0,12.526933034261068
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,6144,32,32,128,0,1,float16,float16,0,20.129109700520832
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,6144,32,8,128,0,1,fp8,fp8,0,22.32507832845052
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,6144,32,32,128,0,1,float16,fp8,0,19.983871459960938
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,6144,32,8,128,0,1,float16,float16,0,34.73936971028646
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,6144,32,1,128,0,1,float16,float16,0,17.054378509521484
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,6144,32,8,128,0,1,float16,fp8,0,35.25341796875
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,6144,32,1,128,0,1,float16,fp8,0,16.990549723307293
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,6144,32,1,128,0,1,fp8,fp8,0,10.736981709798178
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,6144,32,2,128,0,1,fp8,fp8,0,10.39035733540853
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,6144,32,2,128,0,1,float16,float16,0,17.280340830485027
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,6144,32,2,128,0,1,float16,fp8,0,17.07263946533203
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,6144,32,4,128,0,1,float16,float16,0,17.031509399414062
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,6144,32,4,128,0,1,float16,fp8,0,17.137152353922527
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,6144,32,4,128,0,1,fp8,fp8,0,10.229077021280924
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,6144,32,8,128,0,1,float16,float16,0,17.49896494547526
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,6144,32,8,128,0,1,fp8,fp8,0,11.181055704752604
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,6144,32,8,128,0,1,float16,fp8,0,17.72407404581706
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,6144,32,32,128,0,1,fp8,fp8,0,6.206975936889648
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,6144,32,32,128,0,1,float16,float16,0,9.587711970011393
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,6144,32,1,128,0,1,float16,float16,0,8.141824086507162
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,6144,32,32,128,0,1,float16,fp8,0,9.393493016560873
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,6144,32,1,128,0,1,float16,fp8,0,8.44595209757487
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,6144,32,1,128,0,1,fp8,fp8,0,4.9430185953776045
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,6144,32,2,128,0,1,fp8,fp8,0,5.014698664347331
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,6144,32,2,128,0,1,float16,float16,0,8.763903935750326
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,6144,32,4,128,0,1,fp8,fp8,0,4.730026563008626
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,6144,32,2,128,0,1,float16,fp8,0,8.779776255289713
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,6144,32,4,128,0,1,float16,float16,0,9.045674641927084
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,6144,32,4,128,0,1,float16,fp8,0,8.508415857950846
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,6144,32,8,128,0,1,float16,float16,0,8.385194778442383
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,6144,32,8,128,0,1,float16,fp8,0,8.44049072265625
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,6144,32,8,128,0,1,fp8,fp8,0,5.184512138366699
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,6144,32,32,128,0,1,fp8,fp8,0,3.134976069132487
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,6144,32,32,128,0,1,float16,float16,0,4.699647903442383
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,6144,32,32,128,0,1,float16,fp8,0,4.618751843770345
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,6144,32,1,128,0,1,float16,float16,0,4.062037467956543
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,6144,32,1,128,0,1,float16,fp8,0,3.976874669392904
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,6144,32,1,128,0,1,fp8,fp8,0,2.284543991088867
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,6144,32,2,128,0,1,float16,float16,0,3.9635626475016275
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,6144,32,2,128,0,1,fp8,fp8,0,2.4064000447591147
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,6144,32,2,128,0,1,float16,fp8,0,3.917311986287435
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,6144,32,4,128,0,1,fp8,fp8,0,2.4883200327555337
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,6144,32,4,128,0,1,float16,float16,0,4.062719980875651
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,6144,32,4,128,0,1,float16,fp8,0,4.049749374389648
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,6144,32,8,128,0,1,float16,float16,0,4.177578608194987
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,6144,32,8,128,0,1,float16,fp8,0,3.920554796854655
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,6144,32,8,128,0,1,fp8,fp8,0,2.5060693422953286
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,6144,32,32,128,0,1,float16,fp8,0,2.2797652880350747
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,6144,32,32,128,0,1,float16,float16,0,2.3700480461120605
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,6144,32,32,128,0,1,fp8,fp8,0,1.5452159245808919
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,6144,32,1,128,0,1,float16,float16,0,1.949354648590088
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,6144,32,1,128,0,1,float16,fp8,0,1.9213652610778809
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,6144,32,1,128,0,1,fp8,fp8,0,1.1946667035420735
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,6144,32,2,128,0,1,float16,float16,0,1.8853546778361003
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,6144,32,2,128,0,1,float16,fp8,0,1.8911573092142742
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,6144,32,2,128,0,1,fp8,fp8,0,1.1816960175832112
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,6144,32,4,128,0,1,float16,float16,0,1.9491839408874512
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,6144,32,4,128,0,1,float16,fp8,0,1.9875839551289876
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,6144,32,4,128,0,1,fp8,fp8,0,1.195690631866455
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,6144,32,8,128,0,1,float16,float16,0,1.9618132909138997
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,6144,32,8,128,0,1,float16,fp8,0,1.9616427421569824
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,6144,32,8,128,0,1,fp8,fp8,0,1.2141226927439372
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,4096,32,1,128,0,1,fp8,fp8,0,21.619542439778645
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,4096,32,2,128,0,1,fp8,fp8,0,20.18781916300456
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,4096,32,1,128,0,1,float16,float16,0,33.23221333821615
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,4096,32,1,128,0,1,float16,fp8,0,33.26702880859375
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,4096,32,2,128,0,1,float16,float16,0,34.70284779866537
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,4096,32,2,128,0,1,float16,fp8,0,34.01062520345052
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,4096,32,4,128,0,1,float16,float16,0,33.49623362223307
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,4096,32,4,128,0,1,float16,fp8,0,33.3310292561849
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,4096,32,4,128,0,1,fp8,fp8,0,22.04381815592448
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,4096,32,32,128,0,1,fp8,fp8,0,13.327359517415365
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,4096,32,32,128,0,1,float16,float16,0,19.722240447998047
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,4096,32,1,128,0,1,float16,float16,0,16.065536499023438
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,4096,32,32,128,0,1,float16,fp8,0,19.177813212076824
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,4096,32,8,128,0,1,fp8,fp8,0,22.4718500773112
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,4096,32,8,128,0,1,float16,float16,0,34.129066467285156
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,4096,32,8,128,0,1,float16,fp8,0,34.31304423014323
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,4096,32,1,128,0,1,fp8,fp8,0,10.024960199991861
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,4096,32,1,128,0,1,float16,fp8,0,16.145919799804688
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,4096,32,2,128,0,1,fp8,fp8,0,10.320725123087565
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,4096,32,2,128,0,1,float16,fp8,0,15.86346689860026
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,4096,32,2,128,0,1,float16,float16,0,16.380757649739582
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,4096,32,4,128,0,1,float16,float16,0,16.103936513264973
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,4096,32,4,128,0,1,fp8,fp8,0,10.265941619873047
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,4096,32,4,128,0,1,float16,fp8,0,16.244394938151043
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,4096,32,8,128,0,1,fp8,fp8,0,10.621269226074219
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,4096,32,8,128,0,1,float16,float16,0,16.530943552652996
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,4096,32,8,128,0,1,float16,fp8,0,16.59665044148763
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,4096,32,32,128,0,1,fp8,fp8,0,6.596437454223633
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,4096,32,32,128,0,1,float16,float16,0,9.144490559895834
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,4096,32,32,128,0,1,float16,fp8,0,9.389055887858072
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,4096,32,1,128,0,1,float16,float16,0,7.9467519124348955
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,4096,32,1,128,0,1,float16,fp8,0,7.950677235921224
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,4096,32,1,128,0,1,fp8,fp8,0,4.839424133300781
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,4096,32,2,128,0,1,fp8,fp8,0,4.677290598551433
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,4096,32,2,128,0,1,float16,float16,0,7.992832183837891
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,4096,32,4,128,0,1,fp8,fp8,0,4.749653180440267
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,4096,32,2,128,0,1,float16,fp8,0,8.241493225097656
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,4096,32,4,128,0,1,float16,float16,0,8.189098358154297
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,4096,32,4,128,0,1,float16,fp8,0,8.354474385579428
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,4096,32,8,128,0,1,float16,float16,0,8.016554514567057
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,4096,32,8,128,0,1,fp8,fp8,0,5.063680013020833
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,4096,32,32,128,0,1,float16,float16,0,4.673023859659831
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,4096,32,32,128,0,1,float16,fp8,0,4.565845489501953
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,4096,32,32,128,0,1,fp8,fp8,0,3.2696320215861
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,4096,32,8,128,0,1,float16,fp8,0,8.467114766438803
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,4096,32,1,128,0,1,fp8,fp8,0,2.2516053517659507
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,4096,32,1,128,0,1,float16,float16,0,3.6167678833007812
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,4096,32,1,128,0,1,float16,fp8,0,3.7657601038614907
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,4096,32,2,128,0,1,fp8,fp8,0,2.267306645711263
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,4096,32,2,128,0,1,float16,float16,0,3.7160959243774414
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,4096,32,2,128,0,1,float16,fp8,0,3.7186559041341147
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,4096,32,4,128,0,1,fp8,fp8,0,2.341205279032389
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,4096,32,4,128,0,1,float16,fp8,0,3.766442616780599
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,4096,32,4,128,0,1,float16,float16,0,3.8333441416422525
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,4096,32,8,128,0,1,float16,float16,0,3.915264129638672
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,4096,32,8,128,0,1,float16,fp8,0,3.949397404988607
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,4096,32,8,128,0,1,fp8,fp8,0,2.4436052640279136
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,4096,32,32,128,0,1,float16,float16,0,2.308608055114746
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,4096,32,32,128,0,1,float16,fp8,0,2.2860800425211587
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,4096,32,32,128,0,1,fp8,fp8,0,1.5749120712280273
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,4096,32,1,128,0,1,float16,float16,0,1.7662293116251628
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,4096,32,1,128,0,1,float16,fp8,0,1.7769813537597656
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,4096,32,1,128,0,1,fp8,fp8,0,1.0772480169932048
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,4096,32,2,128,0,1,float16,float16,0,1.7991679509480794
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,4096,32,2,128,0,1,float16,fp8,0,1.7578667004903157
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,4096,32,2,128,0,1,fp8,fp8,0,1.102847973505656
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,4096,32,4,128,0,1,float16,float16,0,1.8078719774882
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,4096,32,4,128,0,1,fp8,fp8,0,1.1482453346252441
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,4096,32,4,128,0,1,float16,fp8,0,1.7865386009216309
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,4096,32,8,128,0,1,float16,float16,0,1.9345067342122395
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,4096,32,8,128,0,1,float16,fp8,0,1.85753599802653
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,4096,32,8,128,0,1,fp8,fp8,0,1.2598613103230794
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,4096,32,32,128,0,1,float16,float16,0,1.1569493611653645
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,4096,32,32,128,0,1,float16,fp8,0,1.125205357869466
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,4096,32,1,128,0,1,float16,float16,0,0.9262080192565918
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,4096,32,1,128,0,1,float16,fp8,0,0.9169920285542806
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,4096,32,32,128,0,1,fp8,fp8,0,0.7838719685872396
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,4096,32,1,128,0,1,fp8,fp8,0,0.5596160093943278
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,4096,32,2,128,0,1,float16,float16,0,0.9094826380411783
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,4096,32,2,128,0,1,float16,fp8,0,0.9461759726206461
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,4096,32,2,128,0,1,fp8,fp8,0,0.5548373460769653
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,4096,32,4,128,0,1,float16,float16,0,0.9168213208516439
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,4096,32,4,128,0,1,fp8,fp8,0,0.5618346532185873
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,4096,32,4,128,0,1,float16,fp8,0,0.9267199834187826
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,4096,32,8,128,0,1,float16,float16,0,0.92740265528361
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,4096,32,8,128,0,1,fp8,fp8,0,0.5661013523737589
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,4096,32,8,128,0,1,float16,fp8,0,0.936959981918335
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,3072,32,1,128,0,1,float16,float16,0,19.851776123046875
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,3072,32,1,128,0,1,fp8,fp8,0,12.250111897786459
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,3072,32,2,128,0,1,fp8,fp8,0,12.4136110941569
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,3072,32,1,128,0,1,float16,fp8,0,19.74835205078125
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,3072,32,2,128,0,1,float16,float16,0,19.67803700764974
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,3072,32,2,128,0,1,float16,fp8,0,20.32571792602539
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,3072,32,4,128,0,1,float16,fp8,0,19.739988962809246
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,3072,32,4,128,0,1,float16,float16,0,19.85757827758789
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,3072,32,4,128,0,1,fp8,fp8,0,12.8810666402181
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,3072,32,8,128,0,1,float16,float16,0,19.721215565999348
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,3072,32,1,128,0,1,float16,float16,0,9.336490631103516
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,3072,32,32,128,0,1,float16,float16,0,11.58963139851888
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,3072,32,32,128,0,1,fp8,fp8,0,8.666282653808594
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,3072,32,8,128,0,1,fp8,fp8,0,13.661354064941406
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,3072,32,32,128,0,1,float16,fp8,0,11.817131042480469
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,3072,32,8,128,0,1,float16,fp8,0,19.431424458821613
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,3072,32,1,128,0,1,float16,fp8,0,9.766229629516602
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,3072,32,1,128,0,1,fp8,fp8,0,5.603157043457031
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,3072,32,2,128,0,1,fp8,fp8,0,5.9136002858479815
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,3072,32,4,128,0,1,fp8,fp8,0,5.881173451741536
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,3072,32,2,128,0,1,float16,fp8,0,9.513130823771158
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,3072,32,2,128,0,1,float16,float16,0,9.591295878092447
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,3072,32,4,128,0,1,float16,float16,0,9.786368052164713
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,3072,32,4,128,0,1,float16,fp8,0,9.891839981079102
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,3072,32,8,128,0,1,float16,float16,0,9.925461451212565
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,3072,32,8,128,0,1,fp8,fp8,0,6.432597478230794
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,3072,32,1,128,0,1,float16,float16,0,4.311210632324219
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,3072,32,32,128,0,1,fp8,fp8,0,4.203690528869629
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,3072,32,32,128,0,1,float16,float16,0,5.812394460042317
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,3072,32,8,128,0,1,float16,fp8,0,9.69045321146647
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,3072,32,32,128,0,1,float16,fp8,0,5.772117614746094
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,3072,32,1,128,0,1,float16,fp8,0,4.379136085510254
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,3072,32,1,128,0,1,fp8,fp8,0,2.8231681187947593
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,3072,32,2,128,0,1,fp8,fp8,0,2.7687253952026367
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,3072,32,2,128,0,1,float16,fp8,0,4.503722508748372
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,3072,32,4,128,0,1,fp8,fp8,0,2.884096145629883
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,3072,32,2,128,0,1,float16,float16,0,4.5714772542317705
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,3072,32,8,128,0,1,float16,float16,0,4.787712097167969
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,3072,32,4,128,0,1,float16,float16,0,4.553045272827148
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,3072,32,4,128,0,1,float16,fp8,0,4.436821301778157
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,3072,32,8,128,0,1,float16,fp8,0,4.771498680114746
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,3072,32,32,128,0,1,float16,float16,0,2.8835840225219727
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,3072,32,8,128,0,1,fp8,fp8,0,3.0670506159464517
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,3072,32,32,128,0,1,fp8,fp8,0,2.0754772822062173
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,3072,32,32,128,0,1,float16,fp8,0,2.856106758117676
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,3072,32,1,128,0,1,float16,float16,0,2.1753172874450684
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,3072,32,1,128,0,1,float16,fp8,0,2.1505707105000815
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,3072,32,1,128,0,1,fp8,fp8,0,1.35150941212972
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,3072,32,2,128,0,1,float16,float16,0,2.2193493843078613
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,3072,32,2,128,0,1,fp8,fp8,0,1.371989409128825
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,3072,32,2,128,0,1,float16,fp8,0,2.137770652770996
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,3072,32,4,128,0,1,float16,fp8,0,2.220714728037516
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,3072,32,4,128,0,1,float16,float16,0,2.2674773534139
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,3072,32,4,128,0,1,fp8,fp8,0,1.423701286315918
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,3072,32,8,128,0,1,float16,float16,0,2.3282346725463867
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,3072,32,8,128,0,1,float16,fp8,0,2.2901760737101235
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,3072,32,8,128,0,1,fp8,fp8,0,1.5522133509318035
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,3072,32,32,128,0,1,float16,fp8,0,1.4037334124247234
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,3072,32,32,128,0,1,float16,float16,0,1.4349652926127117
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,3072,32,32,128,0,1,fp8,fp8,0,1.0036906401316326
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,3072,32,1,128,0,1,float16,fp8,0,1.0347519715627034
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,3072,32,1,128,0,1,float16,float16,0,1.0422613620758057
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,3072,32,1,128,0,1,fp8,fp8,0,0.6447786490122477
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,3072,32,2,128,0,1,float16,fp8,0,1.055402676264445
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,3072,32,2,128,0,1,fp8,fp8,0,0.6381226778030396
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,3072,32,2,128,0,1,float16,float16,0,1.0269013245900471
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,3072,32,4,128,0,1,float16,float16,0,1.0403839747111003
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,3072,32,4,128,0,1,fp8,fp8,0,0.6746453444163004
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,3072,32,4,128,0,1,float16,fp8,0,1.0448213418324788
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,3072,32,8,128,0,1,float16,float16,0,1.1115520000457764
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,3072,32,8,128,0,1,float16,fp8,0,1.0832213560740154
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,3072,32,8,128,0,1,fp8,fp8,0,0.7328426837921143
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,3072,32,32,128,0,1,float16,float16,0,0.6437546809514364
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,3072,32,32,128,0,1,fp8,fp8,0,0.4551680088043213
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,3072,32,1,128,0,1,float16,fp8,0,0.5544960101445516
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,3072,32,1,128,0,1,float16,float16,0,0.5558613141377767
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,3072,32,1,128,0,1,fp8,fp8,0,0.35276798407236737
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,3072,32,32,128,0,1,float16,fp8,0,0.6004053354263306
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,3072,32,2,128,0,1,float16,fp8,0,0.5792426665623983
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,3072,32,2,128,0,1,float16,float16,0,0.5655893484751383
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,3072,32,2,128,0,1,fp8,fp8,0,0.3473066488901774
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,3072,32,4,128,0,1,float16,float16,0,0.5560319821039835
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,3072,32,4,128,0,1,float16,fp8,0,0.5620053211847941
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,3072,32,4,128,0,1,fp8,fp8,0,0.3433813254038493
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,3072,32,8,128,0,1,float16,float16,0,0.5831679900487264
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,3072,32,8,128,0,1,float16,fp8,0,0.567466656366984
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,3072,32,8,128,0,1,fp8,fp8,0,0.3543039957682292
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,2048,32,1,128,0,1,fp8,fp8,0,12.932608286539713
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,2048,32,2,128,0,1,fp8,fp8,0,13.611860911051432
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,2048,32,1,128,0,1,float16,fp8,0,19.977728525797527
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,2048,32,2,128,0,1,float16,float16,0,19.46845881144206
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,2048,32,1,128,0,1,float16,float16,0,20.065962473551433
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,2048,32,2,128,0,1,float16,fp8,0,20.375381469726562
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,2048,32,4,128,0,1,float16,float16,0,19.285675048828125
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,2048,32,4,128,0,1,float16,fp8,0,19.796138763427734
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,2048,32,4,128,0,1,fp8,fp8,0,13.605716705322266
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,2048,32,32,128,0,1,float16,fp8,0,12.516010284423828
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,2048,32,32,128,0,1,float16,float16,0,12.464640299479166
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,2048,32,8,128,0,1,fp8,fp8,0,15.191722869873047
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,2048,32,8,128,0,1,float16,float16,0,21.0164057413737
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,2048,32,32,128,0,1,fp8,fp8,0,9.649152119954428
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,2048,32,8,128,0,1,float16,fp8,0,20.3863042195638
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,2048,32,1,128,0,1,float16,float16,0,9.316522598266602
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,2048,32,1,128,0,1,float16,fp8,0,9.353386561075846
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,2048,32,1,128,0,1,fp8,fp8,0,5.960533142089844
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,2048,32,2,128,0,1,fp8,fp8,0,6.049109141031901
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,2048,32,2,128,0,1,float16,float16,0,8.992085138956705
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,2048,32,2,128,0,1,float16,fp8,0,9.616383870442709
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,2048,32,4,128,0,1,fp8,fp8,0,6.400170644124349
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,2048,32,4,128,0,1,float16,float16,0,9.30235735575358
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,2048,32,4,128,0,1,float16,fp8,0,9.352874755859375
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,2048,32,8,128,0,1,float16,float16,0,9.779882431030273
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,2048,32,8,128,0,1,float16,fp8,0,9.692501068115234
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,2048,32,32,128,0,1,fp8,fp8,0,4.83242670694987
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,2048,32,8,128,0,1,fp8,fp8,0,6.989824295043945
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,2048,32,32,128,0,1,float16,fp8,0,6.114304224650065
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,2048,32,1,128,0,1,float16,fp8,0,4.340565363566081
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,2048,32,32,128,0,1,float16,float16,0,6.211072285970052
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,2048,32,1,128,0,1,float16,float16,0,4.3787946701049805
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,2048,32,1,128,0,1,fp8,fp8,0,2.7876691818237305
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,2048,32,2,128,0,1,float16,float16,0,4.544853210449219
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,2048,32,2,128,0,1,fp8,fp8,0,2.938197453816732
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,2048,32,2,128,0,1,float16,fp8,0,4.5434878667195635
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,2048,32,4,128,0,1,float16,float16,0,4.531711896260579
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,2048,32,4,128,0,1,float16,fp8,0,4.427263895670573
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,2048,32,4,128,0,1,fp8,fp8,0,3.024042765299479
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,2048,32,8,128,0,1,float16,float16,0,4.741973241170247
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,2048,32,8,128,0,1,float16,fp8,0,4.702037175496419
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,2048,32,8,128,0,1,fp8,fp8,0,3.303253491719564
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,2048,32,32,128,0,1,float16,fp8,0,3.1124480565389
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,2048,32,32,128,0,1,float16,float16,0,3.133610725402832
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,2048,32,1,128,0,1,float16,float16,0,2.1265066464742026
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,2048,32,32,128,0,1,fp8,fp8,0,2.314922650655111
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,2048,32,1,128,0,1,float16,fp8,0,2.1587626139322915
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,2048,32,1,128,0,1,fp8,fp8,0,1.368234634399414
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,2048,32,2,128,0,1,float16,float16,0,2.1602986653645835
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,2048,32,2,128,0,1,fp8,fp8,0,1.4417920112609863
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,2048,32,2,128,0,1,float16,fp8,0,2.1969920794169107
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,2048,32,4,128,0,1,fp8,fp8,0,1.463637351989746
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,2048,32,4,128,0,1,float16,fp8,0,2.2111573219299316
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,2048,32,4,128,0,1,float16,float16,0,2.2688426971435547
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,2048,32,8,128,0,1,float16,float16,0,2.3722666104634604
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,2048,32,8,128,0,1,float16,fp8,0,2.3328426678975425
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,2048,32,8,128,0,1,fp8,fp8,0,1.5639893213907878
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,2048,32,32,128,0,1,float16,float16,0,1.5569920539855957
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,2048,32,1,128,0,1,float16,float16,0,0.9919146696726481
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,2048,32,32,128,0,1,float16,fp8,0,1.5028907457987468
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,2048,32,32,128,0,1,fp8,fp8,0,1.1013120015462239
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,2048,32,1,128,0,1,float16,fp8,0,1.0105173587799072
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,2048,32,1,128,0,1,fp8,fp8,0,0.6471680005391439
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,2048,32,2,128,0,1,float16,float16,0,1.005226691563924
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,2048,32,2,128,0,1,float16,fp8,0,0.9997653166453043
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,2048,32,2,128,0,1,fp8,fp8,0,0.6852266788482666
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,2048,32,4,128,0,1,float16,float16,0,1.0461866855621338
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,2048,32,4,128,0,1,fp8,fp8,0,0.7215786774953207
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,2048,32,4,128,0,1,float16,fp8,0,1.0586453278859456
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,2048,32,8,128,0,1,float16,float16,0,1.1559253533681233
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,2048,32,8,128,0,1,float16,fp8,0,1.1274240016937256
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,2048,32,8,128,0,1,fp8,fp8,0,0.7809706528981527
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,2048,32,32,128,0,1,float16,float16,0,0.7326719760894775
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,2048,32,32,128,0,1,float16,fp8,0,0.7101439634958903
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,2048,32,32,128,0,1,fp8,fp8,0,0.5288960138956705
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,2048,32,1,128,0,1,float16,fp8,0,0.5191680192947388
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,2048,32,1,128,0,1,float16,float16,0,0.5109759966532389
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,2048,32,1,128,0,1,fp8,fp8,0,0.3150506615638733
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,2048,32,2,128,0,1,float16,float16,0,0.5169493357340494
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,2048,32,2,128,0,1,float16,fp8,0,0.5171200037002563
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,2048,32,2,128,0,1,fp8,fp8,0,0.3065173427263896
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,2048,32,4,128,0,1,float16,float16,0,0.5207039912541708
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,2048,32,4,128,0,1,float16,fp8,0,0.5150719881057739
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,2048,32,4,128,0,1,fp8,fp8,0,0.31061333417892456
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,2048,32,8,128,0,1,float16,float16,0,0.5104639927546183
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,2048,32,8,128,0,1,float16,fp8,0,0.5227520068486532
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,2048,32,8,128,0,1,fp8,fp8,0,0.33075199524561566
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,2048,32,32,128,0,1,float16,fp8,0,0.2955946723620097
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,2048,32,32,128,0,1,fp8,fp8,0,0.20258132616678873
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,2048,32,1,128,0,1,float16,float16,0,0.2730666597684224
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,2048,32,32,128,0,1,float16,float16,0,0.30293333530426025
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,2048,32,1,128,0,1,float16,fp8,0,0.2725546757380168
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,2048,32,1,128,0,1,fp8,fp8,0,0.1807360053062439
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,2048,32,2,128,0,1,float16,float16,0,0.2764799992243449
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,2048,32,2,128,0,1,fp8,fp8,0,0.18056533734003702
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,2048,32,2,128,0,1,float16,fp8,0,0.2786986629168193
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,2048,32,4,128,0,1,float16,fp8,0,0.2769920031229655
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,2048,32,4,128,0,1,float16,float16,0,0.285866657892863
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,2048,32,8,128,0,1,float16,float16,0,0.2882560094197591
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,2048,32,8,128,0,1,float16,fp8,0,0.28330665826797485
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,2048,32,4,128,0,1,fp8,fp8,0,0.18227199713389078
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,2048,32,8,128,0,1,fp8,fp8,0,0.18295466899871826
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,1536,32,1,128,0,1,fp8,fp8,0,7.988735834757487
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,1536,32,2,128,0,1,fp8,fp8,0,8.137898763020834
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,1536,32,1,128,0,1,float16,float16,0,11.484842936197916
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,1536,32,1,128,0,1,float16,fp8,0,11.46572748819987
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,1536,32,2,128,0,1,float16,float16,0,11.844095865885416
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,1536,32,2,128,0,1,float16,fp8,0,11.736063639322916
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,1536,32,4,128,0,1,float16,float16,0,11.730944315592447
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,1536,32,4,128,0,1,float16,fp8,0,11.872426350911459
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,1536,32,4,128,0,1,fp8,fp8,0,8.641365051269531
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,1536,32,32,128,0,1,float16,float16,0,8.236885070800781
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,1536,32,32,128,0,1,float16,fp8,0,8.26419194539388
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,1536,32,1,128,0,1,float16,float16,0,5.480960210164388
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,1536,32,8,128,0,1,float16,float16,0,12.450645446777344
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,1536,32,32,128,0,1,fp8,fp8,0,6.496255874633789
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,1536,32,8,128,0,1,fp8,fp8,0,9.52558962504069
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,1536,32,8,128,0,1,float16,fp8,0,12.29806900024414
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,1536,32,1,128,0,1,float16,fp8,0,5.359957377115886
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,1536,32,1,128,0,1,fp8,fp8,0,3.67359987894694
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,1536,32,2,128,0,1,float16,float16,0,5.616810480753581
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,1536,32,2,128,0,1,fp8,fp8,0,3.7435731887817383
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,1536,32,2,128,0,1,float16,fp8,0,5.577045440673828
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,1536,32,4,128,0,1,fp8,fp8,0,3.9075838724772134
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,1536,32,4,128,0,1,float16,float16,0,5.595647811889648
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,1536,32,4,128,0,1,float16,fp8,0,5.653674443562825
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,1536,32,8,128,0,1,float16,float16,0,5.9450028737386065
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,1536,32,8,128,0,1,float16,fp8,0,6.14570681254069
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,1536,32,32,128,0,1,float16,float16,0,4.0651092529296875
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,1536,32,32,128,0,1,float16,fp8,0,4.065962791442871
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,1536,32,8,128,0,1,fp8,fp8,0,4.391765276590983
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,1536,32,32,128,0,1,fp8,fp8,0,3.165013313293457
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,1536,32,1,128,0,1,float16,fp8,0,2.674346605936686
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,1536,32,1,128,0,1,float16,float16,0,2.6886825561523438
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,1536,32,1,128,0,1,fp8,fp8,0,1.772714614868164
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,1536,32,2,128,0,1,fp8,fp8,0,1.8626559575398762
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,1536,32,2,128,0,1,float16,float16,0,2.7893759409586587
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,1536,32,2,128,0,1,float16,fp8,0,2.700629234313965
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,1536,32,4,128,0,1,float16,fp8,0,2.79313055674235
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,1536,32,4,128,0,1,float16,float16,0,2.8788054784139
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,1536,32,4,128,0,1,fp8,fp8,0,1.9031039873758953
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,1536,32,8,128,0,1,float16,float16,0,2.984618822733561
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,1536,32,8,128,0,1,float16,fp8,0,2.941439946492513
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,1536,32,8,128,0,1,fp8,fp8,0,2.097322622934977
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,1536,32,32,128,0,1,float16,float16,0,2.0565333366394043
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,1536,32,1,128,0,1,float16,float16,0,1.3098666667938232
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,1536,32,32,128,0,1,fp8,fp8,0,1.5230293273925781
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,1536,32,32,128,0,1,float16,fp8,0,1.9819520314534504
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,1536,32,1,128,0,1,float16,fp8,0,1.2970666885375977
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,1536,32,1,128,0,1,fp8,fp8,0,0.8611839612325033
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,1536,32,2,128,0,1,float16,float16,0,1.3105493386586506
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,1536,32,2,128,0,1,fp8,fp8,0,0.8997546831766764
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,1536,32,2,128,0,1,float16,fp8,0,1.3282986481984456
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,1536,32,4,128,0,1,float16,float16,0,1.3900799751281738
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,1536,32,4,128,0,1,fp8,fp8,0,0.9280853271484375
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,1536,32,4,128,0,1,float16,fp8,0,1.3521919250488281
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,1536,32,8,128,0,1,float16,float16,0,1.472511927286784
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,1536,32,8,128,0,1,float16,fp8,0,1.4510080019632976
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,1536,32,8,128,0,1,fp8,fp8,0,1.0098346869150798
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,1536,32,32,128,0,1,float16,float16,0,0.9980586369832357
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,1536,32,32,128,0,1,fp8,fp8,0,0.7492266496022543
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,1536,32,1,128,0,1,float16,float16,0,0.5870933135350546
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,1536,32,32,128,0,1,float16,fp8,0,0.9562453428904215
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,1536,32,1,128,0,1,float16,fp8,0,0.5821439822514852
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,1536,32,1,128,0,1,fp8,fp8,0,0.3882666826248169
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,1536,32,2,128,0,1,float16,float16,0,0.5947733322779337
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,1536,32,2,128,0,1,float16,fp8,0,0.5911893447240194
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,1536,32,2,128,0,1,fp8,fp8,0,0.39185067017873126
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,1536,32,4,128,0,1,float16,float16,0,0.6183253526687622
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,1536,32,4,128,0,1,float16,fp8,0,0.6043306589126587
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,1536,32,4,128,0,1,fp8,fp8,0,0.4225706656773885
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,1536,32,8,128,0,1,float16,float16,0,0.661845326423645
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,1536,32,8,128,0,1,float16,fp8,0,0.6478506724039713
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,1536,32,8,128,0,1,fp8,fp8,0,0.4681386550267537
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,1536,32,32,128,0,1,float16,float16,0,0.4241066773732503
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,1536,32,32,128,0,1,float16,fp8,0,0.381440003712972
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,1536,32,32,128,0,1,fp8,fp8,0,0.32819199562072754
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,1536,32,1,128,0,1,float16,float16,0,0.3104426662127177
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,1536,32,1,128,0,1,fp8,fp8,0,0.19114667177200317
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,1536,32,1,128,0,1,float16,fp8,0,0.3155626654624939
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,1536,32,2,128,0,1,float16,float16,0,0.3160746693611145
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,1536,32,2,128,0,1,float16,fp8,0,0.3141973416010539
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,1536,32,2,128,0,1,fp8,fp8,0,0.1955839991569519
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,1536,32,4,128,0,1,float16,float16,0,0.32051199674606323
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,1536,32,4,128,0,1,fp8,fp8,0,0.1914880077044169
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,1536,32,4,128,0,1,float16,fp8,0,0.3259733319282532
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,1536,32,8,128,0,1,float16,float16,0,0.31334400177001953
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,1536,32,8,128,0,1,float16,fp8,0,0.32665600379308063
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,1536,32,32,128,0,1,float16,fp8,0,0.18500266472498575
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,1536,32,8,128,0,1,fp8,fp8,0,0.19848533471425375
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,1536,32,32,128,0,1,float16,float16,0,0.19029333194096884
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,1536,32,32,128,0,1,fp8,fp8,0,0.12424533565839131
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,1536,32,1,128,0,1,float16,float16,0,0.17493333419164023
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,1536,32,1,128,0,1,float16,fp8,0,0.1718613306681315
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,1536,32,1,128,0,1,fp8,fp8,0,0.11895466844240825
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,1536,32,2,128,0,1,float16,float16,0,0.17339734236399332
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,1536,32,2,128,0,1,float16,fp8,0,0.17322667439778647
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,1536,32,4,128,0,1,float16,float16,0,0.17561600605646768
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,1536,32,2,128,0,1,fp8,fp8,0,0.11997866630554199
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,1536,32,4,128,0,1,float16,fp8,0,0.17681066195170084
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,1536,32,4,128,0,1,fp8,fp8,0,0.11997866630554199
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,1536,32,8,128,0,1,float16,float16,0,0.17339734236399332
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,1536,32,8,128,0,1,float16,fp8,0,0.1718613306681315
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,1536,32,8,128,0,1,fp8,fp8,0,0.11997866630554199
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,1024,32,1,128,0,1,float16,float16,0,11.617621103922525
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,1024,32,1,128,0,1,fp8,fp8,0,8.075775782267252
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,1024,32,1,128,0,1,float16,fp8,0,11.418282826741537
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,1024,32,2,128,0,1,fp8,fp8,0,8.271189371744791
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,1024,32,2,128,0,1,float16,float16,0,11.708415985107422
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,1024,32,2,128,0,1,float16,fp8,0,11.593556722005209
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,1024,32,4,128,0,1,float16,fp8,0,11.704320271809896
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,1024,32,4,128,0,1,float16,float16,0,11.5152219136556
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,1024,32,4,128,0,1,fp8,fp8,0,8.320853551228842
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,1024,32,8,128,0,1,fp8,fp8,0,9.683626810709635
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,1024,32,8,128,0,1,float16,float16,0,12.715690612792969
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,1024,32,1,128,0,1,float16,float16,0,5.654186884562175
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,1024,32,32,128,0,1,float16,fp8,0,8.61678949991862
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,1024,32,32,128,0,1,float16,float16,0,9.180842717488607
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,1024,32,8,128,0,1,float16,fp8,0,12.457300821940104
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,1024,32,32,128,0,1,fp8,fp8,0,7.589888254801433
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,1024,32,1,128,0,1,float16,fp8,0,5.640192031860352
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,1024,32,1,128,0,1,fp8,fp8,0,4.0900265375773115
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,1024,32,2,128,0,1,fp8,fp8,0,4.019541422526042
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,1024,32,2,128,0,1,float16,fp8,0,5.697877248128255
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,1024,32,4,128,0,1,float16,float16,0,5.987840016682942
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,1024,32,4,128,0,1,fp8,fp8,0,4.225877443949382
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,1024,32,2,128,0,1,float16,float16,0,5.876223882039388
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,1024,32,4,128,0,1,float16,fp8,0,5.826218922932942
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,1024,32,8,128,0,1,float16,float16,0,6.321493148803711
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,1024,32,8,128,0,1,fp8,fp8,0,4.641450564066569
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,1024,32,8,128,0,1,float16,fp8,0,6.324565251668294
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,1024,32,1,128,0,1,float16,float16,0,2.806613286336263
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,1024,32,32,128,0,1,float16,float16,0,4.515328089396159
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,1024,32,32,128,0,1,fp8,fp8,0,3.7034667332967124
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,1024,32,32,128,0,1,float16,fp8,0,4.3692372639973955
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,1024,32,1,128,0,1,float16,fp8,0,2.772650718688965
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,1024,32,1,128,0,1,fp8,fp8,0,1.917952060699463
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,1024,32,2,128,0,1,float16,float16,0,2.8869972229003906
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,1024,32,2,128,0,1,float16,fp8,0,2.888021469116211
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,1024,32,4,128,0,1,fp8,fp8,0,2.1212159792582193
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,1024,32,2,128,0,1,fp8,fp8,0,2.016767978668213
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,1024,32,4,128,0,1,float16,float16,0,2.988032023111979
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,1024,32,4,128,0,1,float16,fp8,0,2.9484373728434243
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,1024,32,8,128,0,1,float16,float16,0,3.1366825103759766
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,1024,32,8,128,0,1,float16,fp8,0,3.088383992513021
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,1024,32,8,128,0,1,fp8,fp8,0,2.253653367360433
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,1024,32,1,128,0,1,float16,float16,0,1.35150941212972
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,1024,32,32,128,0,1,float16,float16,0,2.2896639506022134
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,1024,32,32,128,0,1,float16,fp8,0,2.190336068471273
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,1024,32,1,128,0,1,float16,fp8,0,1.3764266967773438
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,1024,32,32,128,0,1,fp8,fp8,0,1.8385920524597168
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,1024,32,1,128,0,1,fp8,fp8,0,0.932522694269816
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,1024,32,2,128,0,1,float16,float16,0,1.3866666158040364
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,1024,32,2,128,0,1,fp8,fp8,0,0.9562453428904215
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,1024,32,2,128,0,1,float16,fp8,0,1.3834239641825359
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,1024,32,4,128,0,1,float16,float16,0,1.4634666442871094
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,1024,32,4,128,0,1,fp8,fp8,0,1.02348796526591
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,1024,32,4,128,0,1,float16,fp8,0,1.4721706708272297
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,1024,32,8,128,0,1,float16,float16,0,1.5844693183898926
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,1024,32,8,128,0,1,float16,fp8,0,1.5414613087972004
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,1024,32,8,128,0,1,fp8,fp8,0,1.1180373032887776
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,1024,32,32,128,0,1,float16,float16,0,1.12008531888326
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,1024,32,32,128,0,1,float16,fp8,0,1.057792027791341
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,1024,32,32,128,0,1,fp8,fp8,0,0.8719360033671061
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,1024,32,1,128,0,1,float16,fp8,0,0.6289066473642985
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,1024,32,1,128,0,1,float16,float16,0,0.6244693199793497
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,1024,32,1,128,0,1,fp8,fp8,0,0.4333226680755615
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,1024,32,2,128,0,1,float16,float16,0,0.6340266863505045
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,1024,32,2,128,0,1,float16,fp8,0,0.6294186512629191
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,1024,32,2,128,0,1,fp8,fp8,0,0.4411733150482178
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,1024,32,4,128,0,1,float16,float16,0,0.677717367808024
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,1024,32,4,128,0,1,float16,fp8,0,0.6708906491597494
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,1024,32,4,128,0,1,fp8,fp8,0,0.4889599879582723
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,1024,32,8,128,0,1,float16,float16,0,0.7584426403045654
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,1024,32,8,128,0,1,float16,fp8,0,0.7381333510080973
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,1024,32,8,128,0,1,fp8,fp8,0,0.5550080140431722
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,1024,32,32,128,0,1,float16,float16,0,0.5220693349838257
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,1024,32,32,128,0,1,float16,fp8,0,0.48315731684366864
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,1024,32,1,128,0,1,float16,float16,0,0.2916693290074666
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,1024,32,32,128,0,1,fp8,fp8,0,0.4150613149007161
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,1024,32,1,128,0,1,float16,fp8,0,0.2945706645647685
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,1024,32,1,128,0,1,fp8,fp8,0,0.17220266660054526
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,1024,32,2,128,0,1,float16,float16,0,0.29422932863235474
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,1024,32,2,128,0,1,float16,fp8,0,0.2923520008722941
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,1024,32,2,128,0,1,fp8,fp8,0,0.17169066270192465
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,1024,32,4,128,0,1,float16,float16,0,0.2860373258590698
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,1024,32,4,128,0,1,float16,fp8,0,0.2950826684633891
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,1024,32,4,128,0,1,fp8,fp8,0,0.17561600605646768
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,1024,32,8,128,0,1,float16,float16,0,0.3155626654624939
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,1024,32,8,128,0,1,float16,fp8,0,0.3002026677131653
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,1024,32,8,128,0,1,fp8,fp8,0,0.20036266247431436
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,1024,32,32,128,0,1,float16,float16,0,0.17681066195170084
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,1024,32,32,128,0,1,float16,fp8,0,0.16827734311421713
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,1024,32,32,128,0,1,fp8,fp8,0,0.10444800059000652
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,1024,32,1,128,0,1,float16,float16,0,0.15052800377209982
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,1024,32,1,128,0,1,float16,fp8,0,0.1520639955997467
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,1024,32,1,128,0,1,fp8,fp8,0,0.10342400272687276
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,1024,32,2,128,0,1,float16,float16,0,0.155648003021876
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,1024,32,4,128,0,1,float16,float16,0,0.1570133368174235
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,1024,32,2,128,0,1,fp8,fp8,0,0.10274133086204529
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,1024,32,2,128,0,1,float16,fp8,0,0.15803733468055725
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,1024,32,4,128,0,1,float16,fp8,0,0.1513813336690267
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,1024,32,4,128,0,1,fp8,fp8,0,0.0993280013402303
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,1024,32,8,128,0,1,float16,float16,0,0.16247466206550598
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,1024,32,8,128,0,1,float16,fp8,0,0.1609386702378591
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,1024,32,8,128,0,1,fp8,fp8,0,0.10052266716957092
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,1024,32,32,128,0,1,float16,float16,0,0.10496000448862712
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,1024,32,32,128,0,1,float16,fp8,0,0.10171733299891154
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,1024,32,32,128,0,1,fp8,fp8,0,0.06502399841944377
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,1024,32,1,128,0,1,float16,float16,0,0.09659733374913533
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,1024,32,1,128,0,1,float16,fp8,0,0.09796266754468282
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,1024,32,1,128,0,1,fp8,fp8,0,0.059903999169667564
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,1024,32,2,128,0,1,float16,float16,0,0.10257066289583842
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,1024,32,2,128,0,1,float16,fp8,0,0.10120532910029094
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,1024,32,2,128,0,1,fp8,fp8,0,0.05922133227189382
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,1024,32,4,128,0,1,float16,float16,0,0.09864532947540283
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,1024,32,4,128,0,1,float16,fp8,0,0.10035199920336406
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,1024,32,4,128,0,1,fp8,fp8,0,0.06109866499900818
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,1024,32,8,128,0,1,float16,float16,0,0.09557333588600159
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,1024,32,8,128,0,1,float16,fp8,0,0.09847467144330342
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,1024,32,8,128,0,1,fp8,fp8,0,0.06911999980608623
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,512,32,1,128,0,1,float16,float16,0,8.510122934977213
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,512,32,1,128,0,1,float16,fp8,0,8.354645411173502
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,512,32,1,128,0,1,fp8,fp8,0,6.244864145914714
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,512,32,2,128,0,1,float16,float16,0,8.374783833821615
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,512,32,2,128,0,1,fp8,fp8,0,6.453930536905925
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,512,32,2,128,0,1,float16,fp8,0,8.358399709065756
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,512,32,4,128,0,1,float16,float16,0,8.514560063680014
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,512,32,4,128,0,1,float16,fp8,0,8.558591842651367
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,512,32,4,128,0,1,fp8,fp8,0,6.641152064005534
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,512,32,8,128,0,1,float16,float16,0,9.563135782877604
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,512,32,1,128,0,1,float16,float16,0,4.195839881896973
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,512,32,8,128,0,1,float16,fp8,0,9.372330983479818
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,512,32,32,128,0,1,float16,float16,0,8.304469426472982
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,512,32,32,128,0,1,float16,fp8,0,7.823530832926433
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,512,32,8,128,0,1,fp8,fp8,0,7.801514943440755
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,512,32,32,128,0,1,fp8,fp8,0,6.850218454996745
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,512,32,1,128,0,1,float16,fp8,0,4.248746554056804
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,512,32,1,128,0,1,fp8,fp8,0,3.10698668162028
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,512,32,2,128,0,1,float16,float16,0,4.225365320841472
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,512,32,2,128,0,1,fp8,fp8,0,3.1795199712117515
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,512,32,2,128,0,1,float16,fp8,0,4.240895907084147
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,512,32,4,128,0,1,fp8,fp8,0,3.225088119506836
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,512,32,4,128,0,1,float16,float16,0,4.269738515218099
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,512,32,4,128,0,1,float16,fp8,0,4.327253341674805
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,512,32,8,128,0,1,float16,float16,0,4.8395945231119795
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,512,32,8,128,0,1,float16,fp8,0,4.656298637390137
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,512,32,8,128,0,1,fp8,fp8,0,3.816960016886393
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,512,32,32,128,0,1,float16,float16,0,4.156928062438965
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,512,32,1,128,0,1,float16,float16,0,2.0351999600728354
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,512,32,32,128,0,1,fp8,fp8,0,3.4310827255249023
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,512,32,1,128,0,1,float16,fp8,0,2.0027732849121094
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,512,32,32,128,0,1,float16,fp8,0,3.9401814142862954
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,512,32,1,128,0,1,fp8,fp8,0,1.5238827069600422
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,512,32,2,128,0,1,float16,float16,0,2.0998826026916504
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,512,32,2,128,0,1,fp8,fp8,0,1.578154722849528
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,512,32,2,128,0,1,float16,fp8,0,2.1179733276367188
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,512,32,4,128,0,1,float16,float16,0,2.149888038635254
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,512,32,4,128,0,1,float16,fp8,0,2.1884586016337075
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,512,32,4,128,0,1,fp8,fp8,0,1.6040959358215332
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,512,32,8,128,0,1,float16,float16,0,2.362709363301595
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,512,32,8,128,0,1,float16,fp8,0,2.34769074122111
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,512,32,8,128,0,1,fp8,fp8,0,1.8810879389444988
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,512,32,32,128,0,1,float16,float16,0,2.1114880243937173
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,512,32,32,128,0,1,fp8,fp8,0,1.657002607981364
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,512,32,32,128,0,1,float16,fp8,0,2.0003840128580728
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,512,32,1,128,0,1,float16,float16,0,0.9731413523356119
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,512,32,1,128,0,1,float16,fp8,0,0.974677324295044
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,512,32,1,128,0,1,fp8,fp8,0,0.6923946539560953
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,512,32,2,128,0,1,float16,float16,0,1.0412373542785645
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,512,32,2,128,0,1,float16,fp8,0,1.0180266698201497
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,512,32,2,128,0,1,fp8,fp8,0,0.7280639807383219
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,512,32,4,128,0,1,float16,fp8,0,1.0641067028045654
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,512,32,4,128,0,1,float16,float16,0,1.0630826950073242
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,512,32,4,128,0,1,fp8,fp8,0,0.7714133262634277
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,512,32,8,128,0,1,float16,float16,0,1.1880106925964355
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,512,32,8,128,0,1,float16,fp8,0,1.1453440189361572
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,512,32,8,128,0,1,fp8,fp8,0,0.8806400299072266
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,512,32,32,128,0,1,float16,float16,0,1.032362699508667
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,512,32,32,128,0,1,float16,fp8,0,0.9574399789174398
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,512,32,1,128,0,1,float16,float16,0,0.4251306851704915
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,512,32,32,128,0,1,fp8,fp8,0,0.7874560356140137
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,512,32,1,128,0,1,float16,fp8,0,0.42342400550842285
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,512,32,1,128,0,1,fp8,fp8,0,0.31675734122594196
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,512,32,2,128,0,1,float16,float16,0,0.4399786790211995
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,512,32,2,128,0,1,float16,fp8,0,0.4452693462371826
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,512,32,2,128,0,1,fp8,fp8,0,0.33484800656636554
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,512,32,4,128,0,1,float16,float16,0,0.482474684715271
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,512,32,4,128,0,1,float16,fp8,0,0.4654080073038737
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,512,32,4,128,0,1,fp8,fp8,0,0.36881065368652344
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,512,32,8,128,0,1,float16,float16,0,0.5509119828542074
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,512,32,8,128,0,1,float16,fp8,0,0.5367466608683268
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,512,32,8,128,0,1,fp8,fp8,0,0.4357120196024577
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,512,32,32,128,0,1,float16,float16,0,0.4478293259938558
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,512,32,32,128,0,1,float16,fp8,0,0.4073813358942668
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,512,32,32,128,0,1,fp8,fp8,0,0.3500373363494873
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,512,32,1,128,0,1,float16,float16,0,0.18636800845464072
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,512,32,1,128,0,1,float16,fp8,0,0.19797333081563315
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,512,32,1,128,0,1,fp8,fp8,0,0.11502933502197266
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,512,32,2,128,0,1,float16,float16,0,0.1950719952583313
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,512,32,2,128,0,1,float16,fp8,0,0.19729065895080566
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,512,32,2,128,0,1,fp8,fp8,0,0.1153706709543864
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,512,32,4,128,0,1,float16,float16,0,0.1945599913597107
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,512,32,4,128,0,1,float16,fp8,0,0.19473065932591757
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,512,32,4,128,0,1,fp8,fp8,0,0.1155413289864858
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,512,32,8,128,0,1,float16,float16,0,0.2044586737950643
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,512,32,8,128,0,1,float16,fp8,0,0.19438934326171875
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,512,32,32,128,0,1,float16,float16,0,0.12270933389663696
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,512,32,8,128,0,1,fp8,fp8,0,0.14011733730634054
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,512,32,32,128,0,1,float16,fp8,0,0.10956799983978271
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,512,32,32,128,0,1,fp8,fp8,0,0.07048533360163371
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,512,32,1,128,0,1,float16,float16,0,0.09659733374913533
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,512,32,1,128,0,1,fp8,fp8,0,0.06656000018119812
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,512,32,2,128,0,1,float16,float16,0,0.09693866968154907
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,512,32,1,128,0,1,float16,fp8,0,0.09745066364606221
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,512,32,2,128,0,1,float16,fp8,0,0.09693866968154907
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,512,32,2,128,0,1,fp8,fp8,0,0.06638933221499126
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,512,32,4,128,0,1,float16,float16,0,0.09915733337402344
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,512,32,4,128,0,1,float16,fp8,0,0.09796266754468282
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,512,32,4,128,0,1,fp8,fp8,0,0.06758399804433186
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,512,32,8,128,0,1,float16,float16,0,0.10086400310198466
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,512,32,8,128,0,1,float16,fp8,0,0.09966933727264404
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,512,32,8,128,0,1,fp8,fp8,0,0.06741333504517873
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,512,32,32,128,0,1,float16,float16,0,0.06468266745408376
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,512,32,32,128,0,1,float16,fp8,0,0.06365866462389629
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,512,32,32,128,0,1,fp8,fp8,0,0.04471466441949209
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,512,32,1,128,0,1,float16,float16,0,0.05922133227189382
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,512,32,1,128,0,1,float16,fp8,0,0.0602453351020813
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,512,32,1,128,0,1,fp8,fp8,0,0.04164266586303711
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,512,32,2,128,0,1,float16,float16,0,0.05973333120346069
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,512,32,2,128,0,1,float16,fp8,0,0.06075733403364817
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,512,32,2,128,0,1,fp8,fp8,0,0.04147200038035711
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,512,32,4,128,0,1,float16,float16,0,0.05973333120346069
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,512,32,4,128,0,1,float16,fp8,0,0.059392000238100685
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,512,32,4,128,0,1,fp8,fp8,0,0.040789333482583366
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,512,32,8,128,0,1,float16,float16,0,0.06092800199985504
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,512,32,8,128,0,1,float16,fp8,0,0.059562668204307556
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,512,32,8,128,0,1,fp8,fp8,0,0.04215466479460398
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,512,32,32,128,0,1,float16,float16,0,0.03583999971548716
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,512,32,32,128,0,1,float16,fp8,0,0.03566933423280716
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,512,32,32,128,0,1,fp8,fp8,0,0.025770666698614757
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,512,32,1,128,0,1,float16,float16,0,0.03583999971548716
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,512,32,1,128,0,1,float16,fp8,0,0.03549866626660029
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,512,32,1,128,0,1,fp8,fp8,0,0.025600001215934753
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,512,32,2,128,0,1,float16,float16,0,0.03549866626660029
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,512,32,2,128,0,1,float16,fp8,0,0.03498666733503342
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,512,32,2,128,0,1,fp8,fp8,0,0.02611200014750163
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,512,32,4,128,0,1,float16,float16,0,0.034815999368826546
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,512,32,4,128,0,1,float16,fp8,0,0.034645333886146545
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,512,32,4,128,0,1,fp8,fp8,0,0.025600001215934753
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,512,32,8,128,0,1,float16,float16,0,0.034304000437259674
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,512,32,8,128,0,1,float16,fp8,0,0.03498666733503342
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,512,32,8,128,0,1,fp8,fp8,0,0.025429333249727886
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,256,32,1,128,0,1,float16,float16,0,3.6145493189493814
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,256,32,1,128,0,1,float16,fp8,0,3.6234238942464194
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,256,32,1,128,0,1,fp8,fp8,0,2.6821972529093423
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,256,32,2,128,0,1,fp8,fp8,0,2.778794606526693
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,256,32,2,128,0,1,float16,fp8,0,3.660799980163574
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,256,32,2,128,0,1,float16,float16,0,3.7239465713500977
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,256,32,4,128,0,1,float16,float16,0,3.9794346491495767
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,256,32,4,128,0,1,float16,fp8,0,3.8761812845865884
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,256,32,4,128,0,1,fp8,fp8,0,2.9276161193847656
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,256,32,8,128,0,1,float16,float16,0,4.603221257527669
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,256,32,8,128,0,1,fp8,fp8,0,3.5058345794677734
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,256,32,1,128,0,1,float16,float16,0,1.7476266225179036
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,256,32,32,128,0,1,float16,float16,0,4.168362617492676
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,256,32,8,128,0,1,float16,fp8,0,4.561237335205078
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,256,32,32,128,0,1,float16,fp8,0,3.9541759490966797
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,256,32,32,128,0,1,fp8,fp8,0,3.4075307846069336
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,256,32,1,128,0,1,float16,fp8,0,1.7491626739501953
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,256,32,1,128,0,1,fp8,fp8,0,1.3351252873738606
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,256,32,2,128,0,1,float16,float16,0,1.8967893918355305
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,256,32,2,128,0,1,fp8,fp8,0,1.3491199811299641
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,256,32,2,128,0,1,float16,fp8,0,1.8025813102722168
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,256,32,4,128,0,1,fp8,fp8,0,1.4387200673421223
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,256,32,4,128,0,1,float16,float16,0,2.1167786916097007
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,256,32,4,128,0,1,float16,fp8,0,1.952426592508952
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,256,32,8,128,0,1,float16,float16,0,2.3026347160339355
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,256,32,8,128,0,1,float16,fp8,0,2.2335146268208823
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,256,32,8,128,0,1,fp8,fp8,0,1.7078612645467122
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,256,32,32,128,0,1,float16,float16,0,2.093397299448649
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,256,32,32,128,0,1,float16,fp8,0,1.99236265818278
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,256,32,32,128,0,1,fp8,fp8,0,1.675605297088623
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,256,32,1,128,0,1,float16,float16,0,0.8282453219095866
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,256,32,1,128,0,1,float16,fp8,0,0.8239786624908447
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,256,32,1,128,0,1,fp8,fp8,0,0.6333440144856771
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,256,32,2,128,0,1,float16,float16,0,0.9359359741210938
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,256,32,2,128,0,1,float16,fp8,0,0.8686933517456055
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,256,32,2,128,0,1,fp8,fp8,0,0.6734506289164225
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,256,32,4,128,0,1,float16,float16,0,1.0321919918060303
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,256,32,4,128,0,1,float16,fp8,0,1.0168320337931316
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,256,32,4,128,0,1,fp8,fp8,0,0.6966613133748373
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,256,32,8,128,0,1,float16,float16,0,1.128618637720744
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,256,32,8,128,0,1,float16,fp8,0,1.0980693499247234
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,256,32,8,128,0,1,fp8,fp8,0,0.8224426905314127
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,256,32,32,128,0,1,float16,float16,0,1.0318506558736165
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,256,32,1,128,0,1,float16,float16,0,0.3326293428738912
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,256,32,32,128,0,1,float16,fp8,0,0.9615360101064047
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,256,32,32,128,0,1,fp8,fp8,0,0.7637333075205485
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,256,32,1,128,0,1,float16,fp8,0,0.33058132727940875
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,256,32,1,128,0,1,fp8,fp8,0,0.254805326461792
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,256,32,2,128,0,1,float16,float16,0,0.3529386520385742
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,256,32,2,128,0,1,float16,fp8,0,0.3456000089645386
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,256,32,2,128,0,1,fp8,fp8,0,0.27477333943049115
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,256,32,4,128,0,1,float16,float16,0,0.40243200461069745
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,256,32,4,128,0,1,fp8,fp8,0,0.307370662689209
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,256,32,4,128,0,1,float16,fp8,0,0.39048532644907635
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,256,32,8,128,0,1,float16,float16,0,0.5007359981536865
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,256,32,8,128,0,1,fp8,fp8,0,0.3696639935175578
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,256,32,8,128,0,1,float16,fp8,0,0.4809386730194092
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,256,32,32,128,0,1,float16,float16,0,0.4474879900614421
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,256,32,32,128,0,1,float16,fp8,0,0.4041386842727661
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,256,32,32,128,0,1,fp8,fp8,0,0.3280213276545207
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,256,32,1,128,0,1,float16,fp8,0,0.1384106675783793
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,256,32,1,128,0,1,float16,float16,0,0.13943466544151306
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,256,32,1,128,0,1,fp8,fp8,0,0.08584533135096233
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,256,32,2,128,0,1,float16,float16,0,0.13158399860064188
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,256,32,2,128,0,1,fp8,fp8,0,0.08533333738644917
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,256,32,2,128,0,1,float16,fp8,0,0.1389226714769999
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,256,32,4,128,0,1,float16,float16,0,0.12902399897575378
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,256,32,4,128,0,1,float16,fp8,0,0.1360213359196981
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,256,32,4,128,0,1,fp8,fp8,0,0.08584533135096233
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,256,32,8,128,0,1,float16,float16,0,0.14779733618100485
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,256,32,8,128,0,1,float16,fp8,0,0.13960533340771994
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,256,32,8,128,0,1,fp8,fp8,0,0.08925867080688477
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,256,32,32,128,0,1,float16,float16,0,0.09215999643007915
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,256,32,32,128,0,1,float16,fp8,0,0.08328533172607422
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,256,32,32,128,0,1,fp8,fp8,0,0.053930665055910744
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,256,32,1,128,0,1,float16,float16,0,0.06929066777229309
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,256,32,1,128,0,1,float16,fp8,0,0.067071999112765
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,256,32,1,128,0,1,fp8,fp8,0,0.04898133377234141
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,256,32,2,128,0,1,float16,float16,0,0.06638933221499126
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,256,32,2,128,0,1,float16,fp8,0,0.06638933221499126
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,256,32,2,128,0,1,fp8,fp8,0,0.048469334840774536
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,256,32,4,128,0,1,float16,float16,0,0.06741333504517873
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,256,32,4,128,0,1,float16,fp8,0,0.06604800124963124
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,256,32,4,128,0,1,fp8,fp8,0,0.048810665806134544
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,256,32,8,128,0,1,float16,float16,0,0.07014399766921997
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,256,32,8,128,0,1,float16,fp8,0,0.06860800087451935
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,256,32,32,128,0,1,float16,float16,0,0.04471466441949209
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,256,32,8,128,0,1,fp8,fp8,0,0.049322664737701416
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,256,32,32,128,0,1,float16,fp8,0,0.044031997521718345
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,256,32,32,128,0,1,fp8,fp8,0,0.034304000437259674
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,256,32,1,128,0,1,float16,float16,0,0.04147200038035711
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,256,32,1,128,0,1,float16,fp8,0,0.04164266586303711
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,256,32,1,128,0,1,fp8,fp8,0,0.03089066594839096
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,256,32,2,128,0,1,float16,float16,0,0.04130133241415024
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,256,32,2,128,0,1,float16,fp8,0,0.04113066693147024
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,256,32,4,128,0,1,float16,float16,0,0.040618665516376495
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,256,32,2,128,0,1,fp8,fp8,0,0.03054933249950409
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,256,32,4,128,0,1,float16,fp8,0,0.040789333482583366
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,256,32,4,128,0,1,fp8,fp8,0,0.030720000465710957
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,256,32,8,128,0,1,float16,float16,0,0.04113066693147024
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,256,32,8,128,0,1,float16,fp8,0,0.04095999896526337
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,256,32,8,128,0,1,fp8,fp8,0,0.031914666295051575
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,256,32,32,128,0,1,float16,float16,0,0.025941332181294758
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,256,32,32,128,0,1,float16,fp8,0,0.025770666698614757
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,256,32,32,128,0,1,fp8,fp8,0,0.019626667102177937
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,256,32,1,128,0,1,float16,float16,0,0.024746666351954143
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,256,32,1,128,0,1,float16,fp8,0,0.025600001215934753
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,256,32,1,128,0,1,fp8,fp8,0,0.01911466692884763
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,256,32,2,128,0,1,float16,float16,0,0.02457600086927414
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,256,32,2,128,0,1,float16,fp8,0,0.025258667767047882
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,256,32,4,128,0,1,float16,float16,0,0.024405332903067272
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,256,32,2,128,0,1,fp8,fp8,0,0.0194560003777345
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,256,32,4,128,0,1,float16,fp8,0,0.02457600086927414
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,256,32,4,128,0,1,fp8,fp8,0,0.01911466692884763
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,256,32,8,128,0,1,float16,float16,0,0.024405332903067272
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,256,32,8,128,0,1,float16,fp8,0,0.025258667767047882
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,256,32,8,128,0,1,fp8,fp8,0,0.01911466692884763
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,256,32,32,128,0,1,float16,float16,0,0.019626667102177937
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,256,32,32,128,0,1,float16,fp8,0,0.0194560003777345
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,256,32,32,128,0,1,fp8,fp8,0,0.015360000232855478
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,256,32,1,128,0,1,float16,float16,0,0.019285333653291065
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,256,32,1,128,0,1,float16,fp8,0,0.019285333653291065
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,256,32,1,128,0,1,fp8,fp8,0,0.014848000059525171
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,256,32,2,128,0,1,float16,float16,0,0.018944000204404194
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,256,32,2,128,0,1,float16,fp8,0,0.01911466692884763
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,256,32,2,128,0,1,fp8,fp8,0,0.014848000059525171
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,256,32,4,128,0,1,float16,float16,0,0.01877333347996076
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,256,32,4,128,0,1,float16,fp8,0,0.01877333347996076
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,256,32,4,128,0,1,fp8,fp8,0,0.014677333335081736
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,256,32,8,128,0,1,float16,float16,0,0.01877333347996076
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,256,32,8,128,0,1,float16,fp8,0,0.018602666755517323
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,256,32,8,128,0,1,fp8,fp8,0,0.015018666783968607
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,128,32,1,128,0,1,fp8,fp8,0,1.222314675649007
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,128,32,1,128,0,1,float16,float16,0,1.757354736328125
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,128,32,1,128,0,1,float16,fp8,0,1.7447253863016765
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,128,32,2,128,0,1,float16,float16,0,1.8218666712443035
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,128,32,2,128,0,1,float16,fp8,0,1.806165377298991
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,128,32,2,128,0,1,fp8,fp8,0,1.2960426807403564
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,128,32,4,128,0,1,float16,float16,0,1.9817813237508137
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,128,32,4,128,0,1,float16,fp8,0,1.9527680079142253
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,128,32,4,128,0,1,fp8,fp8,0,1.4213120142618816
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,128,32,8,128,0,1,float16,float16,0,2.320554733276367
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,128,32,8,128,0,1,fp8,fp8,0,1.7247573534647624
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,128,32,8,128,0,1,float16,fp8,0,2.244266668955485
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,128,32,32,128,0,1,float16,float16,0,2.1073919932047525
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,128,32,32,128,0,1,float16,fp8,0,2.003455956776937
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,128,32,32,128,0,1,fp8,fp8,0,1.6447146733601887
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,128,32,1,128,0,1,float16,float16,0,0.8376320203145345
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,128,32,1,128,0,1,float16,fp8,0,0.8313173453013102
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,128,32,1,128,0,1,fp8,fp8,0,0.5978453159332275
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,128,32,2,128,0,1,float16,float16,0,0.9089706738789877
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,128,32,2,128,0,1,float16,fp8,0,0.9101653099060059
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,128,32,2,128,0,1,fp8,fp8,0,0.5995519955952963
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,128,32,4,128,0,1,float16,float16,0,0.9678506851196289
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,128,32,4,128,0,1,float16,fp8,0,0.9521493117014567
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,128,32,4,128,0,1,fp8,fp8,0,0.6647466818491617
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,128,32,8,128,0,1,float16,float16,0,1.1327146689097087
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,128,32,8,128,0,1,fp8,fp8,0,0.8200533390045166
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,128,32,8,128,0,1,float16,fp8,0,1.1062613328297932
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,128,32,32,128,0,1,float16,float16,0,1.0432853698730469
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,128,32,1,128,0,1,float16,float16,0,0.32733867565790814
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,128,32,32,128,0,1,float16,fp8,0,0.9669973055521647
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,128,32,32,128,0,1,fp8,fp8,0,0.7683413028717041
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,128,32,1,128,0,1,float16,fp8,0,0.3479893207550049
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,128,32,1,128,0,1,fp8,fp8,0,0.2486613392829895
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,128,32,2,128,0,1,float16,float16,0,0.3742719888687134
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,128,32,2,128,0,1,float16,fp8,0,0.34201598167419434
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,128,32,2,128,0,1,fp8,fp8,0,0.25088000297546387
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,128,32,4,128,0,1,float16,float16,0,0.4118186632792155
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,128,32,4,128,0,1,float16,fp8,0,0.3947519858678182
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,128,32,4,128,0,1,fp8,fp8,0,0.28654932975769043
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,128,32,8,128,0,1,float16,float16,0,0.5111466646194458
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,128,32,8,128,0,1,float16,fp8,0,0.4904959996541341
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,128,32,8,128,0,1,fp8,fp8,0,0.3843413194020589
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,128,32,32,128,0,1,fp8,fp8,0,0.3174399932225545
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,128,32,32,128,0,1,float16,fp8,0,0.40328534444173175
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,128,32,32,128,0,1,float16,float16,0,0.4471466541290283
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,128,32,1,128,0,1,float16,float16,0,0.09437867005666097
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,128,32,1,128,0,1,float16,fp8,0,0.09574400385220845
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,128,32,1,128,0,1,fp8,fp8,0,0.07082666456699371
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,128,32,2,128,0,1,float16,float16,0,0.09540266791979472
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,128,32,2,128,0,1,fp8,fp8,0,0.06946133573849995
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,128,32,2,128,0,1,float16,fp8,0,0.09471999605496724
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,128,32,4,128,0,1,float16,float16,0,0.10240000486373901
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,128,32,4,128,0,1,float16,fp8,0,0.10103467106819153
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,128,32,8,128,0,1,float16,float16,0,0.12475732962290446
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,128,32,4,128,0,1,fp8,fp8,0,0.07099733253320058
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,128,32,8,128,0,1,float16,fp8,0,0.11417599519093831
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,128,32,8,128,0,1,fp8,fp8,0,0.07799466451009114
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,128,32,32,128,0,1,float16,float16,0,0.07799466451009114
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,128,32,32,128,0,1,float16,fp8,0,0.062122667829195656
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,128,32,32,128,0,1,fp8,fp8,0,0.045567999283472695
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,128,32,1,128,0,1,float16,float16,0,0.0506879985332489
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,128,32,1,128,0,1,float16,fp8,0,0.051029334465662636
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,128,32,1,128,0,1,fp8,fp8,0,0.03976533313592275
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,128,32,2,128,0,1,float16,float16,0,0.051882664362589516
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,128,32,2,128,0,1,float16,fp8,0,0.051370665431022644
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,128,32,2,128,0,1,fp8,fp8,0,0.039936001102129616
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,128,32,4,128,0,1,float16,float16,0,0.051370665431022644
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,128,32,4,128,0,1,float16,fp8,0,0.051882664362589516
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,128,32,4,128,0,1,fp8,fp8,0,0.03976533313592275
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,128,32,8,128,0,1,float16,float16,0,0.05273599922657013
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,128,32,8,128,0,1,float16,fp8,0,0.05239466826121012
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,128,32,8,128,0,1,fp8,fp8,0,0.04095999896526337
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,128,32,32,128,0,1,float16,float16,0,0.03618133316437403
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,128,32,32,128,0,1,float16,fp8,0,0.034815999368826546
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,128,32,32,128,0,1,fp8,fp8,0,0.02867199977238973
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,128,32,1,128,0,1,float16,float16,0,0.03259733319282532
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,128,32,1,128,0,1,float16,fp8,0,0.031744000812371574
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,128,32,1,128,0,1,fp8,fp8,0,0.025429333249727886
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,128,32,2,128,0,1,float16,float16,0,0.03242666771014532
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,128,32,2,128,0,1,float16,fp8,0,0.031914666295051575
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,128,32,2,128,0,1,fp8,fp8,0,0.025600001215934753
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,128,32,4,128,0,1,float16,float16,0,0.031914666295051575
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,128,32,4,128,0,1,float16,fp8,0,0.031914666295051575
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,128,32,4,128,0,1,fp8,fp8,0,0.025770666698614757
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,128,32,8,128,0,1,float16,float16,0,0.032085334261258446
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,128,32,32,128,0,1,float16,float16,0,0.021503999829292297
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,128,32,8,128,0,1,float16,fp8,0,0.031744000812371574
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,128,32,8,128,0,1,fp8,fp8,0,0.02611200014750163
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,128,32,32,128,0,1,float16,fp8,0,0.021333334346612293
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,128,32,32,128,0,1,fp8,fp8,0,0.017237332959969837
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,128,32,1,128,0,1,float16,float16,0,0.02065066620707512
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,128,32,1,128,0,1,float16,fp8,0,0.02065066620707512
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,128,32,1,128,0,1,fp8,fp8,0,0.016384000579516094
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,128,32,2,128,0,1,float16,float16,0,0.020138667275508244
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,128,32,2,128,0,1,float16,fp8,0,0.020138667275508244
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,128,32,2,128,0,1,fp8,fp8,0,0.01672533278663953
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,128,32,4,128,0,1,float16,float16,0,0.019968000551064808
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,128,32,4,128,0,1,float16,fp8,0,0.019968000551064808
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,128,32,4,128,0,1,fp8,fp8,0,0.016895999511082966
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,128,32,8,128,0,1,float16,float16,0,0.020138667275508244
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,128,32,8,128,0,1,float16,fp8,0,0.020309332758188248
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,128,32,8,128,0,1,fp8,fp8,0,0.016384000579516094
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,128,32,32,128,0,1,float16,float16,0,0.015360000232855478
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,128,32,32,128,0,1,fp8,fp8,0,0.013141332815090815
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,128,32,32,128,0,1,float16,fp8,0,0.015189333508412043
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,128,32,1,128,0,1,float16,float16,0,0.015018666783968607
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,128,32,1,128,0,1,float16,fp8,0,0.015018666783968607
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,128,32,1,128,0,1,fp8,fp8,0,0.012629333883523941
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,128,32,2,128,0,1,float16,float16,0,0.0145066666106383
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,128,32,2,128,0,1,float16,fp8,0,0.0145066666106383
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,128,32,4,128,0,1,float16,float16,0,0.014677333335081736
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,128,32,2,128,0,1,fp8,fp8,0,0.012458667159080505
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,128,32,4,128,0,1,float16,fp8,0,0.0145066666106383
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,128,32,4,128,0,1,fp8,fp8,0,0.012458667159080505
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,128,32,8,128,0,1,float16,float16,0,0.014848000059525171
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,128,32,8,128,0,1,float16,fp8,0,0.014677333335081736
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,128,32,8,128,0,1,fp8,fp8,0,0.012458667159080505
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,128,32,32,128,0,1,float16,float16,0,0.012970666090647379
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,128,32,32,128,0,1,float16,fp8,0,0.013141332815090815
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,128,32,32,128,0,1,fp8,fp8,0,0.011264000087976456
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,128,32,1,128,0,1,float16,float16,0,0.012970666090647379
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,128,32,1,128,0,1,float16,fp8,0,0.013141332815090815
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,128,32,1,128,0,1,fp8,fp8,0,0.010922666639089584
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,128,32,2,128,0,1,float16,float16,0,0.013141332815090815
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,128,32,2,128,0,1,float16,fp8,0,0.013141332815090815
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,128,32,2,128,0,1,fp8,fp8,0,0.010922666639089584
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,128,32,4,128,0,1,float16,float16,0,0.012800000607967377
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,128,32,4,128,0,1,float16,fp8,0,0.012970666090647379
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,128,32,4,128,0,1,fp8,fp8,0,0.010922666639089584
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,128,32,8,128,0,1,float16,float16,0,0.012800000607967377
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,128,32,8,128,0,1,float16,fp8,0,0.012970666090647379
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,128,32,8,128,0,1,fp8,fp8,0,0.010922666639089584
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,64,32,1,128,0,1,float16,fp8,0,0.8401920000712076
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,64,32,1,128,0,1,fp8,fp8,0,0.5560319821039835
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,64,32,1,128,0,1,float16,float16,0,0.8436053593953451
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,64,32,2,128,0,1,float16,float16,0,0.8785920143127441
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,64,32,2,128,0,1,float16,fp8,0,0.8697173595428467
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,64,32,2,128,0,1,fp8,fp8,0,0.5983573198318481
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,64,32,4,128,0,1,float16,float16,0,0.9688746929168701
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,64,32,4,128,0,1,float16,fp8,0,0.9533440272013346
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,64,32,4,128,0,1,fp8,fp8,0,0.6842026710510254
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,64,32,8,128,0,1,float16,float16,0,1.1258880297342937
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,64,32,8,128,0,1,float16,fp8,0,1.105237325032552
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,64,32,8,128,0,1,fp8,fp8,0,0.8272213141123453
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,64,32,32,128,0,1,float16,float16,0,1.0403839747111003
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,64,32,32,128,0,1,float16,fp8,0,0.9838933149973551
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,64,32,32,128,0,1,fp8,fp8,0,0.7816533247629801
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,64,32,1,128,0,1,float16,float16,0,0.33177600304285687
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,64,32,1,128,0,1,float16,fp8,0,0.32819199562072754
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,64,32,1,128,0,1,fp8,fp8,0,0.22203733523686728
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,64,32,2,128,0,1,float16,float16,0,0.355840007464091
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,64,32,2,128,0,1,float16,fp8,0,0.3495253324508667
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,64,32,2,128,0,1,fp8,fp8,0,0.237226665019989
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,64,32,4,128,0,1,float16,float16,0,0.4113066593805949
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,64,32,4,128,0,1,float16,fp8,0,0.3947519858678182
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,64,32,4,128,0,1,fp8,fp8,0,0.27409066756566364
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,64,32,8,128,0,1,float16,float16,0,0.5150719881057739
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,64,32,8,128,0,1,float16,fp8,0,0.4949333270390828
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,64,32,8,128,0,1,fp8,fp8,0,0.3543039957682292
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,64,32,32,128,0,1,float16,float16,0,0.45687464872996014
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,64,32,32,128,0,1,float16,fp8,0,0.4020906686782837
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,64,32,32,128,0,1,fp8,fp8,0,0.3188053369522095
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,64,32,1,128,0,1,float16,float16,0,0.08243200182914734
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,64,32,1,128,0,1,float16,fp8,0,0.08311466872692108
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,64,32,1,128,0,1,fp8,fp8,0,0.06331733365853627
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,64,32,2,128,0,1,float16,float16,0,0.08413867155710857
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,64,32,2,128,0,1,float16,fp8,0,0.08277333279450734
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,64,32,2,128,0,1,fp8,fp8,0,0.06263466676076253
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,64,32,4,128,0,1,float16,float16,0,0.09471999605496724
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,64,32,4,128,0,1,float16,fp8,0,0.09028266867001851
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,64,32,4,128,0,1,fp8,fp8,0,0.07236266632874806
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,64,32,8,128,0,1,float16,float16,0,0.11741866668065389
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,64,32,8,128,0,1,float16,fp8,0,0.1063253382841746
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,64,32,8,128,0,1,fp8,fp8,0,0.07014399766921997
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,64,32,32,128,0,1,float16,float16,0,0.08004266520341237
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,64,32,32,128,0,1,float16,fp8,0,0.056661332647005715
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,64,32,32,128,0,1,fp8,fp8,0,0.04095999896526337
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,64,32,1,128,0,1,float16,float16,0,0.04420266548792521
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,64,32,1,128,0,1,float16,fp8,0,0.04471466441949209
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,64,32,1,128,0,1,fp8,fp8,0,0.03583999971548716
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,64,32,2,128,0,1,float16,float16,0,0.04471466441949209
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,64,32,2,128,0,1,float16,fp8,0,0.04488533238569895
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,64,32,2,128,0,1,fp8,fp8,0,0.03515733281771342
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,64,32,4,128,0,1,float16,float16,0,0.04471466441949209
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,64,32,4,128,0,1,float16,fp8,0,0.04454400142033895
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,64,32,4,128,0,1,fp8,fp8,0,0.03549866626660029
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,64,32,8,128,0,1,float16,float16,0,0.04761599997679392
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,64,32,8,128,0,1,float16,fp8,0,0.045909335215886436
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,64,32,8,128,0,1,fp8,fp8,0,0.037205333511034645
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,64,32,32,128,0,1,float16,float16,0,0.0314026673634847
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,64,32,32,128,0,1,float16,fp8,0,0.030207999050617218
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,64,32,32,128,0,1,fp8,fp8,0,0.025429333249727886
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,64,32,1,128,0,1,float16,float16,0,0.02679466704527537
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,64,32,1,128,0,1,float16,fp8,0,0.026965332527955372
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,64,32,1,128,0,1,fp8,fp8,0,0.02252800017595291
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,64,32,2,128,0,1,float16,float16,0,0.027306665976842243
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,64,32,2,128,0,1,float16,fp8,0,0.027306665976842243
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,64,32,2,128,0,1,fp8,fp8,0,0.022015998760859173
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,64,32,4,128,0,1,float16,float16,0,0.027306665976842243
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,64,32,4,128,0,1,float16,fp8,0,0.027306665976842243
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,64,32,8,128,0,1,float16,float16,0,0.027477333943049114
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,64,32,4,128,0,1,fp8,fp8,0,0.02252800017595291
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,64,32,8,128,0,1,float16,fp8,0,0.027477333943049114
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,64,32,8,128,0,1,fp8,fp8,0,0.023039999107519787
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,64,32,32,128,0,1,float16,float16,0,0.01877333347996076
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,64,32,32,128,0,1,float16,fp8,0,0.018602666755517323
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,64,32,32,128,0,1,fp8,fp8,0,0.01570133368174235
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,64,32,1,128,0,1,float16,float16,0,0.0170666662355264
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,64,32,1,128,0,1,float16,fp8,0,0.017407999684413273
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,64,32,1,128,0,1,fp8,fp8,0,0.014677333335081736
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,64,32,2,128,0,1,float16,float16,0,0.017237332959969837
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,64,32,2,128,0,1,fp8,fp8,0,0.0145066666106383
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,64,32,4,128,0,1,float16,float16,0,0.0170666662355264
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,64,32,2,128,0,1,float16,fp8,0,0.017407999684413273
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,64,32,4,128,0,1,float16,fp8,0,0.017237332959969837
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,64,32,4,128,0,1,fp8,fp8,0,0.014848000059525171
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,64,32,8,128,0,1,float16,float16,0,0.017407999684413273
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,64,32,8,128,0,1,float16,fp8,0,0.017407999684413273
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,64,32,8,128,0,1,fp8,fp8,0,0.015189333508412043
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,64,32,32,128,0,1,float16,float16,0,0.013482666263977686
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,64,32,32,128,0,1,float16,fp8,0,0.01331199953953425
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,64,32,32,128,0,1,fp8,fp8,0,0.011946666985750198
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,64,32,1,128,0,1,float16,float16,0,0.012458667159080505
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,64,32,1,128,0,1,fp8,fp8,0,0.011434666812419891
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,64,32,1,128,0,1,float16,fp8,0,0.012629333883523941
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,64,32,2,128,0,1,float16,float16,0,0.012629333883523941
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,64,32,2,128,0,1,float16,fp8,0,0.012629333883523941
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,64,32,2,128,0,1,fp8,fp8,0,0.01109333336353302
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,64,32,4,128,0,1,float16,float16,0,0.012629333883523941
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,64,32,4,128,0,1,fp8,fp8,0,0.01109333336353302
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,64,32,4,128,0,1,float16,fp8,0,0.012629333883523941
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,64,32,8,128,0,1,float16,float16,0,0.012629333883523941
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,64,32,8,128,0,1,fp8,fp8,0,0.011264000087976456
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,64,32,8,128,0,1,float16,fp8,0,0.012629333883523941
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,64,32,32,128,0,1,float16,float16,0,0.01109333336353302
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,64,32,32,128,0,1,float16,fp8,0,0.01109333336353302
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,64,32,32,128,0,1,fp8,fp8,0,0.010069333637754122
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,64,32,1,128,0,1,fp8,fp8,0,0.010069333637754122
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,64,32,1,128,0,1,float16,float16,0,0.010751999914646149
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,64,32,2,128,0,1,float16,float16,0,0.010751999914646149
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,64,32,1,128,0,1,float16,fp8,0,0.010922666639089584
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,64,32,2,128,0,1,fp8,fp8,0,0.010069333637754122
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,64,32,2,128,0,1,float16,fp8,0,0.010751999914646149
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,64,32,4,128,0,1,float16,fp8,0,0.010922666639089584
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,64,32,4,128,0,1,float16,float16,0,0.010922666639089584
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,64,32,4,128,0,1,fp8,fp8,0,0.010069333637754122
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,64,32,8,128,0,1,float16,float16,0,0.010751999914646149
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,64,32,8,128,0,1,float16,fp8,0,0.010922666639089584
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,64,32,32,128,0,1,float16,float16,0,0.009898666913310686
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,64,32,32,128,0,1,float16,fp8,0,0.010239999741315842
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,64,32,32,128,0,1,fp8,fp8,0,0.009216000015536943
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,64,32,1,128,0,1,float16,float16,0,0.010239999741315842
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,64,32,8,128,0,1,fp8,fp8,0,0.009898666913310686
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,64,32,1,128,0,1,float16,fp8,0,0.010069333637754122
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,64,32,1,128,0,1,fp8,fp8,0,0.00938666673998038
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,64,32,2,128,0,1,float16,float16,0,0.010069333637754122
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,64,32,2,128,0,1,float16,fp8,0,0.010239999741315842
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,64,32,4,128,0,1,float16,float16,0,0.01109333336353302
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,64,32,4,128,0,1,float16,fp8,0,0.010239999741315842
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,64,32,2,128,0,1,fp8,fp8,0,0.00938666673998038
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,64,32,4,128,0,1,fp8,fp8,0,0.009216000015536943
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,64,32,8,128,0,1,float16,float16,0,0.009898666913310686
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,64,32,8,128,0,1,float16,fp8,0,0.010410666465759277
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,64,32,8,128,0,1,fp8,fp8,0,0.00938666673998038
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,32,32,1,128,0,1,float16,float16,0,0.3336533308029175
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,32,32,1,128,0,1,float16,fp8,0,0.3295573393503825
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,32,32,1,128,0,1,fp8,fp8,0,0.2333013415336609
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,32,32,2,128,0,1,float16,float16,0,0.35498666763305664
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,32,32,2,128,0,1,float16,fp8,0,0.3510613441467285
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,32,32,2,128,0,1,fp8,fp8,0,0.2515626748402913
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,32,32,4,128,0,1,float16,float16,0,0.4116479953130086
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,32,32,4,128,0,1,float16,fp8,0,0.39765334129333496
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,32,32,4,128,0,1,fp8,fp8,0,0.2887679934501648
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,32,32,8,128,0,1,float16,float16,0,0.5326506694157919
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,32,32,8,128,0,1,float16,fp8,0,0.4910080035527547
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,32,32,8,128,0,1,fp8,fp8,0,0.3572053511937459
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,32,32,32,128,0,1,float16,fp8,0,0.40396801630655926
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,32,32,32,128,0,1,float16,float16,0,0.46506667137145996
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,32,32,32,128,0,1,fp8,fp8,0,0.32546132802963257
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,32,32,1,128,0,1,float16,float16,0,0.08635733524958293
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,32,32,1,128,0,1,float16,fp8,0,0.0865280032157898
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,32,32,1,128,0,1,fp8,fp8,0,0.06946133573849995
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,32,32,2,128,0,1,float16,float16,0,0.08994133273760478
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,32,32,2,128,0,1,float16,fp8,0,0.08721066514650981
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,32,32,2,128,0,1,fp8,fp8,0,0.07048533360163371
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,32,32,4,128,0,1,float16,float16,0,0.09659733374913533
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,32,32,4,128,0,1,float16,fp8,0,0.09147733449935913
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,32,32,4,128,0,1,fp8,fp8,0,0.07133866846561432
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,32,32,8,128,0,1,float16,float16,0,0.11912533640861511
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,32,32,8,128,0,1,float16,fp8,0,0.10598400235176086
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,32,32,8,128,0,1,fp8,fp8,0,0.08345599969228108
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,32,32,32,128,0,1,float16,float16,0,0.07970133423805237
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,32,32,32,128,0,1,float16,fp8,0,0.05836800237496694
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,32,32,1,128,0,1,float16,float16,0,0.04659200211366018
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,32,32,1,128,0,1,float16,fp8,0,0.046762665112813316
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,32,32,32,128,0,1,fp8,fp8,0,0.04351999859015147
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,32,32,1,128,0,1,fp8,fp8,0,0.03908266623814901
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,32,32,2,128,0,1,float16,float16,0,0.04710400104522705
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,32,32,2,128,0,1,float16,fp8,0,0.04693333307902018
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,32,32,2,128,0,1,fp8,fp8,0,0.03925333420435587
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,32,32,4,128,0,1,float16,float16,0,0.04693333307902018
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,32,32,4,128,0,1,float16,fp8,0,0.04710400104522705
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,32,32,4,128,0,1,fp8,fp8,0,0.03908266623814901
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,32,32,8,128,0,1,float16,fp8,0,0.04795733094215393
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,32,32,8,128,0,1,fp8,fp8,0,0.04027733455101649
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,32,32,32,128,0,1,float16,float16,0,0.031914666295051575
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,32,32,8,128,0,1,float16,float16,0,0.04898133377234141
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,32,32,32,128,0,1,float16,fp8,0,0.031061333914597828
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,32,32,32,128,0,1,fp8,fp8,0,0.02679466704527537
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,32,32,1,128,0,1,float16,float16,0,0.028501334289709728
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,32,32,1,128,0,1,float16,fp8,0,0.02867199977238973
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,32,32,2,128,0,1,float16,float16,0,0.028501334289709728
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,32,32,1,128,0,1,fp8,fp8,0,0.02457600086927414
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,32,32,2,128,0,1,float16,fp8,0,0.028501334289709728
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,32,32,2,128,0,1,fp8,fp8,0,0.024746666351954143
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,32,32,4,128,0,1,float16,float16,0,0.02867199977238973
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,32,32,4,128,0,1,float16,fp8,0,0.028501334289709728
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,32,32,4,128,0,1,fp8,fp8,0,0.024405332903067272
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,32,32,8,128,0,1,float16,fp8,0,0.0290133332212766
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,32,32,8,128,0,1,float16,float16,0,0.0290133332212766
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,32,32,8,128,0,1,fp8,fp8,0,0.025258667767047882
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,32,32,32,128,0,1,float16,float16,0,0.020138667275508244
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,32,32,1,128,0,1,float16,float16,0,0.01826133330663045
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,32,32,32,128,0,1,fp8,fp8,0,0.017237332959969837
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,32,32,32,128,0,1,float16,fp8,0,0.019968000551064808
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,32,32,1,128,0,1,fp8,fp8,0,0.015872000406185787
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,32,32,1,128,0,1,float16,fp8,0,0.018602666755517323
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,32,32,2,128,0,1,float16,float16,0,0.018432000031073887
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,32,32,2,128,0,1,float16,fp8,0,0.018602666755517323
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,32,32,2,128,0,1,fp8,fp8,0,0.016042667130629223
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,32,32,4,128,0,1,float16,float16,0,0.01826133330663045
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,32,32,4,128,0,1,float16,fp8,0,0.018602666755517323
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,32,32,4,128,0,1,fp8,fp8,0,0.016384000579516094
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,32,32,8,128,0,1,float16,float16,0,0.018602666755517323
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,32,32,8,128,0,1,float16,fp8,0,0.018602666755517323
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,32,32,8,128,0,1,fp8,fp8,0,0.01621333385507266
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,32,32,32,128,0,1,float16,float16,0,0.013482666263977686
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,32,32,32,128,0,1,float16,fp8,0,0.012970666090647379
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,32,32,32,128,0,1,fp8,fp8,0,0.011776000261306763
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,32,32,1,128,0,1,float16,float16,0,0.012458667159080505
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,32,32,1,128,0,1,float16,fp8,0,0.012629333883523941
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,32,32,1,128,0,1,fp8,fp8,0,0.01109333336353302
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,32,32,2,128,0,1,float16,float16,0,0.01228800043463707
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,32,32,2,128,0,1,float16,fp8,0,0.012458667159080505
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,32,32,2,128,0,1,fp8,fp8,0,0.011434666812419891
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,32,32,4,128,0,1,float16,float16,0,0.01228800043463707
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,32,32,4,128,0,1,float16,fp8,0,0.01228800043463707
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,32,32,4,128,0,1,fp8,fp8,0,0.011605333536863327
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,32,32,8,128,0,1,float16,float16,0,0.012458667159080505
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,32,32,8,128,0,1,float16,fp8,0,0.012629333883523941
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,32,32,32,128,0,1,float16,float16,0,0.009898666913310686
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,32,32,8,128,0,1,fp8,fp8,0,0.011264000087976456
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,32,32,32,128,0,1,float16,fp8,0,0.010410666465759277
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,32,32,32,128,0,1,fp8,fp8,0,0.00938666673998038
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,32,32,1,128,0,1,float16,float16,0,0.00972800018886725
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,32,32,1,128,0,1,float16,fp8,0,0.009898666913310686
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,32,32,2,128,0,1,float16,float16,0,0.009898666913310686
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,32,32,1,128,0,1,fp8,fp8,0,0.009045333291093508
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,32,32,2,128,0,1,float16,fp8,0,0.009898666913310686
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,32,32,4,128,0,1,float16,float16,0,0.00972800018886725
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,32,32,4,128,0,1,float16,fp8,0,0.010069333637754122
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,32,32,2,128,0,1,fp8,fp8,0,0.009045333291093508
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,32,32,4,128,0,1,fp8,fp8,0,0.009045333291093508
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,32,32,8,128,0,1,float16,float16,0,0.009557333464423815
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,32,32,8,128,0,1,float16,fp8,0,0.009898666913310686
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,32,32,8,128,0,1,fp8,fp8,0,0.009216000015536943
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,32,32,32,128,0,1,float16,float16,0,0.009216000015536943
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,32,32,32,128,0,1,float16,fp8,0,0.009216000015536943
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,32,32,32,128,0,1,fp8,fp8,0,0.0085333331177632
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,32,32,1,128,0,1,float16,float16,0,0.009898666913310686
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,32,32,1,128,0,1,float16,fp8,0,0.009045333291093508
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,32,32,1,128,0,1,fp8,fp8,0,0.0085333331177632
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,32,32,2,128,0,1,float16,float16,0,0.009045333291093508
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,32,32,2,128,0,1,fp8,fp8,0,0.008362666393319765
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,32,32,4,128,0,1,float16,float16,0,0.008874666566650072
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,32,32,4,128,0,1,float16,fp8,0,0.00938666673998038
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,32,32,2,128,0,1,float16,fp8,0,0.010069333637754122
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,32,32,8,128,0,1,float16,float16,0,0.009045333291093508
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,32,32,4,128,0,1,fp8,fp8,0,0.009712000067035357
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,32,32,8,128,0,1,float16,fp8,0,0.00938666673998038
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,32,32,8,128,0,1,fp8,fp8,0,0.00972800018886725
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,32,32,32,128,0,1,float16,float16,0,0.008362666393319765
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,32,32,32,128,0,1,float16,fp8,0,0.008703999842206636
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,32,32,32,128,0,1,fp8,fp8,0,0.0085333331177632
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,32,32,1,128,0,1,float16,fp8,0,0.008874666566650072
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,32,32,1,128,0,1,fp8,fp8,0,0.008362666393319765
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,32,32,1,128,0,1,float16,float16,0,0.0085333331177632
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,32,32,2,128,0,1,float16,float16,0,0.00938666673998038
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,32,32,2,128,0,1,float16,fp8,0,0.009045333291093508
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,32,32,2,128,0,1,fp8,fp8,0,0.008362666393319765
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,32,32,4,128,0,1,float16,float16,0,0.008703999842206636
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,32,32,4,128,0,1,float16,fp8,0,0.008874666566650072
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,32,32,4,128,0,1,fp8,fp8,0,0.008362666393319765
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,32,32,8,128,0,1,float16,float16,0,0.009216000015536943
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,32,32,8,128,0,1,float16,fp8,0,0.00972800018886725
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,32,32,8,128,0,1,fp8,fp8,0,0.00938666673998038
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,16,32,1,128,0,1,float16,float16,0,0.11946666240692139
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,16,32,1,128,0,1,fp8,fp8,0,0.09506133198738098
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,16,32,1,128,0,1,float16,fp8,0,0.1186133325099945
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,16,32,2,128,0,1,float16,fp8,0,0.11844266454378764
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,16,32,2,128,0,1,float16,float16,0,0.1204906702041626
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,16,32,2,128,0,1,fp8,fp8,0,0.0962559978167216
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,16,32,4,128,0,1,float16,float16,0,0.12714667121569315
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,16,32,4,128,0,1,float16,fp8,0,0.12219732999801636
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,16,32,4,128,0,1,fp8,fp8,0,0.09659733374913533
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,16,32,8,128,0,1,float16,float16,0,0.14643200238545737
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,16,32,32,128,0,1,float16,float16,0,0.09250133236249287
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,16,32,8,128,0,1,fp8,fp8,0,0.1053013304869334
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,16,32,8,128,0,1,float16,fp8,0,0.13414399822553
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,16,32,32,128,0,1,fp8,fp8,0,0.055125330885251365
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,16,32,1,128,0,1,float16,float16,0,0.06365866462389629
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,16,32,1,128,0,1,float16,fp8,0,0.06434133152167003
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,16,32,1,128,0,1,fp8,fp8,0,0.05120000243186951
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,16,32,2,128,0,1,float16,fp8,0,0.06382933259010315
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,16,32,2,128,0,1,float16,float16,0,0.06468266745408376
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,16,32,2,128,0,1,fp8,fp8,0,0.05120000243186951
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,16,32,32,128,0,1,float16,fp8,0,0.0718506673971812
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,16,32,4,128,0,1,fp8,fp8,0,0.05171200136343638
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,16,32,4,128,0,1,float16,fp8,0,0.06468266745408376
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,16,32,4,128,0,1,float16,float16,0,0.06485333542029063
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,16,32,8,128,0,1,float16,float16,0,0.06553600231806438
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,16,32,8,128,0,1,float16,fp8,0,0.06468266745408376
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,16,32,8,128,0,1,fp8,fp8,0,0.05256533126036326
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,16,32,32,128,0,1,float16,float16,0,0.038912000755469
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,16,32,32,128,0,1,fp8,fp8,0,0.032255999743938446
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,16,32,1,128,0,1,float16,float16,0,0.03669333209594091
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,16,32,1,128,0,1,float16,fp8,0,0.03669333209594091
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,16,32,1,128,0,1,fp8,fp8,0,0.030720000465710957
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,16,32,2,128,0,1,float16,float16,0,0.03669333209594091
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,16,32,2,128,0,1,float16,fp8,0,0.037205333511034645
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,16,32,2,128,0,1,fp8,fp8,0,0.03089066594839096
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,16,32,4,128,0,1,float16,float16,0,0.03737599899371465
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,16,32,4,128,0,1,float16,fp8,0,0.03703466554482778
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,16,32,4,128,0,1,fp8,fp8,0,0.03089066594839096
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,16,32,8,128,0,1,float16,float16,0,0.037205333511034645
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,16,32,8,128,0,1,float16,fp8,0,0.036864000062147774
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,16,32,8,128,0,1,fp8,fp8,0,0.0314026673634847
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,16,32,32,128,0,1,float16,float16,0,0.023210667073726654
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,16,32,32,128,0,1,float16,fp8,0,0.022869333624839783
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,16,32,32,128,0,1,float16,fp8,0,0.03754666695992152
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,16,32,32,128,0,1,fp8,fp8,0,0.019626667102177937
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,16,32,1,128,0,1,float16,fp8,0,0.02252800017595291
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,16,32,1,128,0,1,fp8,fp8,0,0.019285333653291065
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,16,32,1,128,0,1,float16,float16,0,0.02252800017595291
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,16,32,2,128,0,1,float16,float16,0,0.022357332209746044
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,16,32,2,128,0,1,float16,fp8,0,0.022357332209746044
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,16,32,2,128,0,1,fp8,fp8,0,0.01911466692884763
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,16,32,4,128,0,1,float16,fp8,0,0.02218666672706604
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,16,32,8,128,0,1,float16,float16,0,0.02218666672706604
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,16,32,4,128,0,1,fp8,fp8,0,0.019285333653291065
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,16,32,8,128,0,1,fp8,fp8,0,0.019285333653291065
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,16,32,32,128,0,1,float16,float16,0,0.015018666783968607
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,16,32,8,128,0,1,float16,fp8,0,0.02252800017595291
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,16,32,32,128,0,1,float16,fp8,0,0.015018666783968607
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,16,32,32,128,0,1,fp8,fp8,0,0.013141332815090815
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,16,32,4,128,0,1,float16,float16,0,0.022357332209746044
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,16,32,1,128,0,1,float16,float16,0,0.0145066666106383
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,16,32,1,128,0,1,float16,fp8,0,0.014677333335081736
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,16,32,1,128,0,1,fp8,fp8,0,0.012800000607967377
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,16,32,2,128,0,1,float16,float16,0,0.014677333335081736
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,16,32,2,128,0,1,float16,fp8,0,0.014677333335081736
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,16,32,2,128,0,1,fp8,fp8,0,0.012800000607967377
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,16,32,4,128,0,1,float16,float16,0,0.0145066666106383
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,16,32,4,128,0,1,fp8,fp8,0,0.012629333883523941
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,16,32,8,128,0,1,float16,float16,0,0.014848000059525171
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,16,32,8,128,0,1,float16,fp8,0,0.014848000059525171
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,16,32,8,128,0,1,fp8,fp8,0,0.012970666090647379
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,16,32,32,128,0,1,float16,float16,0,0.010410666465759277
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,16,32,32,128,0,1,float16,fp8,0,0.010922666639089584
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,16,32,32,128,0,1,fp8,fp8,0,0.00972800018886725
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,16,32,1,128,0,1,float16,float16,0,0.010239999741315842
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,16,32,1,128,0,1,float16,fp8,0,0.010581333190202713
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,16,32,1,128,0,1,fp8,fp8,0,0.00972800018886725
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,16,32,2,128,0,1,float16,float16,0,0.010410666465759277
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,16,32,2,128,0,1,float16,fp8,0,0.010581333190202713
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,16,32,2,128,0,1,fp8,fp8,0,0.010069333637754122
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,16,32,4,128,0,1,float16,float16,0,0.010069333637754122
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,16,32,4,128,0,1,float16,fp8,0,0.010239999741315842
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,16,32,4,128,0,1,fp8,fp8,0,0.009557333464423815
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,16,32,8,128,0,1,float16,float16,0,0.010069333637754122
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,16,32,8,128,0,1,float16,fp8,0,0.010410666465759277
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,16,32,4,128,0,1,float16,fp8,0,0.0145066666106383
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,16,32,32,128,0,1,float16,float16,0,0.008874666566650072
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,16,32,8,128,0,1,fp8,fp8,0,0.010581333190202713
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,16,32,32,128,0,1,float16,fp8,0,0.009216000015536943
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,16,32,32,128,0,1,fp8,fp8,0,0.008703999842206636
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,16,32,1,128,0,1,float16,float16,0,0.009045333291093508
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,16,32,1,128,0,1,float16,fp8,0,0.00938666673998038
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,16,32,1,128,0,1,fp8,fp8,0,0.009216000015536943
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,16,32,2,128,0,1,float16,fp8,0,0.00938666673998038
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,16,32,2,128,0,1,fp8,fp8,0,0.008703999842206636
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,16,32,4,128,0,1,float16,fp8,0,0.009045333291093508
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,16,32,4,128,0,1,float16,float16,0,0.008874666566650072
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,16,32,4,128,0,1,fp8,fp8,0,0.008703999842206636
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,16,32,8,128,0,1,float16,float16,0,0.009045333291093508
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,16,32,8,128,0,1,float16,fp8,0,0.009216000015536943
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,16,32,8,128,0,1,fp8,fp8,0,0.009216000015536943
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,16,32,32,128,0,1,float16,float16,0,0.008362666393319765
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,16,32,32,128,0,1,float16,fp8,0,0.0085333331177632
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,16,32,1,128,0,1,float16,float16,0,0.008703999842206636
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,16,32,32,128,0,1,fp8,fp8,0,0.008874666566650072
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,16,32,1,128,0,1,float16,fp8,0,0.008874666566650072
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,16,32,1,128,0,1,fp8,fp8,0,0.009216000015536943
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,16,32,2,128,0,1,float16,float16,0,0.009045333291093508
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,16,32,2,128,0,1,float16,float16,0,0.010069333637754122
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,16,32,2,128,0,1,float16,fp8,0,0.008703999842206636
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,16,32,2,128,0,1,fp8,fp8,0,0.008192000289758047
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,16,32,4,128,0,1,float16,float16,0,0.0085333331177632
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,16,32,4,128,0,1,float16,fp8,0,0.008874666566650072
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,16,32,4,128,0,1,fp8,fp8,0,0.0085333331177632
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,16,32,8,128,0,1,float16,float16,0,0.00972800018886725
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,16,32,8,128,0,1,float16,fp8,0,0.009045333291093508
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,16,32,32,128,0,1,float16,fp8,0,0.008362666393319765
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,16,32,32,128,0,1,float16,float16,0,0.008192000289758047
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,16,32,32,128,0,1,fp8,fp8,0,0.008021333565314611
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,16,32,1,128,0,1,float16,fp8,0,0.008874666566650072
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,16,32,1,128,0,1,fp8,fp8,0,0.00903466654320558
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,16,32,1,128,0,1,float16,float16,0,0.009381333366036415
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,16,32,2,128,0,1,float16,float16,0,0.008703999842206636
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,16,32,2,128,0,1,float16,fp8,0,0.008703999842206636
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,16,32,4,128,0,1,float16,float16,0,0.0085333331177632
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,16,32,2,128,0,1,fp8,fp8,0,0.008362666393319765
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,16,32,4,128,0,1,float16,fp8,0,0.009216000015536943
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,16,32,4,128,0,1,fp8,fp8,0,0.008021333565314611
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,16,32,8,128,0,1,float16,float16,0,0.0085333331177632
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,16,32,8,128,0,1,float16,fp8,0,0.008703999842206636
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,16,32,8,128,0,1,fp8,fp8,0,0.008192000289758047
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,16,32,8,128,0,1,fp8,fp8,0,0.009216000015536943
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,16384,24,1,128,0,1,fp8,fp8,0,50.891265869140625
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,16384,24,2,128,0,1,fp8,fp8,0,50.07189432779948
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,16384,24,1,128,0,1,float16,float16,0,87.60337320963542
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,16384,24,2,128,0,1,float16,float16,0,88.60842895507812
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,16384,24,2,128,0,1,float16,fp8,0,87.95887247721355
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,16384,24,1,128,0,1,float16,fp8,0,88.29832967122395
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,16384,24,4,128,0,1,float16,float16,0,87.59415690104167
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,16384,24,4,128,0,1,fp8,fp8,0,51.18993123372396
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,16384,24,4,128,0,1,float16,fp8,0,88.82124837239583
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,16384,24,24,128,0,1,fp8,fp8,0,26.412373860677082
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,16384,24,24,128,0,1,float16,float16,0,43.44081115722656
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,16384,24,24,128,0,1,float16,fp8,0,43.62461853027344
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,16384,24,8,128,0,1,fp8,fp8,0,50.5536855061849
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,16384,24,8,128,0,1,float16,float16,0,89.01922607421875
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,16384,24,1,128,0,1,fp8,fp8,0,24.3778559366862
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,16384,24,1,128,0,1,float16,float16,0,42.5533447265625
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,16384,24,8,128,0,1,float16,fp8,0,88.82022094726562
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,16384,24,2,128,0,1,fp8,fp8,0,24.72601572672526
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,16384,24,2,128,0,1,float16,float16,0,41.624916076660156
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,16384,24,2,128,0,1,float16,fp8,0,41.89678955078125
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,16384,24,4,128,0,1,float16,float16,0,41.84763844807943
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,16384,24,1,128,0,1,float16,fp8,0,41.447593688964844
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,16384,24,4,128,0,1,fp8,fp8,0,25.29143524169922
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,16384,24,4,128,0,1,float16,fp8,0,41.46107737223307
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,16384,24,8,128,0,1,fp8,fp8,0,25.216512044270832
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,16384,24,24,128,0,1,float16,float16,0,21.126144409179688
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,16384,24,8,128,0,1,float16,float16,0,42.54924774169922
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,16384,24,24,128,0,1,float16,fp8,0,20.912469228108723
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,16384,24,8,128,0,1,float16,fp8,0,41.93006896972656
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,16384,24,24,128,0,1,fp8,fp8,0,13.217792510986328
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,16384,24,1,128,0,1,float16,float16,0,21.279402414957683
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,16384,24,1,128,0,1,float16,fp8,0,20.394495646158855
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,16384,24,1,128,0,1,fp8,fp8,0,12.408832550048828
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,16384,24,2,128,0,1,fp8,fp8,0,12.458325703938803
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,16384,24,2,128,0,1,float16,float16,0,20.494164784749348
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,16384,24,4,128,0,1,fp8,fp8,0,12.787541707356771
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,16384,24,2,128,0,1,float16,fp8,0,20.55406951904297
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,16384,24,4,128,0,1,float16,float16,0,20.194133758544922
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,16384,24,4,128,0,1,float16,fp8,0,20.85580825805664
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,16384,24,8,128,0,1,fp8,fp8,0,12.433066050211588
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,16384,24,24,128,0,1,fp8,fp8,0,6.638250350952148
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,16384,24,8,128,0,1,float16,float16,0,21.013163248697918
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,16384,24,8,128,0,1,float16,fp8,0,21.286570231119793
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,16384,24,24,128,0,1,float16,float16,0,10.976768493652344
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,16384,24,24,128,0,1,float16,fp8,0,11.245909372965494
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,16384,24,1,128,0,1,float16,float16,0,10.496341069539389
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,16384,24,1,128,0,1,float16,fp8,0,10.601984024047852
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,16384,24,1,128,0,1,fp8,fp8,0,6.176085154215495
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,16384,24,2,128,0,1,fp8,fp8,0,5.9912535349528
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,16384,24,2,128,0,1,float16,float16,0,10.470912297566732
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,16384,24,2,128,0,1,float16,fp8,0,10.60693359375
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,16384,24,4,128,0,1,fp8,fp8,0,5.822122573852539
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,16384,24,4,128,0,1,float16,fp8,0,10.756778717041016
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,16384,24,4,128,0,1,float16,float16,0,10.876757303873697
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,16384,24,8,128,0,1,fp8,fp8,0,5.876735687255859
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,16384,24,8,128,0,1,float16,fp8,0,10.678783416748047
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,16384,24,8,128,0,1,float16,float16,0,10.460671742757162
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,12288,24,1,128,0,1,fp8,fp8,0,28.396031697591145
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,12288,24,2,128,0,1,fp8,fp8,0,28.92919413248698
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,12288,24,1,128,0,1,float16,fp8,0,47.65815226236979
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,12288,24,1,128,0,1,float16,float16,0,48.375467936197914
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,12288,24,2,128,0,1,float16,float16,0,48.63556416829427
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,12288,24,2,128,0,1,float16,fp8,0,48.28978983561198
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,12288,24,4,128,0,1,float16,float16,0,48.32733662923177
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,12288,24,4,128,0,1,fp8,fp8,0,28.645034790039062
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,12288,24,4,128,0,1,float16,fp8,0,48.34884134928385
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,12288,24,24,128,0,1,float16,float16,0,25.640276590983074
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,12288,24,8,128,0,1,fp8,fp8,0,30.29486846923828
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,12288,24,24,128,0,1,float16,fp8,0,25.6005121866862
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,12288,24,24,128,0,1,fp8,fp8,0,15.404373168945312
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,12288,24,8,128,0,1,float16,float16,0,48.6997324625651
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,12288,24,1,128,0,1,float16,float16,0,23.871658325195312
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,12288,24,8,128,0,1,float16,fp8,0,48.54937744140625
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,12288,24,1,128,0,1,float16,fp8,0,23.54688008626302
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,12288,24,1,128,0,1,fp8,fp8,0,14.626304626464844
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,12288,24,2,128,0,1,fp8,fp8,0,13.953024546305338
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,12288,24,2,128,0,1,float16,float16,0,24.05853780110677
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,12288,24,2,128,0,1,float16,fp8,0,23.951189676920574
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,12288,24,4,128,0,1,fp8,fp8,0,14.477311452229818
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,12288,24,4,128,0,1,float16,float16,0,23.886678059895832
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,12288,24,4,128,0,1,float16,fp8,0,23.745707194010418
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,12288,24,8,128,0,1,fp8,fp8,0,14.675114949544271
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,12288,24,8,128,0,1,float16,fp8,0,24.67908223470052
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,12288,24,24,128,0,1,float16,float16,0,12.71022923787435
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,12288,24,8,128,0,1,float16,float16,0,24.16588846842448
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,12288,24,24,128,0,1,float16,fp8,0,12.839594523111979
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,12288,24,24,128,0,1,fp8,fp8,0,7.476053237915039
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,12288,24,1,128,0,1,float16,float16,0,11.912704467773438
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,12288,24,1,128,0,1,float16,fp8,0,11.970218658447266
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,12288,24,1,128,0,1,fp8,fp8,0,7.004842758178711
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,12288,24,2,128,0,1,float16,fp8,0,12.088661193847656
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,12288,24,2,128,0,1,float16,float16,0,12.554581960042318
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,12288,24,2,128,0,1,fp8,fp8,0,7.290367762247722
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,12288,24,4,128,0,1,float16,fp8,0,12.180479685465494
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,12288,24,4,128,0,1,float16,float16,0,12.16034189860026
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,12288,24,4,128,0,1,fp8,fp8,0,7.22329584757487
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,12288,24,8,128,0,1,float16,float16,0,12.004522959391275
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,12288,24,8,128,0,1,float16,fp8,0,12.111701965332031
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,12288,24,24,128,0,1,float16,float16,0,6.1694291432698565
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,12288,24,8,128,0,1,fp8,fp8,0,6.922069549560547
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,12288,24,24,128,0,1,fp8,fp8,0,3.744426727294922
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,12288,24,24,128,0,1,float16,fp8,0,6.309717178344727
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,12288,24,1,128,0,1,float16,float16,0,6.065493265787761
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,12288,24,1,128,0,1,float16,fp8,0,5.97657585144043
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,12288,24,1,128,0,1,fp8,fp8,0,3.1766185760498047
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,12288,24,2,128,0,1,float16,float16,0,5.945173263549805
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,12288,24,2,128,0,1,fp8,fp8,0,3.2892586390177407
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,12288,24,4,128,0,1,float16,float16,0,5.278890609741211
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,12288,24,2,128,0,1,float16,fp8,0,6.171989440917969
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,12288,24,4,128,0,1,float16,fp8,0,6.07368532816569
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,12288,24,4,128,0,1,fp8,fp8,0,3.4536107381184897
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,12288,24,8,128,0,1,float16,float16,0,5.70419184366862
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,12288,24,8,128,0,1,float16,fp8,0,5.509802500406901
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,12288,24,8,128,0,1,fp8,fp8,0,3.428010622660319
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,10240,24,1,128,0,1,fp8,fp8,0,20.031658172607422
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,10240,24,2,128,0,1,fp8,fp8,0,20.122112274169922
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,10240,24,1,128,0,1,float16,float16,0,33.71281178792318
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,10240,24,1,128,0,1,float16,fp8,0,33.52268727620443
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,10240,24,2,128,0,1,float16,float16,0,33.490089416503906
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,10240,24,4,128,0,1,float16,float16,0,34.14237976074219
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,10240,24,2,128,0,1,float16,fp8,0,33.84951527913412
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,10240,24,4,128,0,1,float16,fp8,0,33.97461446126302
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,10240,24,4,128,0,1,fp8,fp8,0,20.59502919514974
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,10240,24,8,128,0,1,fp8,fp8,0,20.774912516276043
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,10240,24,24,128,0,1,float16,float16,0,18.27191416422526
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,10240,24,24,128,0,1,fp8,fp8,0,11.464874267578125
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,10240,24,1,128,0,1,float16,float16,0,16.48315684000651
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,10240,24,24,128,0,1,float16,fp8,0,17.982463836669922
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,10240,24,8,128,0,1,float16,float16,0,34.41168975830078
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,10240,24,8,128,0,1,float16,fp8,0,34.918741861979164
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,10240,24,1,128,0,1,float16,fp8,0,17.320618947347004
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,10240,24,1,128,0,1,fp8,fp8,0,10.058752059936523
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,10240,24,2,128,0,1,fp8,fp8,0,9.99560546875
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,10240,24,2,128,0,1,float16,fp8,0,16.85538101196289
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,10240,24,4,128,0,1,fp8,fp8,0,10.265600204467773
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,10240,24,2,128,0,1,float16,float16,0,16.719018300374348
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,10240,24,4,128,0,1,float16,float16,0,16.76475779215495
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,10240,24,4,128,0,1,float16,fp8,0,16.906239827473957
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,10240,24,8,128,0,1,float16,float16,0,17.470293680826824
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,10240,24,8,128,0,1,float16,fp8,0,17.25491205851237
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,10240,24,8,128,0,1,fp8,fp8,0,10.312704086303711
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,10240,24,24,128,0,1,fp8,fp8,0,5.530282974243164
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,10240,24,24,128,0,1,float16,fp8,0,8.905728022257486
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,10240,24,24,128,0,1,float16,float16,0,9.32693354288737
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,10240,24,1,128,0,1,float16,float16,0,8.146944046020508
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,10240,24,1,128,0,1,float16,fp8,0,8.405333201090494
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,10240,24,1,128,0,1,fp8,fp8,0,4.539221445719401
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,10240,24,2,128,0,1,fp8,fp8,0,4.706816037495931
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,10240,24,2,128,0,1,float16,float16,0,8.319317499796549
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,10240,24,2,128,0,1,float16,fp8,0,8.315050760904947
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,10240,24,4,128,0,1,fp8,fp8,0,4.555775960286458
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,10240,24,4,128,0,1,float16,float16,0,8.507562637329102
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,10240,24,4,128,0,1,float16,fp8,0,8.624298731486002
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,10240,24,8,128,0,1,float16,float16,0,8.29593594868978
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,10240,24,24,128,0,1,float16,fp8,0,4.2704213460286455
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,10240,24,24,128,0,1,float16,float16,0,4.384597460428874
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,10240,24,24,128,0,1,fp8,fp8,0,2.753194808959961
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,10240,24,8,128,0,1,fp8,fp8,0,5.050538698832194
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,10240,24,1,128,0,1,float16,float16,0,3.923285484313965
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,10240,24,8,128,0,1,float16,fp8,0,9.044480005900065
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,10240,24,1,128,0,1,fp8,fp8,0,2.2816425959269204
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,10240,24,1,128,0,1,float16,fp8,0,4.015786806742351
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,10240,24,2,128,0,1,fp8,fp8,0,2.2785706520080566
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,10240,24,2,128,0,1,float16,float16,0,3.761493364969889
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,10240,24,2,128,0,1,float16,fp8,0,3.8888107935587564
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,10240,24,4,128,0,1,float16,float16,0,4.042581240336101
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,10240,24,4,128,0,1,float16,fp8,0,4.007594744364421
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,10240,24,4,128,0,1,fp8,fp8,0,2.3621973991394043
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,10240,24,8,128,0,1,float16,float16,0,3.944789250691732
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,10240,24,8,128,0,1,float16,fp8,0,4.142421404520671
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,10240,24,8,128,0,1,fp8,fp8,0,2.403157393137614
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,8192,24,1,128,0,1,fp8,fp8,0,27.368789672851562
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,8192,24,2,128,0,1,fp8,fp8,0,27.58263397216797
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,8192,24,1,128,0,1,float16,float16,0,44.57591247558594
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,8192,24,1,128,0,1,float16,fp8,0,44.50884501139323
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,8192,24,2,128,0,1,float16,float16,0,44.24482218424479
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,8192,24,4,128,0,1,float16,float16,0,45.561004638671875
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,8192,24,2,128,0,1,float16,fp8,0,45.882710774739586
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,8192,24,4,128,0,1,float16,fp8,0,44.163411458333336
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,8192,24,4,128,0,1,fp8,fp8,0,28.297556559244793
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,8192,24,24,128,0,1,fp8,fp8,0,15.48919423421224
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,8192,24,8,128,0,1,fp8,fp8,0,28.368382771809895
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,8192,24,24,128,0,1,float16,float16,0,24.05700174967448
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,8192,24,24,128,0,1,float16,fp8,0,24.334335327148438
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,8192,24,8,128,0,1,float16,float16,0,45.634562174479164
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,8192,24,1,128,0,1,float16,float16,0,21.368489583333332
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,8192,24,8,128,0,1,float16,fp8,0,44.91007995605469
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,8192,24,1,128,0,1,float16,fp8,0,21.671254475911457
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,8192,24,1,128,0,1,fp8,fp8,0,13.531988779703775
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,8192,24,2,128,0,1,fp8,fp8,0,13.41269302368164
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,8192,24,2,128,0,1,float16,float16,0,21.920255025227863
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,8192,24,2,128,0,1,float16,fp8,0,22.232747395833332
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,8192,24,4,128,0,1,float16,fp8,0,21.641385396321613
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,8192,24,4,128,0,1,fp8,fp8,0,13.376171112060547
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,8192,24,4,128,0,1,float16,float16,0,21.82604726155599
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,8192,24,8,128,0,1,float16,float16,0,22.273707071940105
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,8192,24,24,128,0,1,float16,float16,0,12.108970642089844
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,8192,24,8,128,0,1,fp8,fp8,0,14.184959411621094
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,8192,24,24,128,0,1,float16,fp8,0,12.008277893066406
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,8192,24,24,128,0,1,fp8,fp8,0,7.415466944376628
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,8192,24,8,128,0,1,float16,fp8,0,22.9031244913737
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,8192,24,1,128,0,1,float16,float16,0,10.70199457804362
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,8192,24,1,128,0,1,float16,fp8,0,11.184640248616537
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,8192,24,1,128,0,1,fp8,fp8,0,6.651391983032227
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,8192,24,2,128,0,1,fp8,fp8,0,6.652245203653972
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,8192,24,2,128,0,1,float16,float16,0,11.034282684326172
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,8192,24,2,128,0,1,float16,fp8,0,11.306837717692057
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,8192,24,4,128,0,1,fp8,fp8,0,6.605653127034505
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,8192,24,4,128,0,1,float16,float16,0,10.832895914713541
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,8192,24,8,128,0,1,float16,float16,0,11.347797393798828
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,8192,24,4,128,0,1,float16,fp8,0,11.5775146484375
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,8192,24,8,128,0,1,fp8,fp8,0,6.86404291788737
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,8192,24,24,128,0,1,fp8,fp8,0,3.7374293009440103
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,8192,24,24,128,0,1,float16,float16,0,5.7719465891520185
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,8192,24,8,128,0,1,float16,fp8,0,11.567957560221354
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,8192,24,24,128,0,1,float16,fp8,0,5.786624272664388
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,8192,24,1,128,0,1,float16,float16,0,5.3406721750895185
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,8192,24,1,128,0,1,fp8,fp8,0,3.032917340596517
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,8192,24,1,128,0,1,float16,fp8,0,5.355007807413737
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,8192,24,2,128,0,1,fp8,fp8,0,3.009536107381185
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,8192,24,2,128,0,1,float16,float16,0,5.328042666117351
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,8192,24,2,128,0,1,float16,fp8,0,5.271039962768555
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,8192,24,4,128,0,1,fp8,fp8,0,3.036160151163737
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,8192,24,4,128,0,1,float16,float16,0,5.52345593770345
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,8192,24,4,128,0,1,float16,fp8,0,5.551616032918294
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,8192,24,8,128,0,1,float16,fp8,0,5.544106801350911
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,8192,24,8,128,0,1,float16,float16,0,5.605546951293945
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,8192,24,8,128,0,1,fp8,fp8,0,3.1641600926717124
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,8192,24,24,128,0,1,float16,float16,0,2.9030399322509766
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,8192,24,24,128,0,1,float16,fp8,0,2.833237330118815
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,8192,24,24,128,0,1,fp8,fp8,0,1.8471253712972004
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,8192,24,1,128,0,1,float16,float16,0,2.4905385971069336
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,8192,24,1,128,0,1,float16,fp8,0,2.558634599049886
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,8192,24,1,128,0,1,fp8,fp8,0,1.51910400390625
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,8192,24,2,128,0,1,float16,float16,0,2.515456040700277
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,8192,24,2,128,0,1,fp8,fp8,0,1.4899199803670247
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,8192,24,2,128,0,1,float16,fp8,0,2.4546987215677896
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,8192,24,4,128,0,1,float16,float16,0,2.491391976674398
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,8192,24,4,128,0,1,float16,fp8,0,2.4499200185139975
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,8192,24,4,128,0,1,fp8,fp8,0,1.557162602742513
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,8192,24,8,128,0,1,float16,float16,0,2.650282700856527
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,8192,24,8,128,0,1,float16,fp8,0,2.553343931833903
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,8192,24,8,128,0,1,fp8,fp8,0,1.6030720074971516
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,6144,24,1,128,0,1,fp8,fp8,0,15.513258616129557
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,6144,24,2,128,0,1,fp8,fp8,0,16.114176432291668
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,6144,24,1,128,0,1,float16,float16,0,25.87238311767578
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,6144,24,1,128,0,1,float16,fp8,0,25.946795145670574
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,6144,24,2,128,0,1,float16,float16,0,25.915562947591145
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,6144,24,2,128,0,1,float16,fp8,0,25.682772318522137
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,6144,24,4,128,0,1,float16,float16,0,26.197845458984375
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,6144,24,4,128,0,1,float16,fp8,0,26.7151362101237
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,6144,24,4,128,0,1,fp8,fp8,0,16.045738220214844
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,6144,24,8,128,0,1,fp8,fp8,0,16.67208480834961
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,6144,24,24,128,0,1,float16,float16,0,14.070613861083984
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,6144,24,24,128,0,1,fp8,fp8,0,9.383594512939453
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,6144,24,24,128,0,1,float16,fp8,0,14.12829844156901
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,6144,24,8,128,0,1,float16,float16,0,27.01892344156901
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,6144,24,8,128,0,1,float16,fp8,0,26.697898864746094
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,6144,24,1,128,0,1,float16,float16,0,13.24441655476888
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,6144,24,1,128,0,1,float16,fp8,0,12.459349314371744
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,6144,24,1,128,0,1,fp8,fp8,0,7.502848307291667
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,6144,24,2,128,0,1,fp8,fp8,0,8.00921630859375
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,6144,24,2,128,0,1,float16,float16,0,12.59707768758138
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,6144,24,2,128,0,1,float16,fp8,0,12.95428212483724
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,6144,24,4,128,0,1,fp8,fp8,0,7.9107411702473955
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,6144,24,4,128,0,1,float16,float16,0,13.190315246582031
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,6144,24,4,128,0,1,float16,fp8,0,13.122047424316406
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,6144,24,8,128,0,1,float16,float16,0,13.288959503173828
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,6144,24,8,128,0,1,float16,fp8,0,12.990975697835287
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,6144,24,8,128,0,1,fp8,fp8,0,8.097621281941732
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,6144,24,24,128,0,1,float16,float16,0,6.759936014811198
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,6144,24,24,128,0,1,float16,fp8,0,7.034197489420573
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,6144,24,24,128,0,1,fp8,fp8,0,4.682410558064778
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,6144,24,1,128,0,1,float16,float16,0,6.438912073771159
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,6144,24,1,128,0,1,float16,fp8,0,6.329685211181641
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,6144,24,1,128,0,1,fp8,fp8,0,3.534165382385254
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,6144,24,2,128,0,1,float16,float16,0,6.356650670369466
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,6144,24,2,128,0,1,fp8,fp8,0,3.5370667775472007
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,6144,24,2,128,0,1,float16,fp8,0,6.389418919881185
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,6144,24,4,128,0,1,float16,float16,0,6.399829228719075
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,6144,24,4,128,0,1,fp8,fp8,0,3.748863855997721
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,6144,24,4,128,0,1,float16,fp8,0,6.597973505655925
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,6144,24,8,128,0,1,float16,float16,0,6.441984176635742
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,6144,24,8,128,0,1,float16,fp8,0,6.322858810424805
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,6144,24,8,128,0,1,fp8,fp8,0,3.791701316833496
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,6144,24,24,128,0,1,float16,float16,0,3.446784019470215
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,6144,24,24,128,0,1,fp8,fp8,0,2.3202133178710938
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,6144,24,24,128,0,1,float16,fp8,0,3.4682881037394204
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,6144,24,1,128,0,1,float16,float16,0,2.8945067723592124
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,6144,24,1,128,0,1,fp8,fp8,0,1.7174186706542969
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,6144,24,1,128,0,1,float16,fp8,0,2.871466636657715
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,6144,24,2,128,0,1,float16,float16,0,2.834773381551107
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,6144,24,2,128,0,1,float16,fp8,0,2.836480140686035
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,6144,24,2,128,0,1,fp8,fp8,0,1.7515519460042317
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,6144,24,4,128,0,1,float16,float16,0,2.9356374740600586
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,6144,24,4,128,0,1,fp8,fp8,0,1.8740906715393066
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,6144,24,4,128,0,1,float16,fp8,0,2.9723307291666665
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,6144,24,8,128,0,1,float16,float16,0,3.072341283162435
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,6144,24,8,128,0,1,float16,fp8,0,3.130197207132975
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,6144,24,8,128,0,1,fp8,fp8,0,1.8930346171061199
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,6144,24,24,128,0,1,float16,float16,0,1.7249280611673992
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,6144,24,24,128,0,1,float16,fp8,0,1.7087146441141765
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,6144,24,24,128,0,1,fp8,fp8,0,1.1538773377736409
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,6144,24,1,128,0,1,float16,float16,0,1.4878719647725422
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,6144,24,1,128,0,1,fp8,fp8,0,0.8714240392049154
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,6144,24,1,128,0,1,float16,fp8,0,1.4696106910705566
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,6144,24,2,128,0,1,float16,float16,0,1.4795093536376953
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,6144,24,2,128,0,1,float16,fp8,0,1.4337706565856934
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,6144,24,2,128,0,1,fp8,fp8,0,0.8690346876780192
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,6144,24,4,128,0,1,float16,float16,0,1.4409386316935222
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,6144,24,4,128,0,1,float16,fp8,0,1.4493014017740886
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,6144,24,4,128,0,1,fp8,fp8,0,0.9132373332977295
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,6144,24,8,128,0,1,float16,float16,0,1.4557867050170898
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,6144,24,8,128,0,1,float16,fp8,0,1.4815573692321777
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,6144,24,8,128,0,1,fp8,fp8,0,0.9352533022562662
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,4096,24,1,128,0,1,fp8,fp8,0,15.784788767496744
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,4096,24,1,128,0,1,float16,float16,0,24.78967539469401
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,4096,24,2,128,0,1,fp8,fp8,0,15.8646608988444
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,4096,24,1,128,0,1,float16,fp8,0,25.19074249267578
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,4096,24,2,128,0,1,float16,float16,0,25.5105717976888
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,4096,24,2,128,0,1,float16,fp8,0,24.79803721110026
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,4096,24,4,128,0,1,float16,float16,0,25.6901117960612
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,4096,24,4,128,0,1,float16,fp8,0,25.690282185872395
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,4096,24,4,128,0,1,fp8,fp8,0,15.860565185546875
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,4096,24,8,128,0,1,fp8,fp8,0,16.949930826822918
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,4096,24,24,128,0,1,float16,float16,0,14.000811258951822
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,4096,24,24,128,0,1,fp8,fp8,0,9.981952031453451
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,4096,24,24,128,0,1,float16,fp8,0,14.424235026041666
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,4096,24,8,128,0,1,float16,float16,0,25.553578694661457
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,4096,24,8,128,0,1,float16,fp8,0,25.617408752441406
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,4096,24,1,128,0,1,float16,float16,0,12.524885813395182
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,4096,24,1,128,0,1,float16,fp8,0,11.826517740885416
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,4096,24,1,128,0,1,fp8,fp8,0,7.045461018880208
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,4096,24,2,128,0,1,fp8,fp8,0,7.382698694864909
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,4096,24,2,128,0,1,float16,float16,0,12.299605051676432
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,4096,24,2,128,0,1,float16,fp8,0,12.182868957519531
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,4096,24,4,128,0,1,fp8,fp8,0,7.682048161824544
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,4096,24,4,128,0,1,float16,float16,0,11.994794209798178
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,4096,24,4,128,0,1,float16,fp8,0,12.488703409830729
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,4096,24,8,128,0,1,float16,float16,0,12.56482187906901
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,4096,24,8,128,0,1,float16,fp8,0,12.64571762084961
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,4096,24,24,128,0,1,float16,float16,0,6.906197230021159
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,4096,24,8,128,0,1,fp8,fp8,0,8.166570663452148
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,4096,24,24,128,0,1,fp8,fp8,0,4.9476267496744795
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,4096,24,24,128,0,1,float16,fp8,0,6.94920539855957
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,4096,24,1,128,0,1,float16,fp8,0,5.951658884684245
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,4096,24,1,128,0,1,float16,float16,0,5.885781606038411
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,4096,24,1,128,0,1,fp8,fp8,0,3.3783467610677085
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,4096,24,2,128,0,1,float16,float16,0,5.874005635579427
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,4096,24,2,128,0,1,fp8,fp8,0,3.564202626546224
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,4096,24,2,128,0,1,float16,fp8,0,5.626538594563802
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,4096,24,4,128,0,1,float16,fp8,0,5.739861170450847
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,4096,24,4,128,0,1,fp8,fp8,0,3.5879252751668296
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,4096,24,4,128,0,1,float16,float16,0,6.095359802246094
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,4096,24,8,128,0,1,float16,float16,0,5.999104181925456
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,4096,24,8,128,0,1,float16,fp8,0,6.02180290222168
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,4096,24,8,128,0,1,fp8,fp8,0,3.8056958516438804
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,4096,24,24,128,0,1,float16,float16,0,3.5263147354125977
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,4096,24,24,128,0,1,float16,fp8,0,3.433984120686849
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,4096,24,1,128,0,1,float16,float16,0,2.712575912475586
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,4096,24,24,128,0,1,fp8,fp8,0,2.4231252670288086
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,4096,24,1,128,0,1,float16,fp8,0,2.7234986623128257
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,4096,24,1,128,0,1,fp8,fp8,0,1.6488107045491536
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,4096,24,2,128,0,1,float16,float16,0,2.7608747482299805
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,4096,24,2,128,0,1,float16,fp8,0,2.7083094914754233
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,4096,24,2,128,0,1,fp8,fp8,0,1.7720319430033367
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,4096,24,4,128,0,1,float16,float16,0,2.819925308227539
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,4096,24,4,128,0,1,fp8,fp8,0,1.810431957244873
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,4096,24,4,128,0,1,float16,fp8,0,2.83409055074056
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,4096,24,8,128,0,1,float16,float16,0,3.0091946919759116
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,4096,24,8,128,0,1,float16,fp8,0,2.9049173990885415
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,4096,24,8,128,0,1,fp8,fp8,0,1.8942294120788574
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,4096,24,24,128,0,1,float16,float16,0,1.713322639465332
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,4096,24,24,128,0,1,float16,fp8,0,1.7160533269246419
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,4096,24,24,128,0,1,fp8,fp8,0,1.1915946801503499
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,4096,24,1,128,0,1,float16,float16,0,1.3305173714955647
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,4096,24,1,128,0,1,float16,fp8,0,1.3076480229695637
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,4096,24,1,128,0,1,fp8,fp8,0,0.8046933015187582
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,4096,24,2,128,0,1,float16,float16,0,1.2999680042266846
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,4096,24,2,128,0,1,float16,fp8,0,1.304917335510254
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,4096,24,2,128,0,1,fp8,fp8,0,0.8057173093159994
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,4096,24,4,128,0,1,float16,float16,0,1.3772800763448079
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,4096,24,4,128,0,1,float16,fp8,0,1.3341013590494792
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,4096,24,4,128,0,1,fp8,fp8,0,0.8668159643809
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,4096,24,8,128,0,1,float16,float16,0,1.4155093828837078
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,4096,24,8,128,0,1,float16,fp8,0,1.4194347063700359
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,4096,24,8,128,0,1,fp8,fp8,0,0.9081172943115234
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,4096,24,24,128,0,1,float16,float16,0,0.7898453076680502
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,4096,24,24,128,0,1,float16,fp8,0,0.745130697886149
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,4096,24,1,128,0,1,float16,float16,0,0.7067306836446127
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,4096,24,24,128,0,1,fp8,fp8,0,0.5638826688130697
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,4096,24,1,128,0,1,float16,fp8,0,0.7162880102793375
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,4096,24,1,128,0,1,fp8,fp8,0,0.4384426673253377
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,4096,24,2,128,0,1,float16,fp8,0,0.7099733352661133
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,4096,24,2,128,0,1,float16,float16,0,0.7174826463063558
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,4096,24,2,128,0,1,fp8,fp8,0,0.43724799156188965
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,4096,24,4,128,0,1,float16,float16,0,0.7098026275634766
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,4096,24,4,128,0,1,float16,fp8,0,0.7133866945902506
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,4096,24,4,128,0,1,fp8,fp8,0,0.4498773415883382
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,4096,24,8,128,0,1,float16,fp8,0,0.7200427055358887
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,4096,24,8,128,0,1,float16,float16,0,0.7294293244679769
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,4096,24,8,128,0,1,fp8,fp8,0,0.4391253391901652
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,3072,24,1,128,0,1,float16,float16,0,15.260842641194662
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,3072,24,1,128,0,1,fp8,fp8,0,8.904533386230469
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,3072,24,1,128,0,1,float16,fp8,0,14.201002756754557
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,3072,24,2,128,0,1,fp8,fp8,0,9.21343994140625
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,3072,24,2,128,0,1,float16,float16,0,14.75942357381185
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,3072,24,2,128,0,1,float16,fp8,0,14.66982396443685
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,3072,24,4,128,0,1,float16,float16,0,14.873770395914713
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,3072,24,4,128,0,1,float16,fp8,0,14.816426595052084
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,3072,24,4,128,0,1,fp8,fp8,0,9.976832071940104
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,3072,24,8,128,0,1,fp8,fp8,0,10.509312311808268
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,3072,24,24,128,0,1,float16,float16,0,8.702634811401367
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,3072,24,24,128,0,1,fp8,fp8,0,6.4774824778238935
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,3072,24,8,128,0,1,float16,float16,0,15.119701385498047
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,3072,24,24,128,0,1,float16,fp8,0,8.815104166666666
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,3072,24,1,128,0,1,float16,float16,0,7.0910294850667315
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,3072,24,8,128,0,1,float16,fp8,0,15.383040110270182
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,3072,24,1,128,0,1,float16,fp8,0,7.354197184244792
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,3072,24,1,128,0,1,fp8,fp8,0,4.239871978759766
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,3072,24,2,128,0,1,fp8,fp8,0,4.264447848002116
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,3072,24,4,128,0,1,fp8,fp8,0,4.471295992533366
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,3072,24,2,128,0,1,float16,float16,0,6.610432306925456
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,3072,24,2,128,0,1,float16,fp8,0,7.093589146931966
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,3072,24,4,128,0,1,float16,float16,0,7.164757410685222
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,3072,24,4,128,0,1,float16,fp8,0,6.816597620646159
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,3072,24,8,128,0,1,fp8,fp8,0,4.917759895324707
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,3072,24,8,128,0,1,float16,float16,0,7.487317403157552
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,3072,24,8,128,0,1,float16,fp8,0,7.422805150349935
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,3072,24,24,128,0,1,float16,float16,0,4.337493260701497
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,3072,24,24,128,0,1,fp8,fp8,0,3.1778132120768228
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,3072,24,24,128,0,1,float16,fp8,0,4.323328018188477
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,3072,24,1,128,0,1,float16,fp8,0,3.2189439137776694
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,3072,24,1,128,0,1,float16,float16,0,3.3486506144205728
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,3072,24,1,128,0,1,fp8,fp8,0,2.0421973864237466
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,3072,24,2,128,0,1,fp8,fp8,0,2.112511952718099
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,3072,24,2,128,0,1,float16,fp8,0,3.3138345082600913
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,3072,24,4,128,0,1,fp8,fp8,0,2.190336068471273
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,3072,24,2,128,0,1,float16,float16,0,3.397120157877604
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,3072,24,4,128,0,1,float16,fp8,0,3.4063361485799155
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,3072,24,4,128,0,1,float16,float16,0,3.4705066680908203
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,3072,24,8,128,0,1,float16,float16,0,3.6771841049194336
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,3072,24,8,128,0,1,float16,fp8,0,3.5413331985473633
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,3072,24,8,128,0,1,fp8,fp8,0,2.3828479448954263
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,3072,24,24,128,0,1,float16,float16,0,2.1512533823649087
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,3072,24,24,128,0,1,float16,fp8,0,2.1070507367451987
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,3072,24,1,128,0,1,float16,float16,0,1.5566506385803223
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,3072,24,24,128,0,1,fp8,fp8,0,1.5308799743652344
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,3072,24,1,128,0,1,float16,fp8,0,1.5738879839579265
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,3072,24,1,128,0,1,fp8,fp8,0,1.0222933292388916
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,3072,24,2,128,0,1,float16,float16,0,1.5747413635253906
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,3072,24,2,128,0,1,fp8,fp8,0,1.0173439979553223
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,3072,24,2,128,0,1,float16,fp8,0,1.581567923227946
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,3072,24,4,128,0,1,float16,float16,0,1.652224063873291
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,3072,24,4,128,0,1,float16,fp8,0,1.6537599563598633
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,3072,24,4,128,0,1,fp8,fp8,0,1.0883413155873616
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,3072,24,8,128,0,1,float16,float16,0,1.803434689839681
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,3072,24,8,128,0,1,float16,fp8,0,1.787050724029541
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,3072,24,8,128,0,1,fp8,fp8,0,1.1798186302185059
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,3072,24,24,128,0,1,float16,float16,0,1.0521600246429443
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,3072,24,24,128,0,1,float16,fp8,0,1.0159786542256672
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,3072,24,24,128,0,1,fp8,fp8,0,0.746666669845581
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,3072,24,1,128,0,1,float16,float16,0,0.7889920075734457
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,3072,24,1,128,0,1,float16,fp8,0,0.799402634302775
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,3072,24,1,128,0,1,fp8,fp8,0,0.4833279848098755
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,3072,24,2,128,0,1,float16,float16,0,0.8116906483968099
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,3072,24,2,128,0,1,float16,fp8,0,0.785749355951945
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,3072,24,2,128,0,1,fp8,fp8,0,0.47018667062123615
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,3072,24,4,128,0,1,fp8,fp8,0,0.5031253496805826
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,3072,24,4,128,0,1,float16,fp8,0,0.7797760168711344
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,3072,24,4,128,0,1,float16,float16,0,0.7847253481547037
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,3072,24,8,128,0,1,fp8,fp8,0,0.5579093297322592
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,3072,24,8,128,0,1,float16,float16,0,0.8270506858825684
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,3072,24,8,128,0,1,float16,fp8,0,0.814079999923706
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,3072,24,24,128,0,1,float16,float16,0,0.45431466897328693
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,3072,24,24,128,0,1,float16,fp8,0,0.4536319971084595
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,3072,24,24,128,0,1,fp8,fp8,0,0.31436800956726074
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,3072,24,1,128,0,1,float16,float16,0,0.41676799456278485
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,3072,24,1,128,0,1,float16,fp8,0,0.42188799381256104
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,3072,24,1,128,0,1,fp8,fp8,0,0.2826240062713623
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,3072,24,2,128,0,1,float16,fp8,0,0.43144532044728595
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,3072,24,2,128,0,1,float16,float16,0,0.42854400475819904
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,3072,24,2,128,0,1,fp8,fp8,0,0.27613866329193115
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,3072,24,4,128,0,1,float16,float16,0,0.4283733367919922
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,3072,24,4,128,0,1,float16,fp8,0,0.43298133214314777
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,3072,24,4,128,0,1,fp8,fp8,0,0.2725546757380168
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,3072,24,8,128,0,1,float16,float16,0,0.43775999546051025
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,3072,24,8,128,0,1,float16,fp8,0,0.4519253174463908
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,3072,24,8,128,0,1,fp8,fp8,0,0.28091732660929364
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,2048,24,1,128,0,1,float16,float16,0,14.710955301920572
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,2048,24,1,128,0,1,fp8,fp8,0,9.8428586324056
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,2048,24,2,128,0,1,fp8,fp8,0,10.025813420613607
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,2048,24,1,128,0,1,float16,fp8,0,14.388565063476562
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,2048,24,2,128,0,1,float16,float16,0,14.435328165690104
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,2048,24,2,128,0,1,float16,fp8,0,14.453418731689453
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,2048,24,4,128,0,1,float16,float16,0,15.145642598470053
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,2048,24,4,128,0,1,float16,fp8,0,15.053824106852213
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,2048,24,4,128,0,1,fp8,fp8,0,10.725205739339193
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,2048,24,24,128,0,1,float16,float16,0,9.275221506754557
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,2048,24,24,128,0,1,float16,fp8,0,9.264127731323242
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,2048,24,8,128,0,1,fp8,fp8,0,11.60055414835612
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,2048,24,8,128,0,1,float16,float16,0,15.541759490966797
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,2048,24,24,128,0,1,fp8,fp8,0,7.305898666381836
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,2048,24,1,128,0,1,float16,float16,0,6.831616083780925
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,2048,24,8,128,0,1,float16,fp8,0,15.458304087320963
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,2048,24,1,128,0,1,float16,fp8,0,7.121578852335612
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,2048,24,1,128,0,1,fp8,fp8,0,4.441941261291504
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,2048,24,2,128,0,1,fp8,fp8,0,4.555434544881185
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,2048,24,2,128,0,1,float16,float16,0,6.6252797444661455
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,2048,24,4,128,0,1,fp8,fp8,0,4.874922752380371
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,2048,24,2,128,0,1,float16,fp8,0,6.686378479003906
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,2048,24,4,128,0,1,float16,float16,0,7.021909077962239
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,2048,24,4,128,0,1,float16,fp8,0,7.1048533121744795
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,2048,24,8,128,0,1,float16,float16,0,7.476736068725586
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,2048,24,24,128,0,1,fp8,fp8,0,3.537578582763672
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,2048,24,24,128,0,1,float16,float16,0,4.600831985473633
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,2048,24,24,128,0,1,float16,fp8,0,4.584277470906575
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,2048,24,1,128,0,1,float16,float16,0,3.339776039123535
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,2048,24,8,128,0,1,fp8,fp8,0,5.415594736735026
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,2048,24,1,128,0,1,float16,fp8,0,3.295232137044271
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,2048,24,8,128,0,1,float16,fp8,0,7.451818466186523
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,2048,24,1,128,0,1,fp8,fp8,0,2.130261262257894
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,2048,24,2,128,0,1,float16,float16,0,3.2781651814778647
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,2048,24,2,128,0,1,fp8,fp8,0,2.1838506062825522
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,2048,24,2,128,0,1,float16,fp8,0,3.3082027435302734
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,2048,24,4,128,0,1,fp8,fp8,0,2.3354026476542153
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,2048,24,4,128,0,1,float16,float16,0,3.518634796142578
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,2048,24,4,128,0,1,float16,fp8,0,3.435861269632975
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,2048,24,8,128,0,1,float16,float16,0,3.708245277404785
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,2048,24,8,128,0,1,fp8,fp8,0,2.535594622294108
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,2048,24,8,128,0,1,float16,fp8,0,3.723605473836263
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,2048,24,24,128,0,1,float16,float16,0,2.287615935007731
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,2048,24,24,128,0,1,fp8,fp8,0,1.7204906145731609
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,2048,24,24,128,0,1,float16,fp8,0,2.252117315928141
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,2048,24,1,128,0,1,float16,fp8,0,1.5677439371744792
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,2048,24,1,128,0,1,float16,float16,0,1.608191967010498
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,2048,24,1,128,0,1,fp8,fp8,0,1.0417493184407551
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,2048,24,2,128,0,1,fp8,fp8,0,1.0653013388315837
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,2048,24,2,128,0,1,float16,float16,0,1.6157013575236003
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,2048,24,2,128,0,1,float16,fp8,0,1.6302080154418945
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,2048,24,4,128,0,1,fp8,fp8,0,1.1275946299235027
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,2048,24,4,128,0,1,float16,fp8,0,1.6709973017374675
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,2048,24,4,128,0,1,float16,float16,0,1.7109333674112956
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,2048,24,8,128,0,1,float16,float16,0,1.8508799870808919
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,2048,24,8,128,0,1,float16,fp8,0,1.8049707412719727
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,2048,24,24,128,0,1,float16,float16,0,1.1195733547210693
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,2048,24,8,128,0,1,fp8,fp8,0,1.2417706648508708
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,2048,24,24,128,0,1,fp8,fp8,0,0.8342186609903971
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,2048,24,1,128,0,1,float16,float16,0,0.719701369603475
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,2048,24,24,128,0,1,float16,fp8,0,1.0977280139923096
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,2048,24,1,128,0,1,fp8,fp8,0,0.4904959996541341
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,2048,24,1,128,0,1,float16,fp8,0,0.7381333510080973
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,2048,24,2,128,0,1,float16,float16,0,0.7441066900889078
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,2048,24,2,128,0,1,float16,fp8,0,0.7278933525085449
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,2048,24,2,128,0,1,fp8,fp8,0,0.4927146832148234
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,2048,24,4,128,0,1,float16,float16,0,0.7632213433583578
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,2048,24,4,128,0,1,fp8,fp8,0,0.5157546599706014
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,2048,24,8,128,0,1,float16,float16,0,0.8719360033671061
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,2048,24,8,128,0,1,fp8,fp8,0,0.5949440002441406
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,2048,24,4,128,0,1,float16,fp8,0,0.7519573370615641
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,2048,24,8,128,0,1,float16,fp8,0,0.8407039642333984
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,2048,24,24,128,0,1,float16,float16,0,0.4916906754175822
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,2048,24,24,128,0,1,float16,fp8,0,0.4437333345413208
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,2048,24,24,128,0,1,fp8,fp8,0,0.3826346794764201
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,2048,24,1,128,0,1,float16,float16,0,0.38707200686136883
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,2048,24,1,128,0,1,float16,fp8,0,0.400383989016215
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,2048,24,1,128,0,1,fp8,fp8,0,0.24166399240493774
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,2048,24,2,128,0,1,float16,float16,0,0.3906559944152832
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,2048,24,2,128,0,1,float16,fp8,0,0.39662933349609375
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,2048,24,2,128,0,1,fp8,fp8,0,0.2409813404083252
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,2048,24,4,128,0,1,float16,float16,0,0.39185067017873126
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,2048,24,4,128,0,1,float16,fp8,0,0.3935573498408
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,2048,24,4,128,0,1,fp8,fp8,0,0.24166399240493774
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,2048,24,8,128,0,1,float16,float16,0,0.40635732809702557
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,2048,24,8,128,0,1,fp8,fp8,0,0.24081067244211832
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,2048,24,24,128,0,1,float16,float16,0,0.23569067319234213
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,2048,24,8,128,0,1,float16,fp8,0,0.4010666608810425
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,2048,24,24,128,0,1,float16,fp8,0,0.23176532983779907
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,2048,24,24,128,0,1,fp8,fp8,0,0.15735466281572977
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,2048,24,1,128,0,1,float16,float16,0,0.21538132429122925
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,2048,24,1,128,0,1,float16,fp8,0,0.21572266022364298
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,2048,24,2,128,0,1,float16,float16,0,0.2167466680208842
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,2048,24,1,128,0,1,fp8,fp8,0,0.1495039959748586
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,2048,24,2,128,0,1,float16,fp8,0,0.21606399615605673
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,2048,24,2,128,0,1,fp8,fp8,0,0.14779733618100485
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,2048,24,4,128,0,1,float16,float16,0,0.21845332781473795
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,2048,24,4,128,0,1,float16,fp8,0,0.21794132391611734
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,2048,24,4,128,0,1,fp8,fp8,0,0.1462613344192505
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,2048,24,8,128,0,1,float16,fp8,0,0.2172586719195048
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,2048,24,8,128,0,1,float16,float16,0,0.22664533058802286
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,2048,24,8,128,0,1,fp8,fp8,0,0.14779733618100485
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,1536,24,1,128,0,1,float16,fp8,0,8.732159932454428
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,1536,24,1,128,0,1,float16,float16,0,8.866474787394205
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,1536,24,1,128,0,1,fp8,fp8,0,6.125738779703776
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,1536,24,2,128,0,1,fp8,fp8,0,6.186496098836263
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,1536,24,2,128,0,1,float16,fp8,0,8.619690577189127
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,1536,24,2,128,0,1,float16,float16,0,8.622933069864908
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,1536,24,4,128,0,1,float16,fp8,0,9.13254419962565
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,1536,24,4,128,0,1,float16,float16,0,9.17196782430013
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,1536,24,4,128,0,1,fp8,fp8,0,6.766762415568034
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,1536,24,8,128,0,1,float16,float16,0,9.776810963948568
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,1536,24,8,128,0,1,float16,fp8,0,9.509546915690104
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,1536,24,24,128,0,1,float16,float16,0,6.025557200113933
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,1536,24,24,128,0,1,float16,fp8,0,6.048767725626628
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,1536,24,1,128,0,1,float16,float16,0,4.129450798034668
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,1536,24,8,128,0,1,fp8,fp8,0,7.422463734944661
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,1536,24,24,128,0,1,fp8,fp8,0,4.89079475402832
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,1536,24,1,128,0,1,float16,fp8,0,4.063914616902669
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,1536,24,1,128,0,1,fp8,fp8,0,2.7769174575805664
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,1536,24,2,128,0,1,float16,float16,0,4.120575904846191
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,1536,24,2,128,0,1,fp8,fp8,0,2.83460267384847
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,1536,24,2,128,0,1,float16,fp8,0,4.127402623494466
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,1536,24,4,128,0,1,fp8,fp8,0,3.094186782836914
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,1536,24,4,128,0,1,float16,float16,0,4.391594568888347
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,1536,24,4,128,0,1,float16,fp8,0,4.333738644917806
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,1536,24,8,128,0,1,float16,float16,0,4.684970537821452
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,1536,24,8,128,0,1,float16,fp8,0,4.724394798278809
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,1536,24,8,128,0,1,fp8,fp8,0,3.4269866943359375
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,1536,24,24,128,0,1,float16,float16,0,3.0115839640299478
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,1536,24,24,128,0,1,float16,fp8,0,2.97267214457194
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,1536,24,1,128,0,1,float16,fp8,0,1.9770026206970215
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,1536,24,24,128,0,1,fp8,fp8,0,2.3792640368143716
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,1536,24,1,128,0,1,float16,float16,0,2.034005324045817
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,1536,24,1,128,0,1,fp8,fp8,0,1.3405866622924805
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,1536,24,2,128,0,1,float16,float16,0,2.078549385070801
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,1536,24,2,128,0,1,float16,fp8,0,2.0382720629374185
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,1536,24,2,128,0,1,fp8,fp8,0,1.3844480514526367
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,1536,24,4,128,0,1,float16,float16,0,2.145962715148926
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,1536,24,4,128,0,1,float16,fp8,0,2.1203625996907554
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,1536,24,4,128,0,1,fp8,fp8,0,1.481386661529541
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,1536,24,8,128,0,1,float16,float16,0,2.33949867884318
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,1536,24,8,128,0,1,float16,fp8,0,2.313386599222819
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,1536,24,8,128,0,1,fp8,fp8,0,1.645055929819743
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,1536,24,24,128,0,1,float16,float16,0,1.4859946568806965
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,1536,24,24,128,0,1,float16,fp8,0,1.4665385882059734
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,1536,24,24,128,0,1,fp8,fp8,0,1.1226452986399333
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,1536,24,1,128,0,1,float16,float16,0,0.9292799631754557
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,1536,24,1,128,0,1,float16,fp8,0,0.9497600396474203
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,1536,24,1,128,0,1,fp8,fp8,0,0.6498986482620239
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,1536,24,2,128,0,1,float16,float16,0,0.9714346726735433
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,1536,24,2,128,0,1,float16,fp8,0,0.9709226290384928
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,1536,24,2,128,0,1,fp8,fp8,0,0.6500693162282308
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,1536,24,4,128,0,1,float16,float16,0,1.0455040136973064
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,1536,24,4,128,0,1,fp8,fp8,0,0.7039999961853027
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,1536,24,4,128,0,1,float16,fp8,0,1.0084693431854248
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,1536,24,8,128,0,1,float16,float16,0,1.145855983098348
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,1536,24,8,128,0,1,float16,fp8,0,1.1158186594645183
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,1536,24,8,128,0,1,fp8,fp8,0,0.8118613560994467
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,1536,24,24,128,0,1,float16,fp8,0,0.6695253054300944
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,1536,24,24,128,0,1,float16,float16,0,0.7137280305226644
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,1536,24,24,128,0,1,fp8,fp8,0,0.5471573273340861
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,1536,24,1,128,0,1,float16,float16,0,0.4333226680755615
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,1536,24,1,128,0,1,float16,fp8,0,0.4399786790211995
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,1536,24,1,128,0,1,fp8,fp8,0,0.28330665826797485
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,1536,24,2,128,0,1,float16,float16,0,0.4538026650746663
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,1536,24,2,128,0,1,float16,fp8,0,0.44970667362213135
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,1536,24,2,128,0,1,fp8,fp8,0,0.2752853234608968
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,1536,24,4,128,0,1,float16,float16,0,0.45431466897328693
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,1536,24,4,128,0,1,float16,fp8,0,0.45021867752075195
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,1536,24,4,128,0,1,fp8,fp8,0,0.2863786617914836
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,1536,24,8,128,0,1,float16,float16,0,0.47257598241170246
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,1536,24,8,128,0,1,float16,fp8,0,0.47940266132354736
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,1536,24,8,128,0,1,fp8,fp8,0,0.3520853519439697
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,1536,24,24,128,0,1,float16,float16,0,0.2677759925524394
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,1536,24,24,128,0,1,float16,fp8,0,0.2585600018501282
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,1536,24,24,128,0,1,fp8,fp8,0,0.1843199928601583
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,1536,24,1,128,0,1,float16,float16,0,0.2333013415336609
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,1536,24,1,128,0,1,float16,fp8,0,0.2302293380101522
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,1536,24,1,128,0,1,fp8,fp8,0,0.1513813336690267
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,1536,24,2,128,0,1,float16,float16,0,0.2409813404083252
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,1536,24,2,128,0,1,float16,fp8,0,0.23654399315516153
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,1536,24,2,128,0,1,fp8,fp8,0,0.15684266885121664
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,1536,24,4,128,0,1,float16,float16,0,0.2384213407834371
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,1536,24,4,128,0,1,fp8,fp8,0,0.15667200088500977
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,1536,24,4,128,0,1,float16,fp8,0,0.24115200837453207
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,1536,24,8,128,0,1,float16,float16,0,0.24081067244211832
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,1536,24,8,128,0,1,float16,fp8,0,0.24524799982706705
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,1536,24,8,128,0,1,fp8,fp8,0,0.16025599837303162
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,1536,24,24,128,0,1,float16,float16,0,0.15615999698638916
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,1536,24,24,128,0,1,float16,fp8,0,0.1570133368174235
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,1536,24,24,128,0,1,fp8,fp8,0,0.10427733262379964
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,1536,24,1,128,0,1,float16,float16,0,0.14523733655611673
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,1536,24,1,128,0,1,fp8,fp8,0,0.10205866893132527
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,1536,24,1,128,0,1,float16,fp8,0,0.14813866217931113
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,1536,24,2,128,0,1,float16,float16,0,0.14762666821479797
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,1536,24,2,128,0,1,float16,fp8,0,0.15052800377209982
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,1536,24,2,128,0,1,fp8,fp8,0,0.10240000486373901
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,1536,24,4,128,0,1,float16,float16,0,0.15052800377209982
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,1536,24,4,128,0,1,float16,fp8,0,0.15172266960144043
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,1536,24,8,128,0,1,float16,float16,0,0.14472533265749613
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,1536,24,4,128,0,1,fp8,fp8,0,0.10427733262379964
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,1536,24,8,128,0,1,float16,fp8,0,0.1462613344192505
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,1536,24,8,128,0,1,fp8,fp8,0,0.10342400272687276
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,1024,24,1,128,0,1,float16,float16,0,8.875861485799154
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,1024,24,1,128,0,1,float16,fp8,0,8.846506754557291
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,1024,24,1,128,0,1,fp8,fp8,0,6.081365585327148
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,1024,24,2,128,0,1,float16,float16,0,8.688469568888346
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,1024,24,2,128,0,1,fp8,fp8,0,6.280703862508138
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,1024,24,2,128,0,1,float16,fp8,0,8.686250686645508
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,1024,24,4,128,0,1,float16,float16,0,9.423701604207357
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,1024,24,4,128,0,1,float16,fp8,0,9.214122772216797
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,1024,24,4,128,0,1,fp8,fp8,0,6.948352177937825
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,1024,24,8,128,0,1,float16,float16,0,10.47978655497233
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,1024,24,24,128,0,1,float16,float16,0,6.7302398681640625
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,1024,24,1,128,0,1,float16,float16,0,4.275541305541992
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,1024,24,24,128,0,1,float16,fp8,0,6.38754145304362
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,1024,24,8,128,0,1,fp8,fp8,0,7.721984227498372
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,1024,24,8,128,0,1,float16,fp8,0,10.029397328694662
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,1024,24,24,128,0,1,fp8,fp8,0,5.658965428670247
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,1024,24,1,128,0,1,float16,fp8,0,4.313088099161784
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,1024,24,1,128,0,1,fp8,fp8,0,3.0245545705159507
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,1024,24,2,128,0,1,fp8,fp8,0,3.0858240127563477
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,1024,24,2,128,0,1,float16,fp8,0,4.350805282592773
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,1024,24,2,128,0,1,float16,float16,0,4.366677284240723
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,1024,24,4,128,0,1,float16,float16,0,4.606805483500163
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,1024,24,4,128,0,1,fp8,fp8,0,3.390634536743164
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,1024,24,4,128,0,1,float16,fp8,0,4.608341217041016
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,1024,24,8,128,0,1,float16,float16,0,5.13757864634196
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,1024,24,24,128,0,1,float16,float16,0,3.3568426767985025
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,1024,24,24,128,0,1,float16,fp8,0,3.2040961583455405
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,1024,24,8,128,0,1,fp8,fp8,0,3.8190078735351562
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,1024,24,1,128,0,1,float16,fp8,0,2.1411840120951333
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,1024,24,24,128,0,1,fp8,fp8,0,2.741077423095703
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,1024,24,1,128,0,1,float16,float16,0,2.15773868560791
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,1024,24,8,128,0,1,float16,fp8,0,5.034154574076335
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,1024,24,1,128,0,1,fp8,fp8,0,1.5006720225016277
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,1024,24,2,128,0,1,fp8,fp8,0,1.55511474609375
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,1024,24,2,128,0,1,float16,float16,0,2.1917014122009277
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,1024,24,2,128,0,1,float16,fp8,0,2.1473280588785806
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,1024,24,4,128,0,1,fp8,fp8,0,1.6351572672526042
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,1024,24,4,128,0,1,float16,fp8,0,2.2787413597106934
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,1024,24,4,128,0,1,float16,float16,0,2.2922239303588867
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,1024,24,8,128,0,1,float16,float16,0,2.5267200469970703
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,1024,24,8,128,0,1,float16,fp8,0,2.4847359657287598
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,1024,24,24,128,0,1,float16,float16,0,1.6803840001424153
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,1024,24,8,128,0,1,fp8,fp8,0,1.846442699432373
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,1024,24,1,128,0,1,float16,float16,0,1.0045440196990967
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,1024,24,24,128,0,1,float16,fp8,0,1.5993173917134602
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,1024,24,24,128,0,1,fp8,fp8,0,1.3494613965352376
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,1024,24,1,128,0,1,float16,fp8,0,1.0064213275909424
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,1024,24,1,128,0,1,fp8,fp8,0,0.7348906993865967
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,1024,24,2,128,0,1,float16,float16,0,1.0799787044525146
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,1024,24,2,128,0,1,float16,fp8,0,1.0388480027516682
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,1024,24,2,128,0,1,fp8,fp8,0,0.7244799931844076
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,1024,24,4,128,0,1,float16,float16,0,1.1074559688568115
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,1024,24,4,128,0,1,float16,fp8,0,1.087999979654948
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,1024,24,4,128,0,1,fp8,fp8,0,0.8031573295593262
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,1024,24,8,128,0,1,float16,float16,0,1.234602689743042
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,1024,24,8,128,0,1,float16,fp8,0,1.2059306303660076
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,1024,24,8,128,0,1,fp8,fp8,0,0.9089706738789877
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,1024,24,24,128,0,1,float16,fp8,0,0.7633919715881348
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,1024,24,24,128,0,1,float16,float16,0,0.8161280155181885
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,1024,24,24,128,0,1,fp8,fp8,0,0.6546773513158163
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,1024,24,1,128,0,1,float16,float16,0,0.43263999621073407
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,1024,24,1,128,0,1,float16,fp8,0,0.4394666751225789
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,1024,24,1,128,0,1,fp8,fp8,0,0.292522668838501
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,1024,24,2,128,0,1,float16,float16,0,0.4510720173517863
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,1024,24,2,128,0,1,fp8,fp8,0,0.31112533807754517
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,1024,24,2,128,0,1,float16,fp8,0,0.4551680088043213
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,1024,24,4,128,0,1,float16,float16,0,0.4805973370869954
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,1024,24,4,128,0,1,float16,fp8,0,0.46830932299296063
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,1024,24,4,128,0,1,fp8,fp8,0,0.35396265983581543
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,1024,24,8,128,0,1,float16,float16,0,0.574293335278829
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,1024,24,8,128,0,1,float16,fp8,0,0.5425493319829305
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,1024,24,8,128,0,1,fp8,fp8,0,0.42393600940704346
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,1024,24,24,128,0,1,float16,float16,0,0.3399680058161418
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,1024,24,1,128,0,1,float16,float16,0,0.21606399615605673
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,1024,24,24,128,0,1,float16,fp8,0,0.29781333605448407
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,1024,24,24,128,0,1,fp8,fp8,0,0.2850133379300435
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,1024,24,1,128,0,1,float16,fp8,0,0.21657600005467734
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,1024,24,1,128,0,1,fp8,fp8,0,0.1346560021241506
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,1024,24,2,128,0,1,float16,float16,0,0.222378671169281
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,1024,24,2,128,0,1,float16,fp8,0,0.2213546633720398
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,1024,24,2,128,0,1,fp8,fp8,0,0.1346560021241506
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,1024,24,4,128,0,1,float16,float16,0,0.22254933913548788
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,1024,24,4,128,0,1,fp8,fp8,0,0.13414399822553
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,1024,24,4,128,0,1,float16,fp8,0,0.22579199075698853
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,1024,24,8,128,0,1,float16,float16,0,0.2239146629969279
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,1024,24,8,128,0,1,float16,fp8,0,0.22784000635147095
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,1024,24,8,128,0,1,fp8,fp8,0,0.1346560021241506
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,1024,24,24,128,0,1,float16,float16,0,0.1327786644299825
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,1024,24,24,128,0,1,float16,fp8,0,0.12748799721399942
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,1024,24,24,128,0,1,fp8,fp8,0,0.08959999680519104
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,1024,24,1,128,0,1,float16,float16,0,0.12236799796422322
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,1024,24,1,128,0,1,fp8,fp8,0,0.08618666728337605
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,1024,24,1,128,0,1,float16,fp8,0,0.12356266379356384
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,1024,24,2,128,0,1,float16,float16,0,0.12236799796422322
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,1024,24,2,128,0,1,float16,fp8,0,0.12339199582735698
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,1024,24,2,128,0,1,fp8,fp8,0,0.08191999793052673
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,1024,24,4,128,0,1,float16,float16,0,0.1781760056813558
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,1024,24,4,128,0,1,float16,fp8,0,0.12032000223795573
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,1024,24,4,128,0,1,fp8,fp8,0,0.08618666728337605
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,1024,24,8,128,0,1,float16,float16,0,0.12083199620246887
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,1024,24,8,128,0,1,float16,fp8,0,0.12117333213488261
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,1024,24,8,128,0,1,fp8,fp8,0,0.08533333738644917
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,1024,24,24,128,0,1,float16,float16,0,0.08874666690826416
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,1024,24,24,128,0,1,float16,fp8,0,0.0795306662718455
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,1024,24,1,128,0,1,float16,float16,0,0.08499200145403545
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,1024,24,24,128,0,1,fp8,fp8,0,0.050517335534095764
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,1024,24,1,128,0,1,float16,fp8,0,0.07816533247629802
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,1024,24,1,128,0,1,fp8,fp8,0,0.049322664737701416
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,1024,24,2,128,0,1,float16,float16,0,0.08516266942024231
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,1024,24,2,128,0,1,fp8,fp8,0,0.04898133377234141
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,1024,24,2,128,0,1,float16,fp8,0,0.08191999793052673
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,1024,24,4,128,0,1,float16,float16,0,0.0769706666469574
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,1024,24,4,128,0,1,float16,fp8,0,0.08277333279450734
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,1024,24,4,128,0,1,fp8,fp8,0,0.04915200173854828
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,1024,24,8,128,0,1,float16,float16,0,0.07611733178297679
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,1024,24,8,128,0,1,float16,fp8,0,0.08550399541854858
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,1024,24,8,128,0,1,fp8,fp8,0,0.04983466863632202
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,512,24,1,128,0,1,fp8,fp8,0,4.754090627034505
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,512,24,1,128,0,1,float16,fp8,0,6.341973622639974
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,512,24,1,128,0,1,float16,float16,0,6.317397435506185
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,512,24,2,128,0,1,float16,float16,0,6.495573043823242
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,512,24,2,128,0,1,fp8,fp8,0,4.929877281188965
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,512,24,2,128,0,1,float16,fp8,0,6.416554768880208
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,512,24,4,128,0,1,float16,float16,0,6.957738876342773
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,512,24,4,128,0,1,float16,fp8,0,6.955008188883464
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,512,24,4,128,0,1,fp8,fp8,0,5.547349294026692
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,512,24,8,128,0,1,float16,float16,0,8.090965270996094
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,512,24,8,128,0,1,float16,fp8,0,7.649621327718099
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,512,24,24,128,0,1,float16,float16,0,6.182399749755859
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,512,24,8,128,0,1,fp8,fp8,0,6.538922627766927
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,512,24,24,128,0,1,float16,fp8,0,5.850112279256185
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,512,24,1,128,0,1,float16,float16,0,3.24181334177653
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,512,24,24,128,0,1,fp8,fp8,0,5.208234786987305
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,512,24,1,128,0,1,float16,fp8,0,3.207167943318685
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,512,24,1,128,0,1,fp8,fp8,0,2.3540053367614746
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,512,24,2,128,0,1,float16,float16,0,3.286527951558431
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,512,24,2,128,0,1,fp8,fp8,0,2.465280055999756
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,512,24,2,128,0,1,float16,fp8,0,3.185663859049479
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,512,24,4,128,0,1,fp8,fp8,0,2.6564265886942544
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,512,24,4,128,0,1,float16,float16,0,3.4256213506062827
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,512,24,4,128,0,1,float16,fp8,0,3.4464426040649414
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,512,24,8,128,0,1,float16,float16,0,3.955199877421061
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,512,24,8,128,0,1,float16,fp8,0,3.835733413696289
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,512,24,8,128,0,1,fp8,fp8,0,3.188906669616699
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,512,24,24,128,0,1,float16,float16,0,3.1230293909708657
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,512,24,1,128,0,1,float16,float16,0,1.5452159245808919
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,512,24,24,128,0,1,float16,fp8,0,2.9585065841674805
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,512,24,1,128,0,1,float16,fp8,0,1.5298560460408528
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,512,24,24,128,0,1,fp8,fp8,0,2.550271987915039
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,512,24,1,128,0,1,fp8,fp8,0,1.165824015935262
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,512,24,2,128,0,1,float16,float16,0,1.6199679374694824
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,512,24,2,128,0,1,float16,fp8,0,1.5952213605244954
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,512,24,2,128,0,1,fp8,fp8,0,1.2101973692576091
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,512,24,4,128,0,1,float16,float16,0,1.6848212877909343
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,512,24,4,128,0,1,float16,fp8,0,1.70905605951945
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,512,24,4,128,0,1,fp8,fp8,0,1.3038933277130127
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,512,24,8,128,0,1,float16,float16,0,1.9314346313476562
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,512,24,8,128,0,1,float16,fp8,0,1.884160041809082
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,512,24,8,128,0,1,fp8,fp8,0,1.55187193552653
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,512,24,24,128,0,1,float16,float16,0,1.5614293416341145
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,512,24,24,128,0,1,float16,fp8,0,1.474730650583903
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,512,24,1,128,0,1,float16,float16,0,0.7178239822387695
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,512,24,24,128,0,1,fp8,fp8,0,1.2195839881896973
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,512,24,1,128,0,1,float16,fp8,0,0.7502506573994955
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,512,24,1,128,0,1,fp8,fp8,0,0.5459626515706381
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,512,24,2,128,0,1,float16,float16,0,0.7748266855875651
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,512,24,2,128,0,1,float16,fp8,0,0.7753386497497559
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,512,24,2,128,0,1,fp8,fp8,0,0.5568853219350179
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,512,24,4,128,0,1,float16,float16,0,0.8152746359507242
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,512,24,4,128,0,1,float16,fp8,0,0.7942826747894287
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,512,24,4,128,0,1,fp8,fp8,0,0.6012586752573649
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,512,24,8,128,0,1,float16,fp8,0,0.9055573145548502
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,512,24,8,128,0,1,float16,float16,0,0.9412266413370768
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,512,24,8,128,0,1,fp8,fp8,0,0.7255040009816488
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,512,24,24,128,0,1,float16,float16,0,0.7499093214670817
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,512,24,24,128,0,1,float16,fp8,0,0.6929066975911459
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,512,24,1,128,0,1,float16,fp8,0,0.2845013340314229
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,512,24,24,128,0,1,fp8,fp8,0,0.5898240009943644
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,512,24,1,128,0,1,float16,float16,0,0.28910932938257855
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,512,24,1,128,0,1,fp8,fp8,0,0.21128533283869425
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,512,24,2,128,0,1,float16,float16,0,0.3118079900741577
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,512,24,2,128,0,1,float16,fp8,0,0.30395734310150146
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,512,24,2,128,0,1,fp8,fp8,0,0.2367146611213684
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,512,24,4,128,0,1,float16,float16,0,0.33928533395131427
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,512,24,4,128,0,1,float16,fp8,0,0.32290132840474445
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,512,24,4,128,0,1,fp8,fp8,0,0.279039998849233
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,512,24,8,128,0,1,float16,float16,0,0.4194986820220947
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,512,24,8,128,0,1,float16,fp8,0,0.39509332180023193
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,512,24,8,128,0,1,fp8,fp8,0,0.33689598242441815
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,512,24,24,128,0,1,float16,float16,0,0.2635093331336975
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,512,24,24,128,0,1,fp8,fp8,0,0.2397866646448771
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,512,24,24,128,0,1,float16,fp8,0,0.21333332856496176
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,512,24,1,128,0,1,float16,float16,0,0.1327786644299825
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,512,24,1,128,0,1,float16,fp8,0,0.13363200426101685
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,512,24,1,128,0,1,fp8,fp8,0,0.09079466263453166
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,512,24,2,128,0,1,float16,float16,0,0.13772799571355185
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,512,24,2,128,0,1,float16,fp8,0,0.14028799533843994
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,512,24,4,128,0,1,float16,float16,0,0.14455466469128928
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,512,24,2,128,0,1,fp8,fp8,0,0.09113599856694539
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,512,24,4,128,0,1,float16,fp8,0,0.14148267110188803
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,512,24,4,128,0,1,fp8,fp8,0,0.09130666653315227
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,512,24,8,128,0,1,float16,float16,0,0.14523733655611673
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,512,24,8,128,0,1,float16,fp8,0,0.14353066682815552
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,512,24,8,128,0,1,fp8,fp8,0,0.09215999643007915
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,512,24,24,128,0,1,float16,float16,0,0.09096533060073853
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,512,24,24,128,0,1,float16,fp8,0,0.08840533097585042
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,512,24,24,128,0,1,fp8,fp8,0,0.05751466751098633
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,512,24,1,128,0,1,float16,float16,0,0.08004266520341237
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,512,24,1,128,0,1,float16,fp8,0,0.07867733140786488
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,512,24,1,128,0,1,fp8,fp8,0,0.06331733365853627
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,512,24,2,128,0,1,float16,float16,0,0.07748266557852428
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,512,24,2,128,0,1,float16,fp8,0,0.07867733140786488
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,512,24,2,128,0,1,fp8,fp8,0,0.053247998158137
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,512,24,4,128,0,1,float16,float16,0,0.07884799937407176
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,512,24,8,128,0,1,float16,float16,0,0.07901866734027863
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,512,24,4,128,0,1,fp8,fp8,0,0.05205333232879639
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,512,24,4,128,0,1,float16,fp8,0,0.07867733140786488
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,512,24,8,128,0,1,float16,fp8,0,0.07901866734027863
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,512,24,8,128,0,1,fp8,fp8,0,0.05273599922657013
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,512,24,24,128,0,1,float16,float16,0,0.051029334465662636
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,512,24,24,128,0,1,float16,fp8,0,0.05085866649945577
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,512,24,24,128,0,1,fp8,fp8,0,0.03566933423280716
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,512,24,1,128,0,1,float16,float16,0,0.04778666794300079
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,512,24,1,128,0,1,float16,fp8,0,0.04727466901143392
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,512,24,1,128,0,1,fp8,fp8,0,0.0339626669883728
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,512,24,2,128,0,1,float16,float16,0,0.046762665112813316
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,512,24,2,128,0,1,float16,fp8,0,0.04778666794300079
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,512,24,2,128,0,1,fp8,fp8,0,0.03379199902216593
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,512,24,4,128,0,1,float16,float16,0,0.04795733094215393
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,512,24,4,128,0,1,fp8,fp8,0,0.034304000437259674
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,512,24,4,128,0,1,float16,fp8,0,0.04727466901143392
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,512,24,8,128,0,1,float16,float16,0,0.04761599997679392
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,512,24,8,128,0,1,fp8,fp8,0,0.0339626669883728
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,512,24,8,128,0,1,float16,fp8,0,0.04915200173854828
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,512,24,24,128,0,1,float16,float16,0,0.03379199902216593
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,512,24,24,128,0,1,float16,fp8,0,0.03379199902216593
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,512,24,24,128,0,1,fp8,fp8,0,0.025087999800841015
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,512,24,1,128,0,1,float16,float16,0,0.03259733319282532
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,512,24,1,128,0,1,float16,fp8,0,0.03310933212439219
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,512,24,2,128,0,1,float16,float16,0,0.03293866664171219
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,512,24,1,128,0,1,fp8,fp8,0,0.024405332903067272
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,512,24,2,128,0,1,float16,fp8,0,0.03293866664171219
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,512,24,2,128,0,1,fp8,fp8,0,0.02491733431816101
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,512,24,4,128,0,1,float16,float16,0,0.03276800115903219
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,512,24,4,128,0,1,float16,fp8,0,0.03293866664171219
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,512,24,4,128,0,1,fp8,fp8,0,0.024405332903067272
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,512,24,8,128,0,1,float16,fp8,0,0.03293866664171219
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,512,24,8,128,0,1,float16,float16,0,0.03276800115903219
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,512,24,8,128,0,1,fp8,fp8,0,0.024405332903067272
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,256,24,1,128,0,1,fp8,fp8,0,2.0261546770731607
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,256,24,1,128,0,1,float16,float16,0,2.7641172409057617
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,256,24,1,128,0,1,float16,fp8,0,2.757120132446289
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,256,24,2,128,0,1,float16,float16,0,2.9025281270345054
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,256,24,2,128,0,1,float16,fp8,0,2.8349440892537436
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,256,24,2,128,0,1,fp8,fp8,0,2.161322593688965
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,256,24,4,128,0,1,float16,float16,0,3.2349866231282554
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,256,24,4,128,0,1,float16,fp8,0,3.17525323232015
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,256,24,4,128,0,1,fp8,fp8,0,2.4458239873250327
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,256,24,8,128,0,1,float16,fp8,0,3.763711929321289
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,256,24,8,128,0,1,float16,float16,0,3.8761812845865884
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,256,24,8,128,0,1,fp8,fp8,0,3.0115839640299478
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,256,24,24,128,0,1,float16,float16,0,3.1240533192952475
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,256,24,24,128,0,1,float16,fp8,0,2.958677291870117
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,256,24,1,128,0,1,float16,float16,0,1.3272746404012044
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,256,24,24,128,0,1,fp8,fp8,0,2.5374719301859536
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,256,24,1,128,0,1,float16,fp8,0,1.4238719940185547
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,256,24,1,128,0,1,fp8,fp8,0,1.0199039777119954
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,256,24,2,128,0,1,float16,float16,0,1.4371840159098308
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,256,24,2,128,0,1,float16,fp8,0,1.4493014017740886
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,256,24,2,128,0,1,fp8,fp8,0,1.0453333059946697
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,256,24,4,128,0,1,float16,float16,0,1.5831039746602376
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,256,24,4,128,0,1,float16,fp8,0,1.5469226837158203
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,256,24,4,128,0,1,fp8,fp8,0,1.209173361460368
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,256,24,8,128,0,1,float16,float16,0,1.906175931294759
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,256,24,8,128,0,1,float16,fp8,0,1.8501973152160645
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,256,24,8,128,0,1,fp8,fp8,0,1.4561279614766438
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,256,24,24,128,0,1,float16,float16,0,1.5638186136881511
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,256,24,24,128,0,1,float16,fp8,0,1.4776320457458496
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,256,24,24,128,0,1,fp8,fp8,0,1.25764266649882
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,256,24,1,128,0,1,float16,float16,0,0.6545066833496094
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,256,24,1,128,0,1,float16,fp8,0,0.6490453481674194
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,256,24,1,128,0,1,fp8,fp8,0,0.47598934173583984
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,256,24,2,128,0,1,float16,float16,0,0.696832021077474
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,256,24,2,128,0,1,float16,fp8,0,0.6884693304697672
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,256,24,2,128,0,1,fp8,fp8,0,0.5164373318354288
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,256,24,4,128,0,1,float16,float16,0,0.7995733420054117
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,256,24,4,128,0,1,float16,fp8,0,0.7809706528981527
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,256,24,4,128,0,1,fp8,fp8,0,0.5502293507258097
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,256,24,8,128,0,1,float16,float16,0,0.9169920285542806
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,256,24,8,128,0,1,float16,fp8,0,0.8888320128122965
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,256,24,8,128,0,1,fp8,fp8,0,0.6753280162811279
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,256,24,24,128,0,1,float16,float16,0,0.7490560213724772
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,256,24,24,128,0,1,float16,fp8,0,0.6949546337127686
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,256,24,1,128,0,1,float16,float16,0,0.2167466680208842
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,256,24,24,128,0,1,fp8,fp8,0,0.5555200179417928
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,256,24,1,128,0,1,float16,fp8,0,0.21760000785191855
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,256,24,1,128,0,1,fp8,fp8,0,0.16708266735076904
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,256,24,2,128,0,1,float16,float16,0,0.23057067394256592
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,256,24,2,128,0,1,float16,fp8,0,0.2362026572227478
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,256,24,2,128,0,1,fp8,fp8,0,0.18397865692774454
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,256,24,4,128,0,1,float16,float16,0,0.2698240081469218
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,256,24,4,128,0,1,float16,fp8,0,0.25497599442799884
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,256,24,4,128,0,1,fp8,fp8,0,0.2218666672706604
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,256,24,8,128,0,1,float16,float16,0,0.37751468022664386
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,256,24,8,128,0,1,float16,fp8,0,0.34628268082936603
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,256,24,8,128,0,1,fp8,fp8,0,0.28484266996383667
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,256,24,24,128,0,1,float16,float16,0,0.24268800020217896
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,256,24,24,128,0,1,float16,fp8,0,0.18261333306630453
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,256,24,24,128,0,1,fp8,fp8,0,0.21316266059875488
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,256,24,1,128,0,1,float16,float16,0,0.09489066402117412
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,256,24,1,128,0,1,float16,fp8,0,0.09540266791979472
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,256,24,1,128,0,1,fp8,fp8,0,0.06775466601053874
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,256,24,2,128,0,1,float16,float16,0,0.09642666578292847
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,256,24,2,128,0,1,float16,fp8,0,0.09454933802286784
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,256,24,2,128,0,1,fp8,fp8,0,0.06809600194295247
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,256,24,4,128,0,1,float16,float16,0,0.09591466188430786
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,256,24,4,128,0,1,fp8,fp8,0,0.06724266707897186
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,256,24,4,128,0,1,float16,fp8,0,0.10188800096511841
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,256,24,8,128,0,1,float16,float16,0,0.10086400310198466
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,256,24,8,128,0,1,float16,fp8,0,0.09864532947540283
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,256,24,8,128,0,1,fp8,fp8,0,0.06877866884072621
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,256,24,24,128,0,1,float16,float16,0,0.06843733290831248
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,256,24,24,128,0,1,float16,fp8,0,0.05922133227189382
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,256,24,24,128,0,1,fp8,fp8,0,0.04334933559099833
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,256,24,1,128,0,1,float16,float16,0,0.054272000988324486
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,256,24,1,128,0,1,fp8,fp8,0,0.03976533313592275
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,256,24,1,128,0,1,float16,fp8,0,0.05461333195368449
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,256,24,2,128,0,1,float16,float16,0,0.05341866612434387
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,256,24,2,128,0,1,float16,fp8,0,0.05341866612434387
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,256,24,2,128,0,1,fp8,fp8,0,0.03976533313592275
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,256,24,4,128,0,1,float16,float16,0,0.05358933409055074
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,256,24,4,128,0,1,float16,fp8,0,0.05444266895453135
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,256,24,4,128,0,1,fp8,fp8,0,0.03942399968703588
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,256,24,8,128,0,1,float16,float16,0,0.055125330885251365
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,256,24,8,128,0,1,float16,fp8,0,0.055125330885251365
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,256,24,8,128,0,1,fp8,fp8,0,0.040448000033696495
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,256,24,24,128,0,1,float16,float16,0,0.036864000062147774
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,256,24,24,128,0,1,float16,fp8,0,0.05529599885145823
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,256,24,24,128,0,1,fp8,fp8,0,0.02867199977238973
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,256,24,1,128,0,1,float16,float16,0,0.03498666733503342
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,256,24,1,128,0,1,float16,fp8,0,0.03498666733503342
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,256,24,1,128,0,1,fp8,fp8,0,0.02679466704527537
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,256,24,2,128,0,1,float16,float16,0,0.03498666733503342
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,256,24,2,128,0,1,float16,fp8,0,0.034474665919939675
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,256,24,2,128,0,1,fp8,fp8,0,0.0264533335963885
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,256,24,4,128,0,1,float16,float16,0,0.034304000437259674
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,256,24,4,128,0,1,float16,fp8,0,0.034815999368826546
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,256,24,4,128,0,1,fp8,fp8,0,0.02679466704527537
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,256,24,8,128,0,1,float16,float16,0,0.03498666733503342
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,256,24,8,128,0,1,float16,fp8,0,0.03498666733503342
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,256,24,8,128,0,1,fp8,fp8,0,0.027647999425729115
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,256,24,24,128,0,1,float16,float16,0,0.024234667420387268
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,256,24,24,128,0,1,float16,fp8,0,0.0240639994541804
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,256,24,24,128,0,1,fp8,fp8,0,0.01911466692884763
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,256,24,1,128,0,1,float16,float16,0,0.023039999107519787
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,256,24,1,128,0,1,float16,fp8,0,0.023381332556406658
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,256,24,1,128,0,1,fp8,fp8,0,0.01791999985774358
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,256,24,2,128,0,1,float16,float16,0,0.02372266600529353
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,256,24,2,128,0,1,float16,fp8,0,0.023893333971500397
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,256,24,2,128,0,1,fp8,fp8,0,0.018602666755517323
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,256,24,4,128,0,1,float16,float16,0,0.023039999107519787
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,256,24,4,128,0,1,float16,fp8,0,0.023039999107519787
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,256,24,4,128,0,1,fp8,fp8,0,0.01826133330663045
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,256,24,8,128,0,1,float16,fp8,0,0.023381332556406658
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,256,24,8,128,0,1,fp8,fp8,0,0.01826133330663045
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,256,24,8,128,0,1,float16,float16,0,0.023381332556406658
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,256,24,24,128,0,1,float16,float16,0,0.01826133330663045
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,256,24,24,128,0,1,float16,fp8,0,0.018090666582187016
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,256,24,24,128,0,1,fp8,fp8,0,0.014165333161751429
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,256,24,1,128,0,1,float16,float16,0,0.017407999684413273
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,256,24,1,128,0,1,float16,fp8,0,0.01791999985774358
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,256,24,1,128,0,1,fp8,fp8,0,0.013823999712864557
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,256,24,2,128,0,1,float16,float16,0,0.017749333133300144
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,256,24,2,128,0,1,float16,fp8,0,0.01791999985774358
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,256,24,2,128,0,1,fp8,fp8,0,0.014165333161751429
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,256,24,4,128,0,1,float16,float16,0,0.01757866640885671
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,256,24,4,128,0,1,float16,fp8,0,0.017749333133300144
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,256,24,4,128,0,1,fp8,fp8,0,0.013994666437307993
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,256,24,8,128,0,1,float16,float16,0,0.01791999985774358
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,256,24,8,128,0,1,float16,fp8,0,0.017749333133300144
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,256,24,8,128,0,1,fp8,fp8,0,0.014165333161751429
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,128,24,1,128,0,1,float16,float16,0,1.3358079592386882
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,128,24,1,128,0,1,float16,fp8,0,1.3255679607391357
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,128,24,1,128,0,1,fp8,fp8,0,0.936959981918335
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,128,24,2,128,0,1,float16,float16,0,1.407317320505778
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,128,24,2,128,0,1,fp8,fp8,0,0.9983999729156494
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,128,24,2,128,0,1,float16,fp8,0,1.3880319595336914
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,128,24,4,128,0,1,float16,float16,0,1.5911253293355305
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,128,24,4,128,0,1,float16,fp8,0,1.550165335337321
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,128,24,4,128,0,1,fp8,fp8,0,1.16974933942159
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,128,24,8,128,0,1,float16,float16,0,1.9176106452941895
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,128,24,8,128,0,1,float16,fp8,0,1.8501973152160645
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,128,24,8,128,0,1,fp8,fp8,0,1.4564693768819172
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,128,24,24,128,0,1,float16,float16,0,1.5820800463358562
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,128,24,24,128,0,1,float16,fp8,0,1.4904319445292156
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,128,24,24,128,0,1,fp8,fp8,0,1.222314675649007
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,128,24,1,128,0,1,float16,float16,0,0.6355626583099365
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,128,24,1,128,0,1,float16,fp8,0,0.6265173355738322
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,128,24,1,128,0,1,fp8,fp8,0,0.4304213523864746
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,128,24,2,128,0,1,float16,float16,0,0.6512639919916788
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,128,24,2,128,0,1,float16,fp8,0,0.6446079810460409
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,128,24,2,128,0,1,fp8,fp8,0,0.457045316696167
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,128,24,4,128,0,1,float16,float16,0,0.7464959621429443
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,128,24,4,128,0,1,float16,fp8,0,0.7364266713460287
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,128,24,4,128,0,1,fp8,fp8,0,0.5210453271865845
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,128,24,8,128,0,1,float16,float16,0,0.922111988067627
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,128,24,8,128,0,1,float16,fp8,0,0.8884906768798828
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,128,24,8,128,0,1,fp8,fp8,0,0.6676479975382487
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,128,24,24,128,0,1,float16,float16,0,0.7551999886830648
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,128,24,24,128,0,1,float16,fp8,0,0.6976853211720785
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,128,24,24,128,0,1,fp8,fp8,0,0.5683199961980184
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,128,24,1,128,0,1,float16,float16,0,0.17937066157658896
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,128,24,1,128,0,1,float16,fp8,0,0.17544533809026083
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,128,24,1,128,0,1,fp8,fp8,0,0.1288533310095469
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,128,24,2,128,0,1,float16,float16,0,0.20804266134897867
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,128,24,2,128,0,1,float16,fp8,0,0.2106026609738668
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,128,24,2,128,0,1,fp8,fp8,0,0.16537599762280783
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,128,24,4,128,0,1,float16,float16,0,0.26231465737024945
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,128,24,4,128,0,1,float16,fp8,0,0.24627200762430826
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,128,24,4,128,0,1,fp8,fp8,0,0.20155733823776245
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,128,24,8,128,0,1,float16,float16,0,0.3809279998143514
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,128,24,8,128,0,1,float16,fp8,0,0.3548159996668498
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,128,24,8,128,0,1,fp8,fp8,0,0.2701653242111206
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,128,24,24,128,0,1,float16,float16,0,0.24729599555333456
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,128,24,24,128,0,1,float16,fp8,0,0.1771519978841146
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,128,24,1,128,0,1,float16,float16,0,0.0730453332265218
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,128,24,24,128,0,1,fp8,fp8,0,0.19575466712315878
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,128,24,1,128,0,1,float16,fp8,0,0.07509333391984303
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,128,24,1,128,0,1,fp8,fp8,0,0.05495466788609823
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,128,24,2,128,0,1,float16,float16,0,0.07423999905586243
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,128,24,2,128,0,1,float16,fp8,0,0.07679999868075053
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,128,24,2,128,0,1,fp8,fp8,0,0.05478399991989136
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,128,24,4,128,0,1,float16,float16,0,0.07799466451009114
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,128,24,4,128,0,1,float16,fp8,0,0.07321600119272868
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,128,24,4,128,0,1,fp8,fp8,0,0.055125330885251365
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,128,24,8,128,0,1,float16,float16,0,0.0846506655216217
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,128,24,8,128,0,1,float16,fp8,0,0.0766293356815974
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,128,24,8,128,0,1,fp8,fp8,0,0.057002668579419456
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,128,24,24,128,0,1,float16,float16,0,0.048469334840774536
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,128,24,24,128,0,1,fp8,fp8,0,0.0363520011305809
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,128,24,1,128,0,1,float16,float16,0,0.040618665516376495
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,128,24,24,128,0,1,float16,fp8,0,0.04642133414745331
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,128,24,1,128,0,1,float16,fp8,0,0.04113066693147024
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,128,24,1,128,0,1,fp8,fp8,0,0.032255999743938446
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,128,24,2,128,0,1,float16,float16,0,0.04130133241415024
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,128,24,2,128,0,1,float16,fp8,0,0.04130133241415024
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,128,24,2,128,0,1,fp8,fp8,0,0.03276800115903219
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,128,24,4,128,0,1,float16,float16,0,0.04095999896526337
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,128,24,4,128,0,1,float16,fp8,0,0.04113066693147024
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,128,24,4,128,0,1,fp8,fp8,0,0.03259733319282532
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,128,24,8,128,0,1,float16,float16,0,0.04181333382924398
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,128,24,8,128,0,1,float16,fp8,0,0.04113066693147024
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,128,24,8,128,0,1,fp8,fp8,0,0.03345066557327906
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,128,24,24,128,0,1,float16,float16,0,0.030378667016824085
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,128,24,24,128,0,1,float16,fp8,0,0.029696000119050343
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,128,24,24,128,0,1,fp8,fp8,0,0.024746666351954143
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,128,24,1,128,0,1,float16,float16,0,0.027306665976842243
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,128,24,1,128,0,1,float16,fp8,0,0.027477333943049114
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,128,24,1,128,0,1,fp8,fp8,0,0.022698665658632915
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,128,24,2,128,0,1,float16,float16,0,0.027818667391935985
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,128,24,2,128,0,1,float16,fp8,0,0.027647999425729115
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,128,24,2,128,0,1,fp8,fp8,0,0.022357332209746044
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,128,24,4,128,0,1,float16,float16,0,0.027477333943049114
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,128,24,4,128,0,1,float16,fp8,0,0.027306665976842243
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,128,24,8,128,0,1,float16,float16,0,0.028330666323502857
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,128,24,8,128,0,1,fp8,fp8,0,0.023039999107519787
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,128,24,8,128,0,1,float16,fp8,0,0.027477333943049114
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,128,24,24,128,0,1,float16,float16,0,0.019797333826621372
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,128,24,4,128,0,1,fp8,fp8,0,0.022357332209746044
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,128,24,24,128,0,1,float16,fp8,0,0.019968000551064808
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,128,24,1,128,0,1,float16,float16,0,0.018602666755517323
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,128,24,24,128,0,1,fp8,fp8,0,0.016554666062196095
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,128,24,1,128,0,1,fp8,fp8,0,0.015530666957298914
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,128,24,1,128,0,1,float16,fp8,0,0.018944000204404194
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,128,24,2,128,0,1,float16,float16,0,0.01911466692884763
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,128,24,2,128,0,1,float16,fp8,0,0.019285333653291065
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,128,24,2,128,0,1,fp8,fp8,0,0.01570133368174235
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,128,24,4,128,0,1,float16,float16,0,0.018432000031073887
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,128,24,4,128,0,1,float16,fp8,0,0.01877333347996076
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,128,24,8,128,0,1,float16,float16,0,0.01877333347996076
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,128,24,4,128,0,1,fp8,fp8,0,0.015530666957298914
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,128,24,24,128,0,1,float16,float16,0,0.014335999886194864
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,128,24,8,128,0,1,float16,fp8,0,0.018944000204404194
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,128,24,8,128,0,1,fp8,fp8,0,0.01570133368174235
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,128,24,24,128,0,1,float16,fp8,0,0.013994666437307993
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,128,24,24,128,0,1,fp8,fp8,0,0.011946666985750198
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,128,24,1,128,0,1,float16,float16,0,0.013482666263977686
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,128,24,1,128,0,1,float16,fp8,0,0.013653332988421122
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,128,24,1,128,0,1,fp8,fp8,0,0.011776000261306763
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,128,24,2,128,0,1,float16,float16,0,0.013823999712864557
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,128,24,2,128,0,1,fp8,fp8,0,0.011946666985750198
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,128,24,2,128,0,1,float16,fp8,0,0.013823999712864557
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,128,24,4,128,0,1,float16,float16,0,0.013653332988421122
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,128,24,4,128,0,1,float16,fp8,0,0.013653332988421122
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,128,24,8,128,0,1,float16,float16,0,0.013653332988421122
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,128,24,4,128,0,1,fp8,fp8,0,0.011776000261306763
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,128,24,8,128,0,1,float16,fp8,0,0.013823999712864557
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,128,24,8,128,0,1,fp8,fp8,0,0.012117333710193634
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,128,24,24,128,0,1,float16,float16,0,0.012458667159080505
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,128,24,24,128,0,1,float16,fp8,0,0.012800000607967377
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,128,24,24,128,0,1,fp8,fp8,0,0.010751999914646149
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,128,24,1,128,0,1,float16,float16,0,0.01228800043463707
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,128,24,1,128,0,1,float16,fp8,0,0.012458667159080505
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,128,24,1,128,0,1,fp8,fp8,0,0.010581333190202713
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,128,24,2,128,0,1,float16,fp8,0,0.012629333883523941
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,128,24,2,128,0,1,float16,float16,0,0.012458667159080505
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,128,24,2,128,0,1,fp8,fp8,0,0.010751999914646149
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,128,24,4,128,0,1,float16,float16,0,0.01228800043463707
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,128,24,4,128,0,1,float16,fp8,0,0.012629333883523941
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,128,24,8,128,0,1,float16,float16,0,0.012458667159080505
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,128,24,4,128,0,1,fp8,fp8,0,0.010922666639089584
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,128,24,8,128,0,1,fp8,fp8,0,0.010751999914646149
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,128,24,8,128,0,1,float16,fp8,0,0.012629333883523941
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,64,24,1,128,0,1,float16,fp8,0,0.6167893409729004
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,64,24,1,128,0,1,float16,float16,0,0.6169600089391073
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,64,24,1,128,0,1,fp8,fp8,0,0.4068693319956462
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,64,24,2,128,0,1,float16,float16,0,0.655189315478007
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,64,24,2,128,0,1,float16,fp8,0,0.6498986482620239
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,64,24,2,128,0,1,fp8,fp8,0,0.44236799081166583
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,64,24,4,128,0,1,float16,float16,0,0.7478613058725992
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,64,24,4,128,0,1,float16,fp8,0,0.730282704035441
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,64,24,4,128,0,1,fp8,fp8,0,0.5282133420308431
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,64,24,8,128,0,1,float16,float16,0,0.9260373115539551
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,64,24,8,128,0,1,float16,fp8,0,0.8845653533935547
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,64,24,8,128,0,1,fp8,fp8,0,0.6790826320648193
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,64,24,24,128,0,1,float16,float16,0,0.7570772965749105
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,64,24,24,128,0,1,fp8,fp8,0,0.5741226673126221
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,64,24,1,128,0,1,float16,float16,0,0.1802240014076233
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,64,24,24,128,0,1,float16,fp8,0,0.733354647954305
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,64,24,1,128,0,1,float16,fp8,0,0.17698132991790771
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,64,24,1,128,0,1,fp8,fp8,0,0.11246933539708455
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,64,24,2,128,0,1,float16,float16,0,0.20616533358891806
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,64,24,2,128,0,1,float16,fp8,0,0.1976319948832194
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,64,24,2,128,0,1,fp8,fp8,0,0.1520639955997467
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,64,24,4,128,0,1,float16,float16,0,0.26129066944122314
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,64,24,4,128,0,1,float16,fp8,0,0.24832000335057577
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,64,24,4,128,0,1,fp8,fp8,0,0.19012266397476196
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,64,24,8,128,0,1,float16,float16,0,0.3821226755777995
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,64,24,8,128,0,1,fp8,fp8,0,0.2595840096473694
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,64,24,8,128,0,1,float16,fp8,0,0.34969600041707355
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,64,24,24,128,0,1,float16,float16,0,0.24217599630355835
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,64,24,24,128,0,1,float16,fp8,0,0.1786880095799764
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,64,24,24,128,0,1,fp8,fp8,0,0.1950719952583313
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,64,24,1,128,0,1,float16,float16,0,0.06263466676076253
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,64,24,1,128,0,1,float16,fp8,0,0.06365866462389629
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,64,24,1,128,0,1,fp8,fp8,0,0.04966400067011515
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,64,24,2,128,0,1,float16,float16,0,0.0631466656923294
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,64,24,2,128,0,1,float16,fp8,0,0.06400000055631001
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,64,24,2,128,0,1,fp8,fp8,0,0.049322664737701416
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,64,24,4,128,0,1,float16,float16,0,0.06297599772612254
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,64,24,4,128,0,1,float16,fp8,0,0.06331733365853627
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,64,24,4,128,0,1,fp8,fp8,0,0.04983466863632202
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,64,24,8,128,0,1,float16,float16,0,0.077824001510938
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,64,24,8,128,0,1,float16,fp8,0,0.06673066814740498
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,64,24,24,128,0,1,float16,float16,0,0.04420266548792521
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,64,24,8,128,0,1,fp8,fp8,0,0.05239466826121012
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,64,24,24,128,0,1,float16,fp8,0,0.040789333482583366
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,64,24,24,128,0,1,fp8,fp8,0,0.03242666771014532
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,64,24,1,128,0,1,float16,float16,0,0.03549866626660029
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,64,24,1,128,0,1,float16,fp8,0,0.03549866626660029
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,64,24,1,128,0,1,fp8,fp8,0,0.0288426677385966
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,64,24,2,128,0,1,float16,float16,0,0.03618133316437403
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,64,24,2,128,0,1,float16,fp8,0,0.0363520011305809
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,64,24,2,128,0,1,fp8,fp8,0,0.02867199977238973
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,64,24,4,128,0,1,float16,float16,0,0.0363520011305809
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,64,24,4,128,0,1,float16,fp8,0,0.03601066768169403
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,64,24,4,128,0,1,fp8,fp8,0,0.0290133332212766
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,64,24,8,128,0,1,float16,float16,0,0.037205333511034645
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,64,24,8,128,0,1,float16,fp8,0,0.03703466554482778
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,64,24,8,128,0,1,fp8,fp8,0,0.030207999050617218
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,64,24,24,128,0,1,float16,float16,0,0.027136000494162243
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,64,24,24,128,0,1,float16,fp8,0,0.02628266563018163
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,64,24,24,128,0,1,fp8,fp8,0,0.02218666672706604
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,64,24,1,128,0,1,float16,float16,0,0.0240639994541804
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,64,24,1,128,0,1,float16,fp8,0,0.024234667420387268
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,64,24,1,128,0,1,fp8,fp8,0,0.020309332758188248
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,64,24,2,128,0,1,float16,float16,0,0.0240639994541804
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,64,24,2,128,0,1,float16,fp8,0,0.024234667420387268
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,64,24,2,128,0,1,fp8,fp8,0,0.020479999482631683
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,64,24,4,128,0,1,float16,float16,0,0.024234667420387268
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,64,24,4,128,0,1,float16,fp8,0,0.02457600086927414
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,64,24,4,128,0,1,fp8,fp8,0,0.020138667275508244
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,64,24,8,128,0,1,float16,float16,0,0.025087999800841015
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,64,24,8,128,0,1,float16,fp8,0,0.02491733431816101
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,64,24,8,128,0,1,fp8,fp8,0,0.020992000897725422
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,64,24,24,128,0,1,float16,float16,0,0.0170666662355264
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,64,24,24,128,0,1,float16,fp8,0,0.0170666662355264
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,64,24,24,128,0,1,fp8,fp8,0,0.014848000059525171
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,64,24,1,128,0,1,float16,float16,0,0.01570133368174235
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,64,24,1,128,0,1,float16,fp8,0,0.01621333385507266
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,64,24,1,128,0,1,fp8,fp8,0,0.013994666437307993
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,64,24,2,128,0,1,float16,float16,0,0.01570133368174235
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,64,24,2,128,0,1,float16,fp8,0,0.016042667130629223
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,64,24,2,128,0,1,fp8,fp8,0,0.014335999886194864
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,64,24,4,128,0,1,float16,float16,0,0.015872000406185787
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,64,24,4,128,0,1,float16,fp8,0,0.01570133368174235
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,64,24,4,128,0,1,fp8,fp8,0,0.013823999712864557
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,64,24,8,128,0,1,float16,float16,0,0.016042667130629223
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,64,24,8,128,0,1,float16,fp8,0,0.016042667130629223
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,64,24,8,128,0,1,fp8,fp8,0,0.014165333161751429
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,64,24,24,128,0,1,float16,float16,0,0.012117333710193634
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,64,24,24,128,0,1,float16,fp8,0,0.012117333710193634
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,64,24,24,128,0,1,fp8,fp8,0,0.010922666639089584
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,64,24,1,128,0,1,float16,float16,0,0.011605333536863327
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,64,24,1,128,0,1,float16,fp8,0,0.011605333536863327
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,64,24,1,128,0,1,fp8,fp8,0,0.010410666465759277
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,64,24,2,128,0,1,float16,float16,0,0.011605333536863327
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,64,24,2,128,0,1,float16,fp8,0,0.011605333536863327
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,64,24,2,128,0,1,fp8,fp8,0,0.010410666465759277
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,64,24,4,128,0,1,float16,float16,0,0.011605333536863327
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,64,24,4,128,0,1,float16,fp8,0,0.011776000261306763
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,64,24,4,128,0,1,fp8,fp8,0,0.010581333190202713
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,64,24,8,128,0,1,float16,float16,0,0.011605333536863327
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,64,24,8,128,0,1,float16,fp8,0,0.011776000261306763
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,64,24,8,128,0,1,fp8,fp8,0,0.010751999914646149
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,64,24,24,128,0,1,float16,float16,0,0.010581333190202713
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,64,24,24,128,0,1,float16,fp8,0,0.010751999914646149
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,64,24,24,128,0,1,fp8,fp8,0,0.009898666913310686
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,64,24,2,128,0,1,float16,float16,0,0.010239999741315842
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,64,24,1,128,0,1,float16,fp8,0,0.010581333190202713
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,64,24,1,128,0,1,fp8,fp8,0,0.009557333464423815
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,64,24,1,128,0,1,float16,float16,0,0.010410666465759277
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,64,24,2,128,0,1,float16,fp8,0,0.010581333190202713
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,64,24,2,128,0,1,fp8,fp8,0,0.009557333464423815
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,64,24,4,128,0,1,float16,float16,0,0.010410666465759277
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,64,24,4,128,0,1,float16,fp8,0,0.010581333190202713
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,64,24,4,128,0,1,fp8,fp8,0,0.009557333464423815
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,64,24,8,128,0,1,float16,float16,0,0.010410666465759277
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,64,24,8,128,0,1,float16,fp8,0,0.010581333190202713
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,64,24,8,128,0,1,fp8,fp8,0,0.009898666913310686
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,64,24,24,128,0,1,float16,float16,0,0.009898666913310686
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,64,24,24,128,0,1,float16,fp8,0,0.010069333637754122
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,64,24,24,128,0,1,fp8,fp8,0,0.009216000015536943
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,64,24,1,128,0,1,float16,float16,0,0.009898666913310686
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,64,24,1,128,0,1,float16,fp8,0,0.010069333637754122
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,64,24,1,128,0,1,fp8,fp8,0,0.009216000015536943
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,64,24,2,128,0,1,float16,float16,0,0.009898666913310686
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,64,24,2,128,0,1,float16,fp8,0,0.010239999741315842
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,64,24,2,128,0,1,fp8,fp8,0,0.00938666673998038
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,64,24,4,128,0,1,float16,float16,0,0.010069333637754122
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,64,24,4,128,0,1,float16,fp8,0,0.010410666465759277
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,64,24,4,128,0,1,fp8,fp8,0,0.009216000015536943
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,64,24,8,128,0,1,float16,float16,0,0.010069333637754122
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,64,24,8,128,0,1,float16,fp8,0,0.010410666465759277
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,64,24,8,128,0,1,fp8,fp8,0,0.00938666673998038
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,32,24,1,128,0,1,float16,fp8,0,0.18056533734003702
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,32,24,1,128,0,1,float16,float16,0,0.17937066157658896
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,32,24,1,128,0,1,fp8,fp8,0,0.14062933127085367
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,32,24,2,128,0,1,float16,float16,0,0.20309333006540933
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,32,24,2,128,0,1,float16,fp8,0,0.19797333081563315
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,32,24,2,128,0,1,fp8,fp8,0,0.14967466394106546
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,32,24,4,128,0,1,float16,float16,0,0.26692267258961994
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,32,24,4,128,0,1,float16,fp8,0,0.2476373314857483
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,32,24,4,128,0,1,fp8,fp8,0,0.20104533433914185
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,32,24,8,128,0,1,float16,float16,0,0.3858773310979207
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,32,24,8,128,0,1,float16,fp8,0,0.35447466373443604
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,32,24,24,128,0,1,float16,float16,0,0.250709335009257
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,32,24,8,128,0,1,fp8,fp8,0,0.27153066794077557
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,32,24,24,128,0,1,float16,fp8,0,0.1790293256441752
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,32,24,24,128,0,1,fp8,fp8,0,0.19746132691701254
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,32,24,1,128,0,1,float16,float16,0,0.06570666531721751
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,32,24,1,128,0,1,float16,fp8,0,0.06570666531721751
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,32,24,1,128,0,1,fp8,fp8,0,0.05444266895453135
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,32,24,2,128,0,1,float16,float16,0,0.06656000018119812
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,32,24,2,128,0,1,float16,fp8,0,0.06656000018119812
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,32,24,2,128,0,1,fp8,fp8,0,0.05444266895453135
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,32,24,4,128,0,1,float16,float16,0,0.06638933221499126
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,32,24,4,128,0,1,float16,fp8,0,0.06690133114655812
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,32,24,4,128,0,1,fp8,fp8,0,0.055125330885251365
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,32,24,8,128,0,1,float16,float16,0,0.07679999868075053
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,32,24,8,128,0,1,float16,fp8,0,0.07082666456699371
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,32,24,8,128,0,1,fp8,fp8,0,0.05717333157857259
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,32,24,24,128,0,1,float16,float16,0,0.04488533238569895
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,32,24,24,128,0,1,float16,fp8,0,0.04130133241415024
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,32,24,24,128,0,1,fp8,fp8,0,0.034645333886146545
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,32,24,1,128,0,1,float16,float16,0,0.037717332442601524
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,32,24,1,128,0,1,float16,fp8,0,0.03754666695992152
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,32,24,1,128,0,1,fp8,fp8,0,0.031744000812371574
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,32,24,2,128,0,1,float16,float16,0,0.03822933385769526
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,32,24,2,128,0,1,float16,fp8,0,0.038058665891488395
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,32,24,2,128,0,1,fp8,fp8,0,0.032255999743938446
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,32,24,4,128,0,1,float16,fp8,0,0.03754666695992152
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,32,24,4,128,0,1,float16,float16,0,0.03788800040880839
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,32,24,4,128,0,1,fp8,fp8,0,0.032085334261258446
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,32,24,8,128,0,1,float16,fp8,0,0.03857066730658213
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,32,24,8,128,0,1,float16,float16,0,0.03857066730658213
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,32,24,8,128,0,1,fp8,fp8,0,0.03293866664171219
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,32,24,24,128,0,1,float16,float16,0,0.0264533335963885
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,32,24,24,128,0,1,float16,fp8,0,0.025941332181294758
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,32,24,24,128,0,1,fp8,fp8,0,0.022698665658632915
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,32,24,1,128,0,1,float16,float16,0,0.02372266600529353
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,32,24,1,128,0,1,fp8,fp8,0,0.020479999482631683
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,32,24,2,128,0,1,float16,float16,0,0.0240639994541804
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,32,24,1,128,0,1,float16,fp8,0,0.02372266600529353
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,32,24,2,128,0,1,float16,fp8,0,0.024234667420387268
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,32,24,2,128,0,1,fp8,fp8,0,0.020821332931518555
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,32,24,4,128,0,1,float16,float16,0,0.0240639994541804
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,32,24,4,128,0,1,float16,fp8,0,0.023893333971500397
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,32,24,4,128,0,1,fp8,fp8,0,0.02065066620707512
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,32,24,8,128,0,1,float16,float16,0,0.024746666351954143
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,32,24,8,128,0,1,float16,fp8,0,0.024234667420387268
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,32,24,8,128,0,1,fp8,fp8,0,0.020992000897725422
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,32,24,24,128,0,1,float16,float16,0,0.018432000031073887
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,32,24,24,128,0,1,float16,fp8,0,0.01826133330663045
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,32,24,1,128,0,1,float16,float16,0,0.0170666662355264
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,32,24,24,128,0,1,fp8,fp8,0,0.016384000579516094
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,32,24,1,128,0,1,float16,fp8,0,0.0170666662355264
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,32,24,1,128,0,1,fp8,fp8,0,0.015189333508412043
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,32,24,2,128,0,1,float16,fp8,0,0.0170666662355264
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,32,24,2,128,0,1,float16,float16,0,0.016895999511082966
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,32,24,2,128,0,1,fp8,fp8,0,0.015360000232855478
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,32,24,4,128,0,1,float16,fp8,0,0.017237332959969837
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,32,24,4,128,0,1,float16,float16,0,0.017407999684413273
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,32,24,4,128,0,1,fp8,fp8,0,0.01570133368174235
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,32,24,8,128,0,1,float16,float16,0,0.017407999684413273
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,32,24,8,128,0,1,float16,fp8,0,0.01757866640885671
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,32,24,8,128,0,1,fp8,fp8,0,0.015360000232855478
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,32,24,24,128,0,1,float16,float16,0,0.011605333536863327
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,32,24,24,128,0,1,float16,fp8,0,0.011776000261306763
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,32,24,24,128,0,1,fp8,fp8,0,0.010410666465759277
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,32,24,1,128,0,1,float16,float16,0,0.011264000087976456
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,32,24,1,128,0,1,float16,fp8,0,0.011434666812419891
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,32,24,1,128,0,1,fp8,fp8,0,0.010239999741315842
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,32,24,2,128,0,1,float16,float16,0,0.011264000087976456
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,32,24,2,128,0,1,float16,fp8,0,0.011434666812419891
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,32,24,2,128,0,1,fp8,fp8,0,0.010069333637754122
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,32,24,4,128,0,1,float16,float16,0,0.01109333336353302
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,32,24,4,128,0,1,float16,fp8,0,0.011264000087976456
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,32,24,4,128,0,1,fp8,fp8,0,0.010410666465759277
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,32,24,8,128,0,1,float16,float16,0,0.011264000087976456
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,32,24,8,128,0,1,float16,fp8,0,0.011434666812419891
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,32,24,8,128,0,1,fp8,fp8,0,0.010581333190202713
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,32,24,24,128,0,1,float16,float16,0,0.009557333464423815
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,32,24,24,128,0,1,float16,fp8,0,0.00972800018886725
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,32,24,24,128,0,1,fp8,fp8,0,0.009045333291093508
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,32,24,1,128,0,1,float16,float16,0,0.009216000015536943
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,32,24,1,128,0,1,float16,fp8,0,0.009557333464423815
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,32,24,1,128,0,1,fp8,fp8,0,0.009045333291093508
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,32,24,2,128,0,1,float16,float16,0,0.00972800018886725
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,32,24,2,128,0,1,float16,fp8,0,0.00972800018886725
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,32,24,2,128,0,1,fp8,fp8,0,0.009045333291093508
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,32,24,4,128,0,1,float16,float16,0,0.00938666673998038
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,32,24,4,128,0,1,float16,fp8,0,0.00972800018886725
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,32,24,4,128,0,1,fp8,fp8,0,0.008874666566650072
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,32,24,8,128,0,1,float16,float16,0,0.00938666673998038
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,32,24,8,128,0,1,float16,fp8,0,0.00972800018886725
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,32,24,8,128,0,1,fp8,fp8,0,0.009216000015536943
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,32,24,24,128,0,1,float16,float16,0,0.008703999842206636
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,32,24,24,128,0,1,float16,fp8,0,0.008874666566650072
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,32,24,24,128,0,1,fp8,fp8,0,0.009216000015536943
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,32,24,1,128,0,1,float16,fp8,0,0.009045333291093508
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,32,24,1,128,0,1,float16,float16,0,0.008874666566650072
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,32,24,1,128,0,1,fp8,fp8,0,0.0085333331177632
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,32,24,2,128,0,1,float16,float16,0,0.008703999842206636
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,32,24,2,128,0,1,float16,fp8,0,0.009045333291093508
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,32,24,2,128,0,1,fp8,fp8,0,0.0085333331177632
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,32,24,4,128,0,1,float16,fp8,0,0.009045333291093508
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,32,24,4,128,0,1,float16,float16,0,0.008703999842206636
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,32,24,4,128,0,1,fp8,fp8,0,0.008362666393319765
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,32,24,8,128,0,1,float16,float16,0,0.008874666566650072
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,32,24,8,128,0,1,float16,fp8,0,0.009216000015536943
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,32,24,8,128,0,1,fp8,fp8,0,0.0085333331177632
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,32,24,24,128,0,1,float16,fp8,0,0.008703999842206636
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,32,24,24,128,0,1,float16,float16,0,0.0085333331177632
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,32,24,24,128,0,1,fp8,fp8,0,0.008362666393319765
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,32,24,1,128,0,1,float16,float16,0,0.008874666566650072
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,32,24,1,128,0,1,float16,fp8,0,0.008703999842206636
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,32,24,1,128,0,1,fp8,fp8,0,0.008362666393319765
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,32,24,2,128,0,1,float16,float16,0,0.0085333331177632
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,32,24,2,128,0,1,float16,fp8,0,0.009045333291093508
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,32,24,2,128,0,1,fp8,fp8,0,0.008703999842206636
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,32,24,4,128,0,1,float16,float16,0,0.008703999842206636
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,32,24,4,128,0,1,float16,fp8,0,0.008874666566650072
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,32,24,4,128,0,1,fp8,fp8,0,0.008362666393319765
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,32,24,8,128,0,1,float16,float16,0,0.0085333331177632
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,32,24,8,128,0,1,float16,fp8,0,0.008874666566650072
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,32,24,8,128,0,1,fp8,fp8,0,0.009557333464423815
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,16,24,1,128,0,1,float16,float16,0,0.09130666653315227
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,16,24,1,128,0,1,float16,fp8,0,0.09181867043177287
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,16,24,1,128,0,1,fp8,fp8,0,0.07355733215808868
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,16,24,2,128,0,1,float16,float16,0,0.09198932846387227
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,16,24,2,128,0,1,float16,fp8,0,0.091648002465566
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,16,24,2,128,0,1,fp8,fp8,0,0.07441066702206929
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,16,24,4,128,0,1,float16,float16,0,0.09352533022562663
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,16,24,4,128,0,1,float16,fp8,0,0.09250133236249287
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,16,24,4,128,0,1,fp8,fp8,0,0.0747519979874293
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,16,24,8,128,0,1,float16,float16,0,0.10700800021489461
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,16,24,8,128,0,1,float16,fp8,0,0.09915733337402344
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,16,24,8,128,0,1,fp8,fp8,0,0.077824001510938
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,16,24,24,128,0,1,float16,float16,0,0.05751466751098633
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,16,24,24,128,0,1,float16,fp8,0,0.05239466826121012
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,16,24,24,128,0,1,fp8,fp8,0,0.04215466479460398
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,16,24,1,128,0,1,float16,float16,0,0.04966400067011515
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,16,24,1,128,0,1,float16,fp8,0,0.04966400067011515
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,16,24,1,128,0,1,fp8,fp8,0,0.040789333482583366
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,16,24,2,128,0,1,float16,float16,0,0.05034666756788889
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,16,24,2,128,0,1,float16,fp8,0,0.05000533163547516
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,16,24,2,128,0,1,fp8,fp8,0,0.04027733455101649
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,16,24,4,128,0,1,float16,float16,0,0.05034666756788889
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,16,24,4,128,0,1,float16,fp8,0,0.05085866649945577
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,16,24,4,128,0,1,fp8,fp8,0,0.040789333482583366
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,16,24,8,128,0,1,float16,float16,0,0.05120000243186951
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,16,24,8,128,0,1,float16,fp8,0,0.051370665431022644
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,16,24,8,128,0,1,fp8,fp8,0,0.04130133241415024
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,16,24,24,128,0,1,float16,float16,0,0.0314026673634847
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,16,24,24,128,0,1,fp8,fp8,0,0.02628266563018163
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,16,24,24,128,0,1,float16,fp8,0,0.030720000465710957
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,16,24,1,128,0,1,float16,fp8,0,0.029866665601730347
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,16,24,1,128,0,1,fp8,fp8,0,0.025258667767047882
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,16,24,2,128,0,1,float16,float16,0,0.030207999050617218
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,16,24,1,128,0,1,float16,float16,0,0.030037333567937214
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,16,24,2,128,0,1,float16,fp8,0,0.030207999050617218
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,16,24,2,128,0,1,fp8,fp8,0,0.025429333249727886
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,16,24,4,128,0,1,float16,float16,0,0.029696000119050343
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,16,24,4,128,0,1,float16,fp8,0,0.029866665601730347
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,16,24,4,128,0,1,fp8,fp8,0,0.025429333249727886
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,16,24,8,128,0,1,float16,fp8,0,0.031061333914597828
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,16,24,8,128,0,1,float16,float16,0,0.030378667016824085
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,16,24,24,128,0,1,float16,float16,0,0.020138667275508244
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,16,24,8,128,0,1,fp8,fp8,0,0.025941332181294758
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,16,24,24,128,0,1,fp8,fp8,0,0.017237332959969837
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,16,24,24,128,0,1,float16,fp8,0,0.019797333826621372
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,16,24,1,128,0,1,float16,float16,0,0.018944000204404194
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,16,24,1,128,0,1,float16,fp8,0,0.01911466692884763
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,16,24,1,128,0,1,fp8,fp8,0,0.016895999511082966
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,16,24,2,128,0,1,float16,float16,0,0.018944000204404194
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,16,24,2,128,0,1,float16,fp8,0,0.019285333653291065
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,16,24,4,128,0,1,float16,float16,0,0.01877333347996076
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,16,24,4,128,0,1,float16,fp8,0,0.019285333653291065
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,16,24,2,128,0,1,fp8,fp8,0,0.01672533278663953
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,16,24,4,128,0,1,fp8,fp8,0,0.01672533278663953
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,16,24,8,128,0,1,float16,float16,0,0.0194560003777345
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,16,24,8,128,0,1,float16,fp8,0,0.019797333826621372
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,16,24,8,128,0,1,fp8,fp8,0,0.0170666662355264
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,16,24,24,128,0,1,float16,fp8,0,0.013823999712864557
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,16,24,24,128,0,1,float16,float16,0,0.013994666437307993
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,16,24,24,128,0,1,fp8,fp8,0,0.012117333710193634
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,16,24,1,128,0,1,float16,fp8,0,0.013653332988421122
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,16,24,1,128,0,1,float16,float16,0,0.013482666263977686
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,16,24,2,128,0,1,float16,float16,0,0.013653332988421122
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,16,24,1,128,0,1,fp8,fp8,0,0.012117333710193634
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,16,24,2,128,0,1,float16,fp8,0,0.013653332988421122
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,16,24,2,128,0,1,fp8,fp8,0,0.011946666985750198
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,16,24,4,128,0,1,float16,float16,0,0.01331199953953425
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,16,24,4,128,0,1,float16,fp8,0,0.013482666263977686
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,16,24,4,128,0,1,fp8,fp8,0,0.012117333710193634
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,16,24,8,128,0,1,float16,float16,0,0.013994666437307993
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,16,24,8,128,0,1,float16,fp8,0,0.013994666437307993
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,16,24,8,128,0,1,fp8,fp8,0,0.011946666985750198
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,16,24,24,128,0,1,float16,float16,0,0.00972800018886725
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,16,24,24,128,0,1,float16,fp8,0,0.009898666913310686
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,16,24,24,128,0,1,fp8,fp8,0,0.009216000015536943
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,16,24,1,128,0,1,float16,float16,0,0.009898666913310686
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,16,24,1,128,0,1,float16,fp8,0,0.010069333637754122
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,16,24,2,128,0,1,float16,float16,0,0.009898666913310686
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,16,24,2,128,0,1,float16,fp8,0,0.010069333637754122
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,16,24,1,128,0,1,fp8,fp8,0,0.009216000015536943
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,16,24,2,128,0,1,fp8,fp8,0,0.009216000015536943
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,16,24,4,128,0,1,float16,float16,0,0.00972800018886725
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,16,24,4,128,0,1,float16,fp8,0,0.010069333637754122
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,16,24,4,128,0,1,fp8,fp8,0,0.00972800018886725
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,16,24,8,128,0,1,float16,float16,0,0.00972800018886725
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,16,24,8,128,0,1,float16,fp8,0,0.010239999741315842
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,16,24,8,128,0,1,fp8,fp8,0,0.00938666673998038
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,16,24,24,128,0,1,float16,fp8,0,0.008874666566650072
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,16,24,24,128,0,1,float16,float16,0,0.008362666393319765
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,16,24,24,128,0,1,fp8,fp8,0,0.008192000289758047
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,16,24,1,128,0,1,float16,float16,0,0.008703999842206636
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,16,24,1,128,0,1,float16,fp8,0,0.009045333291093508
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,16,24,1,128,0,1,fp8,fp8,0,0.008362666393319765
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,16,24,2,128,0,1,float16,float16,0,0.009216000015536943
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,16,24,2,128,0,1,float16,fp8,0,0.008874666566650072
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,16,24,2,128,0,1,fp8,fp8,0,0.008362666393319765
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,16,24,4,128,0,1,float16,float16,0,0.008703999842206636
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,16,24,4,128,0,1,float16,fp8,0,0.009045333291093508
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,16,24,4,128,0,1,fp8,fp8,0,0.0085333331177632
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,16,24,8,128,0,1,float16,float16,0,0.008874666566650072
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,16,24,8,128,0,1,float16,fp8,0,0.009045333291093508
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,16,24,8,128,0,1,fp8,fp8,0,0.008874666566650072
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,16,24,24,128,0,1,float16,float16,0,0.0085333331177632
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,16,24,24,128,0,1,float16,fp8,0,0.008703999842206636
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,16,24,1,128,0,1,float16,float16,0,0.008362666393319765
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,16,24,24,128,0,1,fp8,fp8,0,0.008192000289758047
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,16,24,1,128,0,1,float16,fp8,0,0.008874666566650072
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,16,24,1,128,0,1,fp8,fp8,0,0.009178666397929192
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,16,24,2,128,0,1,float16,float16,0,0.0085333331177632
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,16,24,2,128,0,1,float16,fp8,0,0.00938666673998038
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,16,24,2,128,0,1,fp8,fp8,0,0.008703999842206636
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,16,24,4,128,0,1,float16,float16,0,0.0085333331177632
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,16,24,4,128,0,1,float16,fp8,0,0.008703999842206636
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,16,24,4,128,0,1,fp8,fp8,0,0.008192000289758047
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,16,24,8,128,0,1,float16,float16,0,0.0085333331177632
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,16,24,8,128,0,1,float16,fp8,0,0.008703999842206636
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,16,24,8,128,0,1,fp8,fp8,0,0.008192000289758047
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,16,24,24,128,0,1,float16,float16,0,0.008874666566650072
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,16,24,24,128,0,1,fp8,fp8,0,0.008192000289758047
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,16,24,24,128,0,1,float16,fp8,0,0.008362666393319765
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,16,24,1,128,0,1,float16,float16,0,0.008362666393319765
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,16,24,1,128,0,1,fp8,fp8,0,0.008362666393319765
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,16,24,2,128,0,1,float16,float16,0,0.008362666393319765
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,16,24,1,128,0,1,float16,fp8,0,0.009216000015536943
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,16,24,2,128,0,1,fp8,fp8,0,0.008874666566650072
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,16,24,2,128,0,1,float16,fp8,0,0.009045333291093508
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,16,24,4,128,0,1,float16,float16,0,0.0085333331177632
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,16,24,4,128,0,1,float16,fp8,0,0.009216000015536943
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,16,24,4,128,0,1,fp8,fp8,0,0.008021333565314611
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,16,24,8,128,0,1,float16,float16,0,0.0085333331177632
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,16,24,8,128,0,1,float16,fp8,0,0.009045333291093508
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,16,24,8,128,0,1,fp8,fp8,0,0.008192000289758047
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,16384,16,1,128,0,1,fp8,fp8,0,33.499305725097656
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,16384,16,2,128,0,1,fp8,fp8,0,34.377899169921875
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,16384,16,1,128,0,1,float16,float16,0,55.30470275878906
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,16384,16,1,128,0,1,float16,fp8,0,56.03856913248698
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,16384,16,2,128,0,1,float16,float16,0,55.15161641438802
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,16384,16,2,128,0,1,float16,fp8,0,55.042388916015625
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,16384,16,4,128,0,1,float16,float16,0,56.113321940104164
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,16384,16,4,128,0,1,float16,fp8,0,56.44629414876302
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,16384,16,4,128,0,1,fp8,fp8,0,34.00874582926432
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,16384,16,16,128,0,1,fp8,fp8,0,17.455103556315105
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,16384,16,16,128,0,1,float16,fp8,0,28.699134826660156
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,16384,16,16,128,0,1,float16,float16,0,28.849835713704426
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,16384,16,8,128,0,1,fp8,fp8,0,34.66308339436849
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,16384,16,1,128,0,1,float16,float16,0,27.65294901529948
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,16384,16,8,128,0,1,float16,float16,0,58.05841064453125
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,16384,16,8,128,0,1,float16,fp8,0,56.49698384602865
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,16384,16,1,128,0,1,fp8,fp8,0,16.23739751180013
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,16384,16,1,128,0,1,float16,fp8,0,27.869183858235676
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,16384,16,2,128,0,1,fp8,fp8,0,16.55347188313802
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,16384,16,2,128,0,1,float16,float16,0,27.467435201009113
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,16384,16,2,128,0,1,float16,fp8,0,27.564715067545574
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,16384,16,4,128,0,1,float16,float16,0,28.169044494628906
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,16384,16,4,128,0,1,fp8,fp8,0,16.45073064168294
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,16384,16,4,128,0,1,float16,fp8,0,28.051625569661457
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,16384,16,8,128,0,1,fp8,fp8,0,17.028778076171875
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,16384,16,8,128,0,1,float16,float16,0,27.906901041666668
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,16384,16,8,128,0,1,float16,fp8,0,28.378794352213543
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,16384,16,16,128,0,1,float16,float16,0,14.367744445800781
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,16384,16,16,128,0,1,float16,fp8,0,14.451712290445963
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,16384,16,16,128,0,1,fp8,fp8,0,8.841557184855143
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,16384,16,1,128,0,1,float16,float16,0,13.693611145019531
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,16384,16,1,128,0,1,float16,fp8,0,14.04211171468099
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,16384,16,1,128,0,1,fp8,fp8,0,8.00716781616211
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,16384,16,2,128,0,1,fp8,fp8,0,7.946069081624349
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,16384,16,2,128,0,1,float16,float16,0,13.617151896158854
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,16384,16,2,128,0,1,float16,fp8,0,13.920426686604818
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,16384,16,4,128,0,1,float16,float16,0,13.985791524251303
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,16384,16,4,128,0,1,fp8,fp8,0,7.980031967163086
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,16384,16,4,128,0,1,float16,fp8,0,14.28155771891276
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,16384,16,8,128,0,1,fp8,fp8,0,8.422911961873373
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,16384,16,8,128,0,1,float16,float16,0,14.482091267903646
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,16384,16,16,128,0,1,float16,float16,0,6.6988372802734375
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,16384,16,16,128,0,1,fp8,fp8,0,4.256256103515625
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,16384,16,16,128,0,1,float16,fp8,0,7.32313601175944
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,16384,16,8,128,0,1,float16,fp8,0,14.086485544840494
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,16384,16,1,128,0,1,float16,float16,0,6.358528137207031
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,16384,16,1,128,0,1,float16,fp8,0,7.074133555094401
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,16384,16,1,128,0,1,fp8,fp8,0,3.8005758921305337
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,16384,16,2,128,0,1,fp8,fp8,0,3.867136001586914
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,16384,16,2,128,0,1,float16,float16,0,7.149909337361653
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,16384,16,2,128,0,1,float16,fp8,0,6.812672297159831
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,16384,16,4,128,0,1,float16,float16,0,7.0359039306640625
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,16384,16,4,128,0,1,float16,fp8,0,6.852607727050781
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,16384,16,4,128,0,1,fp8,fp8,0,3.926527976989746
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,16384,16,8,128,0,1,float16,fp8,0,6.886912027994792
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,16384,16,8,128,0,1,float16,float16,0,6.840319951375325
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,16384,16,8,128,0,1,fp8,fp8,0,4.073472023010254
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,12288,16,1,128,0,1,fp8,fp8,0,19.04861831665039
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,12288,16,1,128,0,1,float16,float16,0,32.35327911376953
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,12288,16,1,128,0,1,float16,fp8,0,31.51172383626302
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,12288,16,2,128,0,1,fp8,fp8,0,18.972843170166016
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,12288,16,2,128,0,1,float16,float16,0,32.05768585205078
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,12288,16,2,128,0,1,float16,fp8,0,31.769940694173176
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,12288,16,4,128,0,1,float16,fp8,0,31.9817377726237
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,12288,16,4,128,0,1,float16,float16,0,32.54511006673177
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,12288,16,4,128,0,1,fp8,fp8,0,19.601579030354817
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,12288,16,16,128,0,1,float16,float16,0,16.903167724609375
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,12288,16,8,128,0,1,fp8,fp8,0,19.28123728434245
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,12288,16,16,128,0,1,float16,fp8,0,16.548010508219402
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,12288,16,16,128,0,1,fp8,fp8,0,10.488661448160807
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,12288,16,8,128,0,1,float16,fp8,0,32.58248647054037
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,12288,16,8,128,0,1,float16,float16,0,33.20900217692057
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,12288,16,1,128,0,1,float16,float16,0,15.847765604654947
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,12288,16,1,128,0,1,float16,fp8,0,15.85220209757487
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,12288,16,1,128,0,1,fp8,fp8,0,9.508010864257812
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,12288,16,2,128,0,1,fp8,fp8,0,9.480703989664713
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,12288,16,2,128,0,1,float16,float16,0,15.93719482421875
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,12288,16,2,128,0,1,float16,fp8,0,15.735978444417318
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,12288,16,4,128,0,1,fp8,fp8,0,9.73636245727539
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,12288,16,4,128,0,1,float16,fp8,0,16.47650146484375
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,12288,16,4,128,0,1,float16,float16,0,15.751338958740234
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,12288,16,8,128,0,1,float16,float16,0,16.28552500406901
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,12288,16,8,128,0,1,float16,fp8,0,16.323754628499348
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,12288,16,16,128,0,1,float16,float16,0,8.235178629557291
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,12288,16,8,128,0,1,fp8,fp8,0,10.332501093546549
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,12288,16,16,128,0,1,float16,fp8,0,8.381952285766602
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,12288,16,16,128,0,1,fp8,fp8,0,5.036032040913899
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,12288,16,1,128,0,1,float16,float16,0,8.008021036783854
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,12288,16,1,128,0,1,float16,fp8,0,7.915178934733073
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,12288,16,1,128,0,1,fp8,fp8,0,4.384597460428874
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,12288,16,2,128,0,1,fp8,fp8,0,4.422656059265137
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,12288,16,2,128,0,1,float16,fp8,0,7.679146448771159
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,12288,16,2,128,0,1,float16,float16,0,7.7207895914713545
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,12288,16,4,128,0,1,float16,float16,0,7.75270398457845
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,12288,16,4,128,0,1,float16,fp8,0,8.32358423868815
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,12288,16,4,128,0,1,fp8,fp8,0,4.468906720479329
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,12288,16,8,128,0,1,float16,float16,0,8.001877466837565
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,12288,16,8,128,0,1,float16,fp8,0,8.217088063557943
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,12288,16,8,128,0,1,fp8,fp8,0,4.632746696472168
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,12288,16,16,128,0,1,fp8,fp8,0,2.4791040420532227
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,12288,16,16,128,0,1,float16,float16,0,4.020906766255696
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,12288,16,16,128,0,1,float16,fp8,0,4.080298741658528
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,12288,16,1,128,0,1,float16,float16,0,3.7860692342122397
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,12288,16,1,128,0,1,float16,fp8,0,3.7771946589152017
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,12288,16,1,128,0,1,fp8,fp8,0,2.147157351175944
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,12288,16,2,128,0,1,fp8,fp8,0,2.156544049580892
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,12288,16,2,128,0,1,float16,float16,0,3.696469306945801
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,12288,16,2,128,0,1,float16,fp8,0,3.67633056640625
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,12288,16,4,128,0,1,float16,float16,0,3.7543252309163413
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,12288,16,4,128,0,1,float16,fp8,0,3.7017599741617837
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,12288,16,4,128,0,1,fp8,fp8,0,2.2719146410624185
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,12288,16,8,128,0,1,float16,float16,0,4.041557312011719
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,12288,16,8,128,0,1,fp8,fp8,0,2.293930689493815
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,12288,16,8,128,0,1,float16,fp8,0,3.7538134256998696
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,10240,16,1,128,0,1,fp8,fp8,0,13.551615397135416
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,10240,16,1,128,0,1,float16,float16,0,22.239402770996094
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,10240,16,1,128,0,1,float16,fp8,0,22.097920735677082
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,10240,16,2,128,0,1,fp8,fp8,0,13.996373494466146
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,10240,16,2,128,0,1,float16,float16,0,22.429183959960938
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,10240,16,2,128,0,1,float16,fp8,0,22.071637471516926
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,10240,16,4,128,0,1,float16,float16,0,22.330027262369793
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,10240,16,4,128,0,1,float16,fp8,0,22.178815205891926
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,10240,16,4,128,0,1,fp8,fp8,0,13.93612798055013
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,10240,16,16,128,0,1,float16,float16,0,12.13320541381836
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,10240,16,8,128,0,1,fp8,fp8,0,14.57851791381836
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,10240,16,16,128,0,1,float16,fp8,0,12.203348795572916
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,10240,16,16,128,0,1,fp8,fp8,0,7.1866029103597
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,10240,16,8,128,0,1,float16,float16,0,23.00347646077474
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,10240,16,8,128,0,1,float16,fp8,0,22.867457071940105
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,10240,16,1,128,0,1,float16,float16,0,11.150165557861328
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,10240,16,1,128,0,1,float16,fp8,0,11.365717569986979
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,10240,16,1,128,0,1,fp8,fp8,0,6.721877415974935
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,10240,16,2,128,0,1,float16,float16,0,11.655850728352865
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,10240,16,2,128,0,1,fp8,fp8,0,6.42628288269043
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,10240,16,2,128,0,1,float16,fp8,0,11.19146728515625
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,10240,16,4,128,0,1,float16,float16,0,11.444906870524088
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,10240,16,4,128,0,1,fp8,fp8,0,6.503594716389974
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,10240,16,4,128,0,1,float16,fp8,0,11.381930033365885
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,10240,16,8,128,0,1,float16,float16,0,11.602261861165365
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,10240,16,8,128,0,1,float16,fp8,0,11.503957112630209
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,10240,16,16,128,0,1,float16,float16,0,5.88697624206543
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,10240,16,8,128,0,1,fp8,fp8,0,7.266304016113281
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,10240,16,16,128,0,1,float16,fp8,0,5.956266403198242
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,10240,16,16,128,0,1,fp8,fp8,0,3.6125014623006186
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,10240,16,1,128,0,1,float16,fp8,0,5.458773295084636
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,10240,16,1,128,0,1,float16,float16,0,4.916394551595052
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,10240,16,1,128,0,1,fp8,fp8,0,3.0303573608398438
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,10240,16,2,128,0,1,fp8,fp8,0,3.1646718978881836
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,10240,16,2,128,0,1,float16,float16,0,5.548885345458984
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,10240,16,2,128,0,1,float16,fp8,0,4.916906674702962
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,10240,16,4,128,0,1,float16,fp8,0,5.254997253417969
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,10240,16,4,128,0,1,float16,float16,0,5.541717529296875
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,10240,16,4,128,0,1,fp8,fp8,0,3.116373380025228
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,10240,16,8,128,0,1,float16,float16,0,5.597696304321289
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,10240,16,8,128,0,1,float16,fp8,0,5.461845397949219
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,10240,16,8,128,0,1,fp8,fp8,0,3.3471145629882812
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,10240,16,16,128,0,1,float16,fp8,0,2.86515204111735
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,10240,16,16,128,0,1,float16,float16,0,2.923520088195801
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,10240,16,16,128,0,1,fp8,fp8,0,1.8177706400553386
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,10240,16,1,128,0,1,float16,fp8,0,2.549760023752848
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,10240,16,1,128,0,1,float16,float16,0,2.5888427098592124
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,10240,16,1,128,0,1,fp8,fp8,0,1.5112533569335938
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,10240,16,2,128,0,1,float16,float16,0,2.558805306752523
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,10240,16,2,128,0,1,fp8,fp8,0,1.522175947825114
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,10240,16,2,128,0,1,float16,fp8,0,2.6282666524251304
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,10240,16,4,128,0,1,float16,float16,0,2.6391894022623696
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,10240,16,4,128,0,1,float16,fp8,0,2.5509546597798667
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,10240,16,4,128,0,1,fp8,fp8,0,1.5752533276875813
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,10240,16,8,128,0,1,float16,float16,0,2.7163305282592773
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,10240,16,8,128,0,1,float16,fp8,0,2.637141386667887
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,10240,16,8,128,0,1,fp8,fp8,0,1.655296007792155
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,8192,16,1,128,0,1,fp8,fp8,0,17.76708221435547
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,8192,16,2,128,0,1,fp8,fp8,0,18.040490468343098
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,8192,16,1,128,0,1,float16,float16,0,29.879124959309895
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,8192,16,1,128,0,1,float16,fp8,0,30.095359802246094
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,8192,16,2,128,0,1,float16,float16,0,30.28565216064453
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,8192,16,2,128,0,1,float16,fp8,0,29.932884216308594
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,8192,16,4,128,0,1,float16,fp8,0,29.760854085286457
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,8192,16,4,128,0,1,float16,float16,0,30.054400126139324
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,8192,16,4,128,0,1,fp8,fp8,0,18.807467142740887
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,8192,16,16,128,0,1,fp8,fp8,0,9.732778549194336
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,8192,16,16,128,0,1,float16,float16,0,15.90289052327474
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,8192,16,16,128,0,1,float16,fp8,0,15.931563059488932
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,8192,16,8,128,0,1,fp8,fp8,0,19.84000015258789
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,8192,16,1,128,0,1,float16,float16,0,14.634496053059896
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,8192,16,8,128,0,1,float16,float16,0,31.230122884114582
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,8192,16,8,128,0,1,float16,fp8,0,30.711807250976562
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,8192,16,1,128,0,1,float16,fp8,0,14.947327931722006
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,8192,16,1,128,0,1,fp8,fp8,0,8.513535817464193
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,8192,16,2,128,0,1,fp8,fp8,0,8.863744099934896
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,8192,16,2,128,0,1,float16,float16,0,14.725290934244791
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,8192,16,2,128,0,1,float16,fp8,0,14.663168589274088
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,8192,16,4,128,0,1,float16,float16,0,14.816426595052084
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,8192,16,4,128,0,1,fp8,fp8,0,8.972458521525065
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,8192,16,4,128,0,1,float16,fp8,0,14.825300852457682
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,8192,16,8,128,0,1,float16,float16,0,15.355220794677734
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,8192,16,8,128,0,1,float16,fp8,0,14.899028778076172
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,8192,16,8,128,0,1,fp8,fp8,0,9.383594512939453
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,8192,16,16,128,0,1,float16,float16,0,7.621290842692058
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,8192,16,16,128,0,1,float16,fp8,0,7.773013432820638
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,8192,16,16,128,0,1,fp8,fp8,0,4.863658587137858
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,8192,16,1,128,0,1,float16,float16,0,6.764544169108073
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,8192,16,1,128,0,1,float16,fp8,0,7.4212690989176435
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,8192,16,1,128,0,1,fp8,fp8,0,4.127402623494466
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,8192,16,2,128,0,1,fp8,fp8,0,4.0763734181722
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,8192,16,2,128,0,1,float16,float16,0,7.287978490193685
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,8192,16,2,128,0,1,float16,fp8,0,7.316480000813802
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,8192,16,4,128,0,1,float16,float16,0,7.018496195475261
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,8192,16,4,128,0,1,fp8,fp8,0,4.202837308247884
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,8192,16,4,128,0,1,float16,fp8,0,7.133013407389323
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,8192,16,8,128,0,1,float16,float16,0,7.334570566813151
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,8192,16,8,128,0,1,fp8,fp8,0,4.398933410644531
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,8192,16,16,128,0,1,fp8,fp8,0,2.5132373174031577
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,8192,16,16,128,0,1,float16,float16,0,3.8080854415893555
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,8192,16,8,128,0,1,float16,fp8,0,7.640064239501953
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,8192,16,16,128,0,1,float16,fp8,0,3.8748159408569336
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,8192,16,1,128,0,1,float16,float16,0,3.192490577697754
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,8192,16,1,128,0,1,float16,fp8,0,3.311786651611328
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,8192,16,1,128,0,1,fp8,fp8,0,2.0210347175598145
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,8192,16,2,128,0,1,fp8,fp8,0,1.9903146425882976
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,8192,16,2,128,0,1,float16,float16,0,3.2779947916666665
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,8192,16,4,128,0,1,fp8,fp8,0,2.0432213147481284
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,8192,16,2,128,0,1,float16,fp8,0,3.294549306233724
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,8192,16,4,128,0,1,float16,float16,0,3.454634666442871
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,8192,16,4,128,0,1,float16,fp8,0,3.444053332010905
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,8192,16,8,128,0,1,float16,float16,0,3.5496959686279297
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,8192,16,8,128,0,1,float16,fp8,0,3.6331520080566406
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,8192,16,8,128,0,1,fp8,fp8,0,2.209109306335449
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,8192,16,16,128,0,1,fp8,fp8,0,1.253717343012492
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,8192,16,16,128,0,1,float16,float16,0,1.9444053967793782
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,8192,16,16,128,0,1,float16,fp8,0,1.8930346171061199
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,8192,16,1,128,0,1,float16,float16,0,1.6971093813578289
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,8192,16,1,128,0,1,float16,fp8,0,1.7078612645467122
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,8192,16,1,128,0,1,fp8,fp8,0,1.008128007253011
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,8192,16,2,128,0,1,float16,float16,0,1.7174186706542969
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,8192,16,2,128,0,1,float16,fp8,0,1.6646827061971028
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,8192,16,2,128,0,1,fp8,fp8,0,1.0219519933064778
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,8192,16,4,128,0,1,float16,fp8,0,1.662293275197347
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,8192,16,4,128,0,1,float16,float16,0,1.6812373797098796
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,8192,16,4,128,0,1,fp8,fp8,0,1.0332159996032715
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,8192,16,8,128,0,1,float16,float16,0,1.747968037923177
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,8192,16,8,128,0,1,float16,fp8,0,1.6701439221700032
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,8192,16,8,128,0,1,fp8,fp8,0,1.1124053001403809
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,6144,16,1,128,0,1,fp8,fp8,0,10.50265630086263
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,6144,16,2,128,0,1,fp8,fp8,0,10.85986073811849
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,6144,16,1,128,0,1,float16,float16,0,16.913066864013672
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,6144,16,1,128,0,1,float16,fp8,0,17.32249577840169
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,6144,16,2,128,0,1,float16,fp8,0,17.426090240478516
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,6144,16,2,128,0,1,float16,float16,0,17.306453704833984
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,6144,16,4,128,0,1,float16,float16,0,17.05181884765625
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,6144,16,4,128,0,1,float16,fp8,0,17.57866668701172
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,6144,16,4,128,0,1,fp8,fp8,0,11.015338897705078
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,6144,16,16,128,0,1,fp8,fp8,0,6.176938374837239
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,6144,16,16,128,0,1,float16,fp8,0,9.109504063924154
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,6144,16,16,128,0,1,float16,float16,0,9.536682764689127
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,6144,16,8,128,0,1,fp8,fp8,0,11.346602121988932
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,6144,16,8,128,0,1,float16,float16,0,17.845247904459637
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,6144,16,8,128,0,1,float16,fp8,0,17.86299769083659
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,6144,16,1,128,0,1,float16,float16,0,8.679936091105143
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,6144,16,1,128,0,1,float16,fp8,0,8.382122675577799
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,6144,16,1,128,0,1,fp8,fp8,0,4.71500809987386
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,6144,16,2,128,0,1,fp8,fp8,0,4.977663993835449
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,6144,16,2,128,0,1,float16,float16,0,8.543914794921875
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,6144,16,2,128,0,1,float16,fp8,0,8.446805318196615
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,6144,16,4,128,0,1,float16,float16,0,8.414719899495443
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,6144,16,4,128,0,1,float16,fp8,0,8.319999694824219
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,6144,16,4,128,0,1,fp8,fp8,0,5.2437334060668945
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,6144,16,8,128,0,1,float16,float16,0,8.804351806640625
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,6144,16,8,128,0,1,fp8,fp8,0,5.4823252360026045
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,6144,16,16,128,0,1,float16,float16,0,4.586666742960612
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,6144,16,8,128,0,1,float16,fp8,0,8.652458826700846
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,6144,16,16,128,0,1,float16,fp8,0,4.635648091634114
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,6144,16,16,128,0,1,fp8,fp8,0,3.0950400034586587
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,6144,16,1,128,0,1,float16,float16,0,3.8007465998331704
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,6144,16,1,128,0,1,float16,fp8,0,3.957248051961263
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,6144,16,1,128,0,1,fp8,fp8,0,2.300586700439453
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,6144,16,2,128,0,1,float16,float16,0,3.858432133992513
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,6144,16,2,128,0,1,fp8,fp8,0,2.3811413447062173
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,6144,16,2,128,0,1,float16,fp8,0,3.8615039189656577
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,6144,16,4,128,0,1,float16,float16,0,4.090197245279948
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,6144,16,4,128,0,1,float16,fp8,0,4.1168212890625
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,6144,16,4,128,0,1,fp8,fp8,0,2.477738698323568
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,6144,16,8,128,0,1,float16,float16,0,4.286122639973958
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,6144,16,8,128,0,1,float16,fp8,0,4.208127975463867
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,6144,16,8,128,0,1,fp8,fp8,0,2.721280097961426
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,6144,16,16,128,0,1,float16,float16,0,2.2894934018452964
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,6144,16,16,128,0,1,float16,fp8,0,2.2748160362243652
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,6144,16,16,128,0,1,fp8,fp8,0,1.553749402364095
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,6144,16,1,128,0,1,float16,float16,0,1.921877384185791
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,6144,16,1,128,0,1,float16,fp8,0,1.8423466682434082
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,6144,16,1,128,0,1,fp8,fp8,0,1.1642879645029705
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,6144,16,2,128,0,1,float16,float16,0,1.867263952891032
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,6144,16,2,128,0,1,fp8,fp8,0,1.1630933284759521
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,6144,16,2,128,0,1,float16,fp8,0,1.8834773699442546
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,6144,16,4,128,0,1,float16,float16,0,1.950719992319743
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,6144,16,4,128,0,1,float16,fp8,0,1.93723726272583
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,6144,16,4,128,0,1,fp8,fp8,0,1.2238506476084392
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,6144,16,8,128,0,1,float16,float16,0,2.0529492696126304
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,6144,16,8,128,0,1,float16,fp8,0,2.0962986946105957
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,6144,16,8,128,0,1,fp8,fp8,0,1.3071359793345134
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,6144,16,16,128,0,1,fp8,fp8,0,0.7343786557515463
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,6144,16,16,128,0,1,float16,float16,0,1.0893653233846028
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,6144,16,16,128,0,1,float16,fp8,0,1.0565973122914631
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,6144,16,1,128,0,1,float16,float16,0,1.0129066308339436
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,6144,16,1,128,0,1,fp8,fp8,0,0.617301344871521
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,6144,16,1,128,0,1,float16,fp8,0,1.0129066308339436
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,6144,16,2,128,0,1,float16,float16,0,1.0301439762115479
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,6144,16,2,128,0,1,float16,fp8,0,1.0002773602803547
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,6144,16,2,128,0,1,fp8,fp8,0,0.6058666706085205
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,6144,16,4,128,0,1,float16,float16,0,1.0023252964019775
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,6144,16,4,128,0,1,float16,fp8,0,1.010858694712321
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,6144,16,4,128,0,1,fp8,fp8,0,0.621397336324056
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,6144,16,8,128,0,1,float16,float16,0,1.008639971415202
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,6144,16,8,128,0,1,float16,fp8,0,1.0364586512247722
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,6144,16,8,128,0,1,fp8,fp8,0,0.6475093364715576
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,4096,16,1,128,0,1,fp8,fp8,0,10.190848032633463
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,4096,16,2,128,0,1,fp8,fp8,0,10.287616093953451
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,4096,16,1,128,0,1,float16,float16,0,16.41164779663086
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,4096,16,1,128,0,1,float16,fp8,0,16.78028742472331
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,4096,16,2,128,0,1,float16,float16,0,16.369664510091145
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,4096,16,2,128,0,1,float16,fp8,0,16.690858205159504
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,4096,16,4,128,0,1,float16,float16,0,17.079978942871094
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,4096,16,4,128,0,1,float16,fp8,0,16.738133748372395
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,4096,16,4,128,0,1,fp8,fp8,0,10.936832427978516
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,4096,16,16,128,0,1,fp8,fp8,0,6.671530405680339
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,4096,16,16,128,0,1,float16,float16,0,9.136298497517904
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,4096,16,16,128,0,1,float16,fp8,0,9.34877840677897
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,4096,16,8,128,0,1,fp8,fp8,0,11.79904047648112
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,4096,16,8,128,0,1,float16,float16,0,16.841898600260418
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,4096,16,8,128,0,1,float16,fp8,0,17.277440388997395
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,4096,16,1,128,0,1,float16,float16,0,7.660714467366536
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,4096,16,1,128,0,1,fp8,fp8,0,4.803754806518555
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,4096,16,1,128,0,1,float16,fp8,0,7.771989186604817
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,4096,16,2,128,0,1,fp8,fp8,0,4.800511995951335
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,4096,16,2,128,0,1,float16,float16,0,8.110591888427734
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,4096,16,2,128,0,1,float16,fp8,0,8.066730499267578
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,4096,16,4,128,0,1,fp8,fp8,0,5.259093284606934
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,4096,16,4,128,0,1,float16,float16,0,7.740586598714192
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,4096,16,4,128,0,1,float16,fp8,0,8.039082845052084
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,4096,16,16,128,0,1,float16,float16,0,4.681557337443034
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,4096,16,8,128,0,1,fp8,fp8,0,5.677055994669597
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,4096,16,8,128,0,1,float16,float16,0,8.475306828816732
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,4096,16,16,128,0,1,float16,fp8,0,4.659029324849446
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,4096,16,16,128,0,1,fp8,fp8,0,3.3213440577189126
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,4096,16,8,128,0,1,float16,fp8,0,8.164522806803385
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,4096,16,1,128,0,1,float16,float16,0,3.657557487487793
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,4096,16,1,128,0,1,float16,fp8,0,3.5543041229248047
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,4096,16,1,128,0,1,fp8,fp8,0,2.2896639506022134
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,4096,16,2,128,0,1,fp8,fp8,0,2.396501382191976
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,4096,16,2,128,0,1,float16,float16,0,3.71831480662028
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,4096,16,2,128,0,1,float16,fp8,0,3.7319679260253906
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,4096,16,4,128,0,1,fp8,fp8,0,2.4516266187032065
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,4096,16,4,128,0,1,float16,float16,0,3.9422292709350586
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,4096,16,4,128,0,1,float16,fp8,0,3.78436279296875
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,4096,16,8,128,0,1,float16,float16,0,4.088831901550293
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,4096,16,8,128,0,1,float16,fp8,0,4.089685440063477
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,4096,16,16,128,0,1,float16,float16,0,2.301952044169108
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,4096,16,16,128,0,1,fp8,fp8,0,1.5709865887959797
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,4096,16,8,128,0,1,fp8,fp8,0,2.7675307591756186
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,4096,16,1,128,0,1,float16,float16,0,1.7902933756510417
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,4096,16,1,128,0,1,float16,fp8,0,1.7303892771402996
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,4096,16,16,128,0,1,float16,fp8,0,2.280277411142985
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,4096,16,1,128,0,1,fp8,fp8,0,1.0733226935068767
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,4096,16,2,128,0,1,fp8,fp8,0,1.141760031382243
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,4096,16,2,128,0,1,float16,float16,0,1.7947306632995605
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,4096,16,2,128,0,1,float16,fp8,0,1.8027520179748535
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,4096,16,4,128,0,1,fp8,fp8,0,1.2233386834462483
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,4096,16,4,128,0,1,float16,float16,0,1.8996906280517578
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,4096,16,4,128,0,1,float16,fp8,0,1.9123199780782063
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,4096,16,8,128,0,1,float16,float16,0,2.066943963368734
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,4096,16,8,128,0,1,float16,fp8,0,1.9725653330485027
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,4096,16,8,128,0,1,fp8,fp8,0,1.3501440684000652
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,4096,16,16,128,0,1,float16,float16,0,1.137664000193278
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,4096,16,16,128,0,1,float16,fp8,0,1.0949973265329997
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,4096,16,1,128,0,1,fp8,fp8,0,0.5474986632664999
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,4096,16,16,128,0,1,fp8,fp8,0,0.7690239747365316
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,4096,16,1,128,0,1,float16,fp8,0,0.9227946599324545
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,4096,16,1,128,0,1,float16,float16,0,0.9178453286488851
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,4096,16,2,128,0,1,float16,float16,0,0.8953173160552979
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,4096,16,2,128,0,1,float16,fp8,0,0.899072011311849
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,4096,16,2,128,0,1,fp8,fp8,0,0.5463039875030518
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,4096,16,4,128,0,1,float16,float16,0,0.9019733270009359
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,4096,16,4,128,0,1,float16,fp8,0,0.9200639724731445
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,4096,16,4,128,0,1,fp8,fp8,0,0.5638826688130697
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,4096,16,8,128,0,1,float16,float16,0,0.9531733194986979
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,4096,16,8,128,0,1,fp8,fp8,0,0.6176426808039347
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,4096,16,8,128,0,1,float16,fp8,0,0.9359359741210938
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,4096,16,16,128,0,1,float16,float16,0,0.5157546599706014
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,4096,16,16,128,0,1,float16,fp8,0,0.5193386475245158
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,4096,16,16,128,0,1,fp8,fp8,0,0.3269973397254944
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,4096,16,1,128,0,1,float16,float16,0,0.5193386475245158
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,4096,16,1,128,0,1,float16,fp8,0,0.48110934098561603
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,4096,16,1,128,0,1,fp8,fp8,0,0.3222186764081319
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,4096,16,2,128,0,1,float16,float16,0,0.4894719918568929
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,4096,16,2,128,0,1,float16,fp8,0,0.4945919911066691
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,4096,16,2,128,0,1,fp8,fp8,0,0.3114666740099589
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,4096,16,4,128,0,1,float16,float16,0,0.49698134263356525
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,4096,16,4,128,0,1,fp8,fp8,0,0.318122665087382
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,4096,16,4,128,0,1,float16,fp8,0,0.4945919911066691
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,4096,16,8,128,0,1,float16,float16,0,0.5079040129979452
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,4096,16,8,128,0,1,fp8,fp8,0,0.31214932600657147
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,4096,16,8,128,0,1,float16,fp8,0,0.5171200037002563
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,3072,16,1,128,0,1,float16,float16,0,9.845759709676107
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,3072,16,1,128,0,1,fp8,fp8,0,6.045354843139648
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,3072,16,2,128,0,1,fp8,fp8,0,6.244351704915364
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,3072,16,1,128,0,1,float16,fp8,0,9.051136016845703
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,3072,16,2,128,0,1,float16,fp8,0,9.808554967244467
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,3072,16,2,128,0,1,float16,float16,0,9.718954722086588
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,3072,16,4,128,0,1,float16,float16,0,9.927850723266602
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,3072,16,4,128,0,1,float16,fp8,0,10.29034678141276
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,3072,16,4,128,0,1,fp8,fp8,0,6.876842498779297
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,3072,16,16,128,0,1,fp8,fp8,0,4.21836789449056
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,3072,16,16,128,0,1,float16,float16,0,5.787647883097331
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,3072,16,16,128,0,1,float16,fp8,0,5.854378382364909
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,3072,16,1,128,0,1,float16,float16,0,4.505770683288574
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,3072,16,8,128,0,1,float16,float16,0,10.281813303629557
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,3072,16,8,128,0,1,fp8,fp8,0,7.472810745239258
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,3072,16,8,128,0,1,float16,fp8,0,10.327381134033203
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,3072,16,1,128,0,1,float16,fp8,0,4.428458531697591
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,3072,16,1,128,0,1,fp8,fp8,0,2.787839889526367
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,3072,16,2,128,0,1,float16,float16,0,4.4629332224528
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,3072,16,2,128,0,1,fp8,fp8,0,2.9672107696533203
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,3072,16,2,128,0,1,float16,fp8,0,4.625066757202148
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,3072,16,4,128,0,1,fp8,fp8,0,3.115690549214681
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,3072,16,4,128,0,1,float16,float16,0,4.646570523579915
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,3072,16,4,128,0,1,float16,fp8,0,4.599466641743978
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,3072,16,8,128,0,1,float16,float16,0,5.135018666585286
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,3072,16,8,128,0,1,float16,fp8,0,5.024256070454915
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,3072,16,16,128,0,1,float16,fp8,0,2.887850761413574
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,3072,16,16,128,0,1,float16,float16,0,2.9491198857625327
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,3072,16,8,128,0,1,fp8,fp8,0,3.511295954386393
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,3072,16,16,128,0,1,fp8,fp8,0,2.043392022450765
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,3072,16,1,128,0,1,float16,fp8,0,2.1323092778523765
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,3072,16,1,128,0,1,float16,float16,0,2.1587626139322915
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,3072,16,1,128,0,1,fp8,fp8,0,1.4011732737223308
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,3072,16,2,128,0,1,float16,float16,0,2.200064023335775
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,3072,16,2,128,0,1,fp8,fp8,0,1.4624427159627278
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,3072,16,4,128,0,1,fp8,fp8,0,1.516544024149577
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,3072,16,2,128,0,1,float16,fp8,0,2.2410240173339844
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,3072,16,4,128,0,1,float16,float16,0,2.3152640660603843
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,3072,16,4,128,0,1,float16,fp8,0,2.2894934018452964
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,3072,16,8,128,0,1,float16,float16,0,2.5229652722676597
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,3072,16,8,128,0,1,float16,fp8,0,2.516479969024658
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,3072,16,8,128,0,1,fp8,fp8,0,1.6872107187906902
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,3072,16,16,128,0,1,float16,fp8,0,1.3851307233174641
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,3072,16,1,128,0,1,float16,float16,0,1.014954646428426
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,3072,16,16,128,0,1,fp8,fp8,0,1.019221305847168
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,3072,16,16,128,0,1,float16,float16,0,1.4504960378011067
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,3072,16,1,128,0,1,float16,fp8,0,1.008128007253011
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,3072,16,1,128,0,1,fp8,fp8,0,0.6553599834442139
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,3072,16,2,128,0,1,float16,float16,0,1.0465280214945476
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,3072,16,2,128,0,1,fp8,fp8,0,0.6661119858423868
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,3072,16,2,128,0,1,float16,fp8,0,1.020586649576823
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,3072,16,4,128,0,1,float16,float16,0,1.0885120232899983
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,3072,16,4,128,0,1,float16,fp8,0,1.0881706873575847
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,3072,16,4,128,0,1,fp8,fp8,0,0.736255963643392
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,3072,16,8,128,0,1,float16,fp8,0,1.1881813208262126
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,3072,16,8,128,0,1,float16,float16,0,1.2136106491088867
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,3072,16,8,128,0,1,fp8,fp8,0,0.8357546329498291
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,3072,16,16,128,0,1,float16,float16,0,0.6447786490122477
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,3072,16,16,128,0,1,float16,fp8,0,0.5935786565144857
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,3072,16,16,128,0,1,fp8,fp8,0,0.46353065967559814
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,3072,16,1,128,0,1,float16,float16,0,0.5485226710637411
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,3072,16,1,128,0,1,float16,fp8,0,0.5510826508204142
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,3072,16,1,128,0,1,fp8,fp8,0,0.3397973378499349
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,3072,16,2,128,0,1,float16,float16,0,0.5469866593678793
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,3072,16,2,128,0,1,float16,fp8,0,0.5565439860026041
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,3072,16,2,128,0,1,fp8,fp8,0,0.3333119948705037
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,3072,16,4,128,0,1,float16,float16,0,0.5428906679153442
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,3072,16,4,128,0,1,float16,fp8,0,0.5471573273340861
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,3072,16,4,128,0,1,fp8,fp8,0,0.33484800656636554
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,3072,16,8,128,0,1,float16,float16,0,0.5681493282318115
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,3072,16,8,128,0,1,float16,fp8,0,0.5589333375295004
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,3072,16,8,128,0,1,fp8,fp8,0,0.3454293409983317
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,3072,16,16,128,0,1,float16,float16,0,0.32767999172210693
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,3072,16,16,128,0,1,float16,fp8,0,0.3193173408508301
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,3072,16,16,128,0,1,fp8,fp8,0,0.20172800620396933
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,3072,16,1,128,0,1,float16,float16,0,0.2986666758855184
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,3072,16,1,128,0,1,float16,fp8,0,0.2950826684633891
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,3072,16,1,128,0,1,fp8,fp8,0,0.20172800620396933
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,3072,16,2,128,0,1,float16,float16,0,0.3037866751352946
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,3072,16,2,128,0,1,float16,fp8,0,0.2991786599159241
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,3072,16,2,128,0,1,fp8,fp8,0,0.2027519941329956
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,3072,16,4,128,0,1,float16,fp8,0,0.2923520008722941
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,3072,16,4,128,0,1,float16,float16,0,0.3015679915746053
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,3072,16,4,128,0,1,fp8,fp8,0,0.20138667027155557
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,3072,16,8,128,0,1,float16,float16,0,0.3099306623140971
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,3072,16,8,128,0,1,fp8,fp8,0,0.2034346659978231
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,3072,16,8,128,0,1,float16,fp8,0,0.31692800919214886
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,2048,16,1,128,0,1,float16,float16,0,9.763498942057291
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,2048,16,1,128,0,1,float16,fp8,0,9.745237350463867
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,2048,16,1,128,0,1,fp8,fp8,0,6.559573491414388
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,2048,16,2,128,0,1,fp8,fp8,0,6.8145491282145185
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,2048,16,2,128,0,1,float16,float16,0,9.781759897867838
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,2048,16,2,128,0,1,float16,fp8,0,9.556138356526693
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,2048,16,4,128,0,1,float16,float16,0,10.181973139444986
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,2048,16,4,128,0,1,float16,fp8,0,10.430976231892904
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,2048,16,4,128,0,1,fp8,fp8,0,7.412394841512044
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,2048,16,16,128,0,1,float16,float16,0,6.149119695027669
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,2048,16,16,128,0,1,float16,fp8,0,6.199637095133464
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,2048,16,8,128,0,1,fp8,fp8,0,8.275456110636393
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,2048,16,16,128,0,1,fp8,fp8,0,4.775936126708984
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,2048,16,1,128,0,1,float16,float16,0,4.5247147878011065
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,2048,16,8,128,0,1,float16,float16,0,10.93068822224935
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,2048,16,8,128,0,1,float16,fp8,0,10.693120320638021
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,2048,16,1,128,0,1,float16,fp8,0,4.351146697998047
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,2048,16,1,128,0,1,fp8,fp8,0,2.9644800821940103
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,2048,16,2,128,0,1,fp8,fp8,0,3.1431681315104165
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,2048,16,2,128,0,1,float16,float16,0,4.493653297424316
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,2048,16,4,128,0,1,fp8,fp8,0,3.3996801376342773
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,2048,16,2,128,0,1,float16,fp8,0,4.580522537231445
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,2048,16,4,128,0,1,float16,float16,0,4.862634658813477
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,2048,16,4,128,0,1,float16,fp8,0,4.8537600835164385
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,2048,16,8,128,0,1,float16,float16,0,5.241344134012858
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,2048,16,16,128,0,1,float16,float16,0,3.0870186487833657
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,2048,16,16,128,0,1,fp8,fp8,0,2.337279955546061
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,2048,16,16,128,0,1,float16,fp8,0,3.044864018758138
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,2048,16,8,128,0,1,float16,fp8,0,5.186047871907552
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,2048,16,8,128,0,1,fp8,fp8,0,3.9229440689086914
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,2048,16,1,128,0,1,float16,float16,0,2.2121814092000327
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,2048,16,1,128,0,1,float16,fp8,0,2.187434673309326
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,2048,16,1,128,0,1,fp8,fp8,0,1.4119253158569336
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,2048,16,2,128,0,1,float16,float16,0,2.223445256551107
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,2048,16,2,128,0,1,fp8,fp8,0,1.465343952178955
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,2048,16,2,128,0,1,float16,fp8,0,2.2452905972798667
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,2048,16,4,128,0,1,float16,float16,0,2.349738597869873
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,2048,16,4,128,0,1,fp8,fp8,0,1.6252586046854656
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,2048,16,4,128,0,1,float16,fp8,0,2.3466666539510093
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,2048,16,8,128,0,1,float16,float16,0,2.633216063181559
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,2048,16,8,128,0,1,float16,fp8,0,2.569215933481852
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,2048,16,16,128,0,1,float16,float16,0,1.5213227272033691
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,2048,16,16,128,0,1,fp8,fp8,0,1.105237325032552
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,2048,16,8,128,0,1,fp8,fp8,0,1.8343253135681152
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,2048,16,16,128,0,1,float16,fp8,0,1.5034027099609375
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,2048,16,1,128,0,1,float16,fp8,0,1.01256529490153
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,2048,16,1,128,0,1,float16,float16,0,1.0337279637654622
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,2048,16,1,128,0,1,fp8,fp8,0,0.6743040084838867
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,2048,16,2,128,0,1,float16,float16,0,1.0547200043996174
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,2048,16,2,128,0,1,fp8,fp8,0,0.7096319993336996
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,2048,16,2,128,0,1,float16,fp8,0,1.035264015197754
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,2048,16,4,128,0,1,float16,fp8,0,1.125205357869466
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,2048,16,4,128,0,1,float16,float16,0,1.1327146689097087
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,2048,16,4,128,0,1,fp8,fp8,0,0.7818240324656168
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,2048,16,8,128,0,1,float16,fp8,0,1.2588373025258381
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,2048,16,8,128,0,1,float16,float16,0,1.295360008875529
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,2048,16,8,128,0,1,fp8,fp8,0,0.9019733270009359
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,2048,16,16,128,0,1,float16,float16,0,0.7237973213195801
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,2048,16,16,128,0,1,float16,fp8,0,0.6813013553619385
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,2048,16,16,128,0,1,fp8,fp8,0,0.5439146757125854
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,2048,16,1,128,0,1,float16,float16,0,0.49851731459299725
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,2048,16,1,128,0,1,float16,fp8,0,0.5012480020523071
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,2048,16,1,128,0,1,fp8,fp8,0,0.3056640028953552
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,2048,16,2,128,0,1,float16,float16,0,0.5154133240381876
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,2048,16,2,128,0,1,float16,fp8,0,0.49851731459299725
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,2048,16,2,128,0,1,fp8,fp8,0,0.303274671236674
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,2048,16,4,128,0,1,float16,float16,0,0.4983466863632202
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,2048,16,4,128,0,1,fp8,fp8,0,0.314026673634847
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,2048,16,4,128,0,1,float16,fp8,0,0.5126826763153076
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,2048,16,8,128,0,1,float16,float16,0,0.5517653226852417
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,2048,16,8,128,0,1,float16,fp8,0,0.5379413366317749
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,2048,16,8,128,0,1,fp8,fp8,0,0.4094293514887492
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,2048,16,16,128,0,1,float16,float16,0,0.2916693290074666
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,2048,16,16,128,0,1,float16,fp8,0,0.291157325108846
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,2048,16,16,128,0,1,fp8,fp8,0,0.1786880095799764
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,2048,16,1,128,0,1,float16,float16,0,0.2650453249613444
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,2048,16,1,128,0,1,float16,fp8,0,0.2658986647923787
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,2048,16,1,128,0,1,fp8,fp8,0,0.17698132991790771
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,2048,16,2,128,0,1,float16,float16,0,0.27323732773462933
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,2048,16,2,128,0,1,float16,fp8,0,0.2711893320083618
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,2048,16,2,128,0,1,fp8,fp8,0,0.17459199825922647
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,2048,16,4,128,0,1,float16,float16,0,0.2764799992243449
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,2048,16,4,128,0,1,float16,fp8,0,0.26709334055582684
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,2048,16,4,128,0,1,fp8,fp8,0,0.17390932639439902
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,2048,16,8,128,0,1,float16,float16,0,0.28808534145355225
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,2048,16,8,128,0,1,float16,fp8,0,0.2853546738624573
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,2048,16,8,128,0,1,fp8,fp8,0,0.1786880095799764
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,2048,16,16,128,0,1,float16,float16,0,0.17988266547520956
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,2048,16,16,128,0,1,float16,fp8,0,0.17339734236399332
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,2048,16,16,128,0,1,fp8,fp8,0,0.11195733149846394
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,2048,16,1,128,0,1,float16,float16,0,0.17032533884048462
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,2048,16,1,128,0,1,float16,fp8,0,0.169813334941864
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,2048,16,1,128,0,1,fp8,fp8,0,0.11383466919263203
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,2048,16,2,128,0,1,float16,float16,0,0.169813334941864
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,2048,16,2,128,0,1,float16,fp8,0,0.17356799046198526
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,2048,16,2,128,0,1,fp8,fp8,0,0.11434666315714519
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,2048,16,4,128,0,1,float16,float16,0,0.1713493267695109
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,2048,16,4,128,0,1,float16,fp8,0,0.17254400253295898
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,2048,16,4,128,0,1,fp8,fp8,0,0.11127466956774394
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,2048,16,8,128,0,1,float16,float16,0,0.17271467049916586
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,2048,16,8,128,0,1,float16,fp8,0,0.1687893271446228
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,2048,16,8,128,0,1,fp8,fp8,0,0.11571199695269267
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,1536,16,1,128,0,1,float16,float16,0,5.897045135498047
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,1536,16,1,128,0,1,float16,fp8,0,5.894826889038086
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,1536,16,1,128,0,1,fp8,fp8,0,4.038997332255046
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,1536,16,2,128,0,1,float16,float16,0,5.827925364176433
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,1536,16,2,128,0,1,float16,fp8,0,5.8683732350667315
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,1536,16,2,128,0,1,fp8,fp8,0,4.245333353678386
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,1536,16,4,128,0,1,float16,float16,0,6.281898498535156
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,1536,16,4,128,0,1,float16,fp8,0,6.2506669362386065
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,1536,16,4,128,0,1,fp8,fp8,0,4.749653180440267
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,1536,16,8,128,0,1,float16,float16,0,6.849877039591472
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,1536,16,16,128,0,1,float16,float16,0,4.0715945561726885
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,1536,16,8,128,0,1,float16,fp8,0,6.842709223429362
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,1536,16,16,128,0,1,float16,fp8,0,4.041557312011719
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,1536,16,1,128,0,1,float16,float16,0,2.746368090311686
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,1536,16,8,128,0,1,fp8,fp8,0,5.2536319096883135
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,1536,16,16,128,0,1,fp8,fp8,0,3.188394546508789
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,1536,16,1,128,0,1,float16,fp8,0,2.7567787170410156
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,1536,16,1,128,0,1,fp8,fp8,0,1.8699946403503418
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,1536,16,2,128,0,1,float16,float16,0,2.780501365661621
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,1536,16,2,128,0,1,fp8,fp8,0,1.9469653765360515
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,1536,16,2,128,0,1,float16,fp8,0,2.7970558802286782
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,1536,16,4,128,0,1,fp8,fp8,0,2.1717333793640137
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,1536,16,4,128,0,1,float16,float16,0,2.9999787012736
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,1536,16,4,128,0,1,float16,fp8,0,2.958847999572754
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,1536,16,8,128,0,1,float16,float16,0,3.384490648905436
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,1536,16,8,128,0,1,float16,fp8,0,3.3882452646891275
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,1536,16,16,128,0,1,float16,float16,0,2.018986701965332
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,1536,16,8,128,0,1,fp8,fp8,0,2.4852479298909507
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,1536,16,16,128,0,1,float16,fp8,0,1.9865600268046062
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,1536,16,1,128,0,1,float16,fp8,0,1.3073066870371501
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,1536,16,1,128,0,1,float16,float16,0,1.3195947011311848
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,1536,16,16,128,0,1,fp8,fp8,0,1.5373652776082356
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,1536,16,1,128,0,1,fp8,fp8,0,0.8963413238525391
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,1536,16,2,128,0,1,float16,float16,0,1.3941760063171387
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,1536,16,2,128,0,1,fp8,fp8,0,0.9294506708780924
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,1536,16,2,128,0,1,float16,fp8,0,1.3460480372111003
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,1536,16,4,128,0,1,float16,fp8,0,1.4402559598286946
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,1536,16,4,128,0,1,fp8,fp8,0,1.0287786324818928
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,1536,16,4,128,0,1,float16,float16,0,1.4609066645304363
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,1536,16,8,128,0,1,float16,float16,0,1.6583679517110188
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,1536,16,8,128,0,1,float16,fp8,0,1.6440320014953613
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,1536,16,16,128,0,1,float16,float16,0,0.9710933367411295
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,1536,16,16,128,0,1,float16,fp8,0,0.9313279787699381
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,1536,16,8,128,0,1,fp8,fp8,0,1.2025173505147297
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,1536,16,16,128,0,1,fp8,fp8,0,0.7483733495076498
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,1536,16,1,128,0,1,float16,float16,0,0.5898240009943644
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,1536,16,1,128,0,1,fp8,fp8,0,0.3882666826248169
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,1536,16,1,128,0,1,float16,fp8,0,0.5812906821568807
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,1536,16,2,128,0,1,float16,float16,0,0.6196906566619873
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,1536,16,2,128,0,1,float16,fp8,0,0.5853866736094157
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,1536,16,2,128,0,1,fp8,fp8,0,0.41147732734680176
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,1536,16,4,128,0,1,float16,fp8,0,0.6440960168838501
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,1536,16,4,128,0,1,float16,float16,0,0.6813013553619385
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,1536,16,4,128,0,1,fp8,fp8,0,0.47359999020894367
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,1536,16,8,128,0,1,float16,float16,0,0.7883093357086182
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,1536,16,8,128,0,1,float16,fp8,0,0.749397357304891
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,1536,16,8,128,0,1,fp8,fp8,0,0.5809493462244669
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,1536,16,16,128,0,1,float16,float16,0,0.41437868277231854
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,1536,16,16,128,0,1,float16,fp8,0,0.3700053294499715
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,1536,16,16,128,0,1,fp8,fp8,0,0.32921600341796875
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,1536,16,1,128,0,1,float16,fp8,0,0.304639995098114
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,1536,16,1,128,0,1,float16,float16,0,0.31249066193898517
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,1536,16,1,128,0,1,fp8,fp8,0,0.1843199928601583
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,1536,16,2,128,0,1,float16,float16,0,0.3051519989967346
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,1536,16,2,128,0,1,float16,fp8,0,0.30719999472300213
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,1536,16,2,128,0,1,fp8,fp8,0,0.18705066045125326
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,1536,16,4,128,0,1,float16,fp8,0,0.30958932638168335
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,1536,16,4,128,0,1,float16,float16,0,0.306005338827769
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,1536,16,4,128,0,1,fp8,fp8,0,0.19268266359965006
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,1536,16,8,128,0,1,float16,fp8,0,0.31914667288462323
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,1536,16,8,128,0,1,float16,float16,0,0.3264853358268738
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,1536,16,8,128,0,1,fp8,fp8,0,0.2111146648724874
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,1536,16,16,128,0,1,float16,float16,0,0.17988266547520956
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,1536,16,16,128,0,1,float16,fp8,0,0.17732266585032144
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,1536,16,16,128,0,1,fp8,fp8,0,0.11520000298817952
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,1536,16,1,128,0,1,float16,float16,0,0.16827734311421713
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,1536,16,1,128,0,1,float16,fp8,0,0.17459199825922647
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,1536,16,1,128,0,1,fp8,fp8,0,0.11195733149846394
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,1536,16,2,128,0,1,float16,fp8,0,0.17322667439778647
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,1536,16,2,128,0,1,float16,float16,0,0.1723733345667521
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,1536,16,2,128,0,1,fp8,fp8,0,0.11485866705576579
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,1536,16,4,128,0,1,float16,float16,0,0.1693013310432434
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,1536,16,4,128,0,1,float16,fp8,0,0.1728853384653727
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,1536,16,4,128,0,1,fp8,fp8,0,0.11349333326021831
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,1536,16,8,128,0,1,float16,float16,0,0.17544533809026083
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,1536,16,8,128,0,1,float16,fp8,0,0.17117865880330405
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,1536,16,8,128,0,1,fp8,fp8,0,0.11281067132949829
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,1536,16,16,128,0,1,float16,float16,0,0.1153706709543864
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,1536,16,16,128,0,1,float16,fp8,0,0.11793067057927449
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,1536,16,16,128,0,1,fp8,fp8,0,0.0747519979874293
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,1536,16,1,128,0,1,float16,float16,0,0.11673600474993388
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,1536,16,1,128,0,1,float16,fp8,0,0.11639466881752014
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,1536,16,1,128,0,1,fp8,fp8,0,0.08226133386294048
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,1536,16,2,128,0,1,float16,float16,0,0.11110400160153706
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,1536,16,2,128,0,1,float16,fp8,0,0.11281067132949829
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,1536,16,2,128,0,1,fp8,fp8,0,0.08260266482830048
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,1536,16,4,128,0,1,float16,float16,0,0.11520000298817952
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,1536,16,4,128,0,1,float16,fp8,0,0.11264000336329143
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,1536,16,4,128,0,1,fp8,fp8,0,0.0817493349313736
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,1536,16,8,128,0,1,float16,float16,0,0.1153706709543864
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,1536,16,8,128,0,1,float16,fp8,0,0.11673600474993388
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,1536,16,8,128,0,1,fp8,fp8,0,0.07492266595363617
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,1024,16,1,128,0,1,float16,float16,0,5.70146115620931
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,1024,16,1,128,0,1,float16,fp8,0,5.82536506652832
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,1024,16,1,128,0,1,fp8,fp8,0,4.0306345621744795
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,1024,16,2,128,0,1,float16,float16,0,5.778090794881185
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,1024,16,2,128,0,1,fp8,fp8,0,4.060501416524251
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,1024,16,2,128,0,1,float16,fp8,0,5.682858784993489
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,1024,16,4,128,0,1,float16,float16,0,6.310911814371745
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,1024,16,4,128,0,1,float16,fp8,0,6.298624038696289
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,1024,16,4,128,0,1,fp8,fp8,0,4.59775988260905
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,1024,16,8,128,0,1,float16,float16,0,7.350101470947266
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,1024,16,16,128,0,1,float16,fp8,0,4.262229283650716
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,1024,16,16,128,0,1,float16,float16,0,4.492458661397298
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,1024,16,8,128,0,1,float16,fp8,0,7.087957382202148
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,1024,16,8,128,0,1,fp8,fp8,0,5.587114969889323
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,1024,16,1,128,0,1,float16,float16,0,2.938026746114095
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,1024,16,16,128,0,1,fp8,fp8,0,3.6087465286254883
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,1024,16,1,128,0,1,float16,fp8,0,2.86463991800944
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,1024,16,1,128,0,1,fp8,fp8,0,2.0130133628845215
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,1024,16,2,128,0,1,float16,float16,0,2.910208066304525
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,1024,16,2,128,0,1,fp8,fp8,0,2.053290685017904
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,1024,16,2,128,0,1,float16,fp8,0,2.8922878901163735
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,1024,16,4,128,0,1,float16,float16,0,3.159722646077474
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,1024,16,4,128,0,1,fp8,fp8,0,2.2707200050354004
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,1024,16,4,128,0,1,float16,fp8,0,3.1452159881591797
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,1024,16,8,128,0,1,float16,float16,0,3.5792214075724282
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,1024,16,8,128,0,1,float16,fp8,0,3.5338239669799805
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,1024,16,16,128,0,1,float16,float16,0,2.281813303629557
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,1024,16,16,128,0,1,float16,fp8,0,2.1461332639058432
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,1024,16,8,128,0,1,fp8,fp8,0,2.7100159327189126
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,1024,16,16,128,0,1,fp8,fp8,0,1.751039981842041
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,1024,16,1,128,0,1,float16,float16,0,1.4453760782877605
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,1024,16,1,128,0,1,float16,fp8,0,1.4173866907755535
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,1024,16,1,128,0,1,fp8,fp8,0,0.979967991511027
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,1024,16,2,128,0,1,fp8,fp8,0,0.9890133539835612
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,1024,16,2,128,0,1,float16,float16,0,1.4885546366373699
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,1024,16,2,128,0,1,float16,fp8,0,1.4586879412333171
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,1024,16,4,128,0,1,float16,float16,0,1.577642599741618
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,1024,16,4,128,0,1,fp8,fp8,0,1.096021334330241
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,1024,16,8,128,0,1,float16,float16,0,1.8078719774882
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,1024,16,4,128,0,1,float16,fp8,0,1.5460693041483562
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,1024,16,8,128,0,1,float16,fp8,0,1.7298773129781086
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,1024,16,16,128,0,1,fp8,fp8,0,0.8528213500976562
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,1024,16,16,128,0,1,float16,fp8,0,1.074346701304118
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,1024,16,1,128,0,1,float16,float16,0,0.6423893372217814
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,1024,16,16,128,0,1,float16,float16,0,1.1381759643554688
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,1024,16,8,128,0,1,fp8,fp8,0,1.2945066293080647
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,1024,16,1,128,0,1,float16,fp8,0,0.6331733465194702
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,1024,16,1,128,0,1,fp8,fp8,0,0.44407467047373456
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,1024,16,2,128,0,1,float16,float16,0,0.6804479757944742
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,1024,16,2,128,0,1,float16,fp8,0,0.661845326423645
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,1024,16,2,128,0,1,fp8,fp8,0,0.4904959996541341
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,1024,16,4,128,0,1,float16,float16,0,0.7604906558990479
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,1024,16,4,128,0,1,fp8,fp8,0,0.5485226710637411
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,1024,16,4,128,0,1,float16,fp8,0,0.7567359606424967
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,1024,16,8,128,0,1,float16,float16,0,0.8849066893259684
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,1024,16,8,128,0,1,float16,fp8,0,0.8296106656392416
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,1024,16,8,128,0,1,fp8,fp8,0,0.65228799978892
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,1024,16,16,128,0,1,float16,float16,0,0.5198506514231364
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,1024,16,16,128,0,1,float16,fp8,0,0.4911786715189616
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,1024,16,16,128,0,1,fp8,fp8,0,0.4130133390426636
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,1024,16,1,128,0,1,float16,float16,0,0.2872320016225179
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,1024,16,1,128,0,1,float16,fp8,0,0.29627732435862225
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,1024,16,2,128,0,1,float16,float16,0,0.2863786617914836
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,1024,16,1,128,0,1,fp8,fp8,0,0.17612799008687338
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,1024,16,2,128,0,1,float16,fp8,0,0.2918399969736735
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,1024,16,2,128,0,1,fp8,fp8,0,0.17220266660054526
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,1024,16,4,128,0,1,float16,float16,0,0.30344533920288086
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,1024,16,4,128,0,1,float16,fp8,0,0.2974720001220703
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,1024,16,4,128,0,1,fp8,fp8,0,0.1914880077044169
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,1024,16,8,128,0,1,float16,fp8,0,0.31948800881703693
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,1024,16,8,128,0,1,float16,float16,0,0.35891199111938477
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,1024,16,8,128,0,1,fp8,fp8,0,0.2916693290074666
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,1024,16,16,128,0,1,float16,float16,0,0.17851734161376953
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,1024,16,16,128,0,1,float16,fp8,0,0.16776533921559653
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,1024,16,16,128,0,1,fp8,fp8,0,0.10547199845314026
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,1024,16,1,128,0,1,float16,float16,0,0.15769599874814352
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,1024,16,1,128,0,1,fp8,fp8,0,0.10018133123715718
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,1024,16,1,128,0,1,float16,fp8,0,0.15103999773661295
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,1024,16,2,128,0,1,float16,float16,0,0.1616213321685791
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,1024,16,2,128,0,1,float16,fp8,0,0.15121066570281982
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,1024,16,2,128,0,1,fp8,fp8,0,0.10120532910029094
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,1024,16,4,128,0,1,float16,float16,0,0.1599146624406179
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,1024,16,4,128,0,1,float16,fp8,0,0.1629866659641266
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,1024,16,8,128,0,1,float16,float16,0,0.15923200050989786
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,1024,16,4,128,0,1,fp8,fp8,0,0.10342400272687276
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,1024,16,8,128,0,1,float16,fp8,0,0.1616213321685791
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,1024,16,8,128,0,1,fp8,fp8,0,0.10376532872517903
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,1024,16,16,128,0,1,float16,float16,0,0.10222933689753215
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,1024,16,16,128,0,1,fp8,fp8,0,0.06178133189678192
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,1024,16,16,128,0,1,float16,fp8,0,0.10205866893132527
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,1024,16,1,128,0,1,float16,fp8,0,0.09471999605496724
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,1024,16,1,128,0,1,float16,float16,0,0.10052266716957092
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,1024,16,1,128,0,1,fp8,fp8,0,0.059903999169667564
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,1024,16,2,128,0,1,float16,float16,0,0.09966933727264404
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,1024,16,2,128,0,1,float16,fp8,0,0.10052266716957092
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,1024,16,2,128,0,1,fp8,fp8,0,0.06092800199985504
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,1024,16,4,128,0,1,float16,float16,0,0.09949866930643718
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,1024,16,4,128,0,1,float16,fp8,0,0.0993280013402303
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,1024,16,4,128,0,1,fp8,fp8,0,0.05905066430568695
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,1024,16,8,128,0,1,float16,float16,0,0.10086400310198466
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,1024,16,8,128,0,1,fp8,fp8,0,0.060415998101234436
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,1024,16,8,128,0,1,float16,fp8,0,0.10240000486373901
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,1024,16,16,128,0,1,float16,float16,0,0.05597866574923197
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,1024,16,16,128,0,1,float16,fp8,0,0.05580799778302511
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,1024,16,16,128,0,1,fp8,fp8,0,0.03857066730658213
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,1024,16,1,128,0,1,float16,float16,0,0.054101333022117615
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,1024,16,1,128,0,1,float16,fp8,0,0.05529599885145823
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,1024,16,1,128,0,1,fp8,fp8,0,0.03857066730658213
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,1024,16,2,128,0,1,float16,float16,0,0.05478399991989136
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,1024,16,2,128,0,1,float16,fp8,0,0.05478399991989136
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,1024,16,4,128,0,1,float16,float16,0,0.05478399991989136
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,1024,16,2,128,0,1,fp8,fp8,0,0.03874133278926214
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,1024,16,4,128,0,1,float16,fp8,0,0.05461333195368449
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,1024,16,4,128,0,1,fp8,fp8,0,0.03857066730658213
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,1024,16,8,128,0,1,float16,float16,0,0.05461333195368449
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,1024,16,8,128,0,1,float16,fp8,0,0.05495466788609823
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,1024,16,8,128,0,1,fp8,fp8,0,0.03874133278926214
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,512,16,1,128,0,1,float16,float16,0,4.214954694112142
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,512,16,1,128,0,1,fp8,fp8,0,3.0945278803507485
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,512,16,1,128,0,1,float16,fp8,0,4.213247934977214
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,512,16,2,128,0,1,float16,float16,0,4.249429384867351
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,512,16,2,128,0,1,fp8,fp8,0,3.140949249267578
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,512,16,2,128,0,1,float16,fp8,0,4.194133440653483
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,512,16,4,128,0,1,float16,fp8,0,4.641450564066569
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,512,16,4,128,0,1,float16,float16,0,4.789077440897624
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,512,16,4,128,0,1,fp8,fp8,0,3.7239465713500977
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,512,16,8,128,0,1,float16,float16,0,5.852842966715495
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,512,16,8,128,0,1,float16,fp8,0,5.563050587972005
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,512,16,8,128,0,1,fp8,fp8,0,4.8740692138671875
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,512,16,16,128,0,1,float16,float16,0,4.199253400166829
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,512,16,1,128,0,1,float16,float16,0,2.1620052655537925
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,512,16,16,128,0,1,float16,fp8,0,3.9422292709350586
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,512,16,16,128,0,1,fp8,fp8,0,3.350528081258138
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,512,16,1,128,0,1,float16,fp8,0,2.148693402608236
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,512,16,1,128,0,1,fp8,fp8,0,1.5807147026062012
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,512,16,2,128,0,1,float16,float16,0,2.1877759297688804
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,512,16,2,128,0,1,fp8,fp8,0,1.5511892636617024
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,512,16,2,128,0,1,float16,fp8,0,2.1524480183919272
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,512,16,4,128,0,1,float16,float16,0,2.404522736867269
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,512,16,4,128,0,1,float16,fp8,0,2.3202133178710938
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,512,16,4,128,0,1,fp8,fp8,0,1.804800033569336
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,512,16,8,128,0,1,float16,float16,0,2.8876800537109375
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,512,16,8,128,0,1,fp8,fp8,0,2.342400074005127
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,512,16,8,128,0,1,float16,fp8,0,2.758314768473307
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,512,16,16,128,0,1,float16,float16,0,2.122922738393148
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,512,16,16,128,0,1,float16,fp8,0,1.9971413612365723
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,512,16,1,128,0,1,float16,float16,0,1.0385066668192546
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,512,16,16,128,0,1,fp8,fp8,0,1.6407893498738606
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,512,16,1,128,0,1,float16,fp8,0,1.0359466870625813
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,512,16,1,128,0,1,fp8,fp8,0,0.7519573370615641
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,512,16,2,128,0,1,float16,float16,0,1.0775893529256184
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,512,16,2,128,0,1,float16,fp8,0,1.0712746779123943
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,512,16,2,128,0,1,fp8,fp8,0,0.8009386857350668
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,512,16,4,128,0,1,float16,fp8,0,1.165824015935262
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,512,16,4,128,0,1,fp8,fp8,0,0.9120426972707113
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,512,16,8,128,0,1,float16,float16,0,1.4494719505310059
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,512,16,4,128,0,1,float16,float16,0,1.18886399269104
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,512,16,8,128,0,1,float16,fp8,0,1.3786452611287434
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,512,16,16,128,0,1,float16,float16,0,1.0492586294809978
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,512,16,8,128,0,1,fp8,fp8,0,1.1284480094909668
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,512,16,16,128,0,1,float16,fp8,0,0.9919146696726481
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,512,16,1,128,0,1,float16,float16,0,0.446122686068217
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,512,16,1,128,0,1,fp8,fp8,0,0.3351893424987793
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,512,16,1,128,0,1,float16,fp8,0,0.4352000157038371
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,512,16,16,128,0,1,fp8,fp8,0,0.7657813231150309
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,512,16,2,128,0,1,float16,float16,0,0.5070506731669108
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,512,16,2,128,0,1,float16,fp8,0,0.47991466522216797
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,512,16,2,128,0,1,fp8,fp8,0,0.3643733263015747
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,512,16,4,128,0,1,float16,float16,0,0.5599573453267416
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,512,16,4,128,0,1,float16,fp8,0,0.5495466788609823
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,512,16,4,128,0,1,fp8,fp8,0,0.4150613149007161
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,512,16,8,128,0,1,float16,float16,0,0.7053653399149576
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,512,16,8,128,0,1,float16,fp8,0,0.6719146569569906
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,512,16,8,128,0,1,fp8,fp8,0,0.5261653264363607
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,512,16,16,128,0,1,float16,fp8,0,0.4140373468399048
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,512,16,16,128,0,1,float16,float16,0,0.45124268531799316
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,512,16,16,128,0,1,fp8,fp8,0,0.35891199111938477
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,512,16,1,128,0,1,float16,fp8,0,0.1955839991569519
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,512,16,1,128,0,1,float16,float16,0,0.19660800695419312
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,512,16,1,128,0,1,fp8,fp8,0,0.11639466881752014
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,512,16,2,128,0,1,float16,float16,0,0.19592533508936563
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,512,16,2,128,0,1,float16,fp8,0,0.1996799906094869
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,512,16,2,128,0,1,fp8,fp8,0,0.11793067057927449
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,512,16,4,128,0,1,float16,float16,0,0.20053333044052124
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,512,16,4,128,0,1,float16,fp8,0,0.19182932376861572
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,512,16,4,128,0,1,fp8,fp8,0,0.13960533340771994
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,512,16,8,128,0,1,float16,float16,0,0.2621440092722575
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,512,16,8,128,0,1,float16,fp8,0,0.22937599817911783
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,512,16,8,128,0,1,fp8,fp8,0,0.23057067394256592
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,512,16,16,128,0,1,float16,float16,0,0.12441600362459819
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,512,16,16,128,0,1,float16,fp8,0,0.106495996316274
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,512,16,16,128,0,1,fp8,fp8,0,0.07458133498827617
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,512,16,1,128,0,1,float16,float16,0,0.09591466188430786
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,512,16,1,128,0,1,float16,fp8,0,0.09830400347709656
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,512,16,1,128,0,1,fp8,fp8,0,0.06587733328342438
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,512,16,2,128,0,1,float16,float16,0,0.09659733374913533
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,512,16,2,128,0,1,float16,fp8,0,0.09659733374913533
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,512,16,2,128,0,1,fp8,fp8,0,0.06604800124963124
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,512,16,4,128,0,1,float16,float16,0,0.09864532947540283
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,512,16,4,128,0,1,float16,fp8,0,0.10035199920336406
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,512,16,4,128,0,1,fp8,fp8,0,0.067071999112765
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,512,16,8,128,0,1,float16,float16,0,0.10274133086204529
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,512,16,8,128,0,1,fp8,fp8,0,0.06758399804433186
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,512,16,8,128,0,1,float16,fp8,0,0.10222933689753215
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,512,16,16,128,0,1,float16,float16,0,0.06382933259010315
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,512,16,16,128,0,1,float16,fp8,0,0.06229333579540253
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,512,16,16,128,0,1,fp8,fp8,0,0.043007999658584595
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,512,16,1,128,0,1,float16,fp8,0,0.06007466713587443
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,512,16,1,128,0,1,float16,float16,0,0.05870933334032694
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,512,16,1,128,0,1,fp8,fp8,0,0.040448000033696495
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,512,16,2,128,0,1,float16,float16,0,0.05922133227189382
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,512,16,2,128,0,1,float16,fp8,0,0.058880001306533813
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,512,16,2,128,0,1,fp8,fp8,0,0.04181333382924398
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,512,16,4,128,0,1,float16,float16,0,0.05870933334032694
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,512,16,4,128,0,1,float16,fp8,0,0.0602453351020813
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,512,16,4,128,0,1,fp8,fp8,0,0.04164266586303711
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,512,16,8,128,0,1,float16,float16,0,0.06092800199985504
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,512,16,8,128,0,1,float16,fp8,0,0.059392000238100685
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,512,16,8,128,0,1,fp8,fp8,0,0.04147200038035711
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,512,16,16,128,0,1,float16,float16,0,0.03549866626660029
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,512,16,16,128,0,1,float16,fp8,0,0.03515733281771342
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,512,16,16,128,0,1,fp8,fp8,0,0.025941332181294758
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,512,16,1,128,0,1,float16,fp8,0,0.034645333886146545
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,512,16,1,128,0,1,float16,float16,0,0.03498666733503342
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,512,16,2,128,0,1,float16,float16,0,0.034645333886146545
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,512,16,1,128,0,1,fp8,fp8,0,0.025600001215934753
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,512,16,2,128,0,1,float16,fp8,0,0.034645333886146545
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,512,16,2,128,0,1,fp8,fp8,0,0.025600001215934753
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,512,16,4,128,0,1,float16,float16,0,0.034815999368826546
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,512,16,4,128,0,1,float16,fp8,0,0.034474665919939675
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,512,16,4,128,0,1,fp8,fp8,0,0.025770666698614757
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,512,16,8,128,0,1,float16,float16,0,0.03515733281771342
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,512,16,8,128,0,1,float16,fp8,0,0.034815999368826546
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,512,16,8,128,0,1,fp8,fp8,0,0.025941332181294758
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,512,16,16,128,0,1,float16,float16,0,0.027477333943049114
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,512,16,16,128,0,1,float16,fp8,0,0.027306665976842243
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,512,16,16,128,0,1,fp8,fp8,0,0.019797333826621372
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,512,16,1,128,0,1,float16,float16,0,0.02679466704527537
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,512,16,1,128,0,1,float16,fp8,0,0.02679466704527537
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,512,16,2,128,0,1,float16,float16,0,0.02679466704527537
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,512,16,1,128,0,1,fp8,fp8,0,0.0194560003777345
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,512,16,2,128,0,1,float16,fp8,0,0.026965332527955372
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,512,16,2,128,0,1,fp8,fp8,0,0.0194560003777345
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,512,16,4,128,0,1,float16,float16,0,0.026965332527955372
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,512,16,4,128,0,1,float16,fp8,0,0.02679466704527537
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,512,16,4,128,0,1,fp8,fp8,0,0.019285333653291065
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,512,16,8,128,0,1,float16,float16,0,0.027136000494162243
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,512,16,8,128,0,1,float16,fp8,0,0.02679466704527537
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,512,16,8,128,0,1,fp8,fp8,0,0.019626667102177937
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,256,16,1,128,0,1,float16,float16,0,1.868117332458496
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,256,16,1,128,0,1,fp8,fp8,0,1.3412693341573079
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,256,16,1,128,0,1,float16,fp8,0,1.862826665242513
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,256,16,2,128,0,1,float16,float16,0,1.967957337697347
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,256,16,2,128,0,1,float16,fp8,0,1.9445759455362956
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,256,16,2,128,0,1,fp8,fp8,0,1.407317320505778
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,256,16,4,128,0,1,float16,float16,0,2.2987093925476074
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,256,16,4,128,0,1,float16,fp8,0,2.243925412495931
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,256,16,8,128,0,1,float16,float16,0,2.9112319946289062
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,256,16,4,128,0,1,fp8,fp8,0,1.6839680671691895
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,256,16,8,128,0,1,fp8,fp8,0,2.2312960624694824
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,256,16,8,128,0,1,float16,fp8,0,2.7888641357421875
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,256,16,16,128,0,1,float16,float16,0,2.099541346232096
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,256,16,16,128,0,1,float16,fp8,0,1.990656057993571
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,256,16,16,128,0,1,fp8,fp8,0,1.6307199796040852
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,256,16,1,128,0,1,float16,float16,0,0.897706667582194
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,256,16,1,128,0,1,float16,fp8,0,0.889514684677124
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,256,16,2,128,0,1,float16,float16,0,1.0011306603749592
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,256,16,2,128,0,1,float16,fp8,0,0.958463986714681
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,256,16,1,128,0,1,fp8,fp8,0,0.6917119820912679
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,256,16,2,128,0,1,fp8,fp8,0,0.6934186617533366
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,256,16,4,128,0,1,float16,float16,0,1.149781306584676
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,256,16,4,128,0,1,float16,fp8,0,1.1204266548156738
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,256,16,4,128,0,1,fp8,fp8,0,0.8106666405995687
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,256,16,8,128,0,1,float16,float16,0,1.4527146021525066
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,256,16,8,128,0,1,float16,fp8,0,1.396224021911621
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,256,16,16,128,0,1,float16,fp8,0,0.98798934618632
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,256,16,8,128,0,1,fp8,fp8,0,1.0926079750061035
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,256,16,16,128,0,1,fp8,fp8,0,0.7719253698984782
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,256,16,1,128,0,1,float16,float16,0,0.362496018409729
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,256,16,1,128,0,1,float16,fp8,0,0.36317865053812665
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,256,16,16,128,0,1,float16,float16,0,1.04857603708903
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,256,16,1,128,0,1,fp8,fp8,0,0.2810879945755005
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,256,16,2,128,0,1,float16,float16,0,0.41369601090749103
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,256,16,2,128,0,1,float16,fp8,0,0.4266666571299235
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,256,16,4,128,0,1,float16,fp8,0,0.506880005200704
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,256,16,2,128,0,1,fp8,fp8,0,0.33211733897527057
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,256,16,4,128,0,1,float16,float16,0,0.5246293147404989
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,256,16,4,128,0,1,fp8,fp8,0,0.3816106716791789
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,256,16,8,128,0,1,float16,float16,0,0.6959786415100098
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,256,16,8,128,0,1,float16,fp8,0,0.6633813381195068
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,256,16,8,128,0,1,fp8,fp8,0,0.5010773340861002
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,256,16,16,128,0,1,float16,float16,0,0.4601173400878906
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,256,16,16,128,0,1,float16,fp8,0,0.4242773453394572
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,256,16,1,128,0,1,float16,float16,0,0.12288000186284383
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,256,16,1,128,0,1,float16,fp8,0,0.13431466619173685
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,256,16,16,128,0,1,fp8,fp8,0,0.3256319959958394
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,256,16,1,128,0,1,fp8,fp8,0,0.0885759989420573
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,256,16,2,128,0,1,float16,float16,0,0.13687466581662497
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,256,16,2,128,0,1,float16,fp8,0,0.1378986636797587
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,256,16,2,128,0,1,fp8,fp8,0,0.0885759989420573
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,256,16,4,128,0,1,float16,float16,0,0.1430186629295349
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,256,16,4,128,0,1,fp8,fp8,0,0.09318400422732036
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,256,16,4,128,0,1,float16,fp8,0,0.13619200388590494
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,256,16,8,128,0,1,float16,float16,0,0.20616533358891806
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,256,16,8,128,0,1,float16,fp8,0,0.1843199928601583
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,256,16,8,128,0,1,fp8,fp8,0,0.20121600230534872
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,256,16,16,128,0,1,float16,float16,0,0.09830400347709656
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,256,16,16,128,0,1,fp8,fp8,0,0.053077335158983864
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,256,16,16,128,0,1,float16,fp8,0,0.08379733562469482
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,256,16,1,128,0,1,float16,float16,0,0.06843733290831248
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,256,16,1,128,0,1,float16,fp8,0,0.0682666649421056
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,256,16,1,128,0,1,fp8,fp8,0,0.05034666756788889
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,256,16,2,128,0,1,float16,float16,0,0.06946133573849995
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,256,16,2,128,0,1,float16,fp8,0,0.06809600194295247
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,256,16,2,128,0,1,fp8,fp8,0,0.04983466863632202
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,256,16,4,128,0,1,float16,float16,0,0.06809600194295247
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,256,16,8,128,0,1,float16,float16,0,0.07355733215808868
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,256,16,4,128,0,1,fp8,fp8,0,0.04983466863632202
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,256,16,4,128,0,1,float16,fp8,0,0.06809600194295247
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,256,16,8,128,0,1,float16,fp8,0,0.07116800049940745
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,256,16,16,128,0,1,float16,float16,0,0.04420266548792521
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,256,16,16,128,0,1,float16,fp8,0,0.04369066655635834
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,256,16,8,128,0,1,fp8,fp8,0,0.050517335534095764
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,256,16,16,128,0,1,fp8,fp8,0,0.03310933212439219
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,256,16,1,128,0,1,float16,float16,0,0.040618665516376495
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,256,16,1,128,0,1,float16,fp8,0,0.040789333482583366
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,256,16,1,128,0,1,fp8,fp8,0,0.03089066594839096
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,256,16,2,128,0,1,float16,float16,0,0.04095999896526337
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,256,16,2,128,0,1,float16,fp8,0,0.040618665516376495
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,256,16,2,128,0,1,fp8,fp8,0,0.030720000465710957
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,256,16,4,128,0,1,float16,fp8,0,0.04130133241415024
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,256,16,4,128,0,1,fp8,fp8,0,0.03089066594839096
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,256,16,8,128,0,1,float16,float16,0,0.04232533276081085
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,256,16,8,128,0,1,float16,fp8,0,0.04164266586303711
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,256,16,4,128,0,1,float16,float16,0,0.04095999896526337
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,256,16,8,128,0,1,fp8,fp8,0,0.03754666695992152
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,256,16,16,128,0,1,float16,float16,0,0.02628266563018163
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,256,16,16,128,0,1,fp8,fp8,0,0.019797333826621372
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,256,16,16,128,0,1,float16,fp8,0,0.025770666698614757
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,256,16,1,128,0,1,float16,float16,0,0.025087999800841015
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,256,16,1,128,0,1,float16,fp8,0,0.025600001215934753
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,256,16,1,128,0,1,fp8,fp8,0,0.019626667102177937
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,256,16,2,128,0,1,float16,fp8,0,0.025087999800841015
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,256,16,2,128,0,1,float16,float16,0,0.024746666351954143
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,256,16,2,128,0,1,fp8,fp8,0,0.019626667102177937
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,256,16,4,128,0,1,float16,float16,0,0.024746666351954143
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,256,16,4,128,0,1,float16,fp8,0,0.02491733431816101
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,256,16,4,128,0,1,fp8,fp8,0,0.0194560003777345
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,256,16,8,128,0,1,float16,float16,0,0.025600001215934753
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,256,16,8,128,0,1,float16,fp8,0,0.025429333249727886
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,256,16,8,128,0,1,fp8,fp8,0,0.019626667102177937
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,256,16,16,128,0,1,float16,float16,0,0.019626667102177937
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,256,16,16,128,0,1,fp8,fp8,0,0.015360000232855478
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,256,16,16,128,0,1,float16,fp8,0,0.019285333653291065
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,256,16,1,128,0,1,float16,fp8,0,0.018602666755517323
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,256,16,1,128,0,1,float16,float16,0,0.018432000031073887
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,256,16,1,128,0,1,fp8,fp8,0,0.014677333335081736
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,256,16,2,128,0,1,float16,float16,0,0.01877333347996076
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,256,16,2,128,0,1,float16,fp8,0,0.01877333347996076
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,256,16,2,128,0,1,fp8,fp8,0,0.014848000059525171
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,256,16,4,128,0,1,float16,float16,0,0.018602666755517323
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,256,16,4,128,0,1,fp8,fp8,0,0.014848000059525171
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,256,16,4,128,0,1,float16,fp8,0,0.018602666755517323
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,256,16,8,128,0,1,float16,float16,0,0.018944000204404194
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,256,16,8,128,0,1,float16,fp8,0,0.01911466692884763
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,256,16,8,128,0,1,fp8,fp8,0,0.015018666783968607
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,256,16,16,128,0,1,float16,float16,0,0.0170666662355264
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,256,16,16,128,0,1,float16,fp8,0,0.017237332959969837
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,256,16,16,128,0,1,fp8,fp8,0,0.01331199953953425
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,256,16,1,128,0,1,float16,float16,0,0.01672533278663953
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,256,16,1,128,0,1,float16,fp8,0,0.0170666662355264
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,256,16,1,128,0,1,fp8,fp8,0,0.013141332815090815
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,256,16,2,128,0,1,float16,float16,0,0.016895999511082966
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,256,16,2,128,0,1,float16,fp8,0,0.016895999511082966
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,256,16,2,128,0,1,fp8,fp8,0,0.01331199953953425
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,256,16,4,128,0,1,float16,float16,0,0.01672533278663953
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,256,16,4,128,0,1,float16,fp8,0,0.016895999511082966
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,256,16,4,128,0,1,fp8,fp8,0,0.013482666263977686
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,256,16,8,128,0,1,float16,float16,0,0.016895999511082966
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,256,16,8,128,0,1,float16,fp8,0,0.0170666662355264
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,256,16,8,128,0,1,fp8,fp8,0,0.01331199953953425
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,128,16,1,128,0,1,float16,float16,0,0.9006079832712809
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,128,16,1,128,0,1,float16,fp8,0,0.8941226800282797
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,128,16,2,128,0,1,float16,float16,0,0.981503963470459
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,128,16,2,128,0,1,float16,fp8,0,0.9600000381469727
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,128,16,2,128,0,1,fp8,fp8,0,0.6746453444163004
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,128,16,1,128,0,1,fp8,fp8,0,0.6176426808039347
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,128,16,4,128,0,1,float16,float16,0,1.1489280064900715
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,128,16,4,128,0,1,float16,fp8,0,1.1270826657613118
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,128,16,4,128,0,1,fp8,fp8,0,0.8279039859771729
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,128,16,8,128,0,1,float16,float16,0,1.4644907315572102
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,128,16,8,128,0,1,float16,fp8,0,1.4093653361002605
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,128,16,16,128,0,1,float16,float16,0,1.0673493544260662
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,128,16,8,128,0,1,fp8,fp8,0,1.1062613328297932
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,128,16,16,128,0,1,fp8,fp8,0,0.7819946606953939
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,128,16,1,128,0,1,float16,float16,0,0.36164267857869464
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,128,16,16,128,0,1,float16,fp8,0,1.0018133322397869
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,128,16,1,128,0,1,float16,fp8,0,0.35276798407236737
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,128,16,1,128,0,1,fp8,fp8,0,0.25702399015426636
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,128,16,2,128,0,1,float16,float16,0,0.41915734608968097
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,128,16,2,128,0,1,float16,fp8,0,0.4036266803741455
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,128,16,2,128,0,1,fp8,fp8,0,0.2908160090446472
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,128,16,4,128,0,1,float16,fp8,0,0.5053439935048422
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,128,16,4,128,0,1,float16,float16,0,0.5287253459294637
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,128,16,4,128,0,1,fp8,fp8,0,0.3604480028152466
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,128,16,8,128,0,1,float16,float16,0,0.7007573445638021
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,128,16,8,128,0,1,float16,fp8,0,0.66594131787618
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,128,16,8,128,0,1,fp8,fp8,0,0.5039786497751871
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,128,16,16,128,0,1,float16,fp8,0,0.41574398676554364
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,128,16,16,128,0,1,float16,float16,0,0.45602134863535565
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,128,16,16,128,0,1,fp8,fp8,0,0.3314346671104431
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,128,16,1,128,0,1,float16,float16,0,0.09642666578292847
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,128,16,1,128,0,1,fp8,fp8,0,0.07253333429495494
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,128,16,1,128,0,1,float16,fp8,0,0.09659733374913533
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,128,16,2,128,0,1,float16,float16,0,0.10120532910029094
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,128,16,2,128,0,1,float16,fp8,0,0.09659733374913533
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,128,16,2,128,0,1,fp8,fp8,0,0.0718506673971812
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,128,16,4,128,0,1,float16,float16,0,0.12236799796422322
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,128,16,4,128,0,1,float16,fp8,0,0.11400533715883891
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,128,16,4,128,0,1,fp8,fp8,0,0.07970133423805237
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,128,16,8,128,0,1,float16,float16,0,0.20462934176127115
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,128,16,8,128,0,1,float16,fp8,0,0.1646933356920878
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,128,16,8,128,0,1,fp8,fp8,0,0.18090667327245077
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,128,16,16,128,0,1,float16,float16,0,0.08345599969228108
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,128,16,16,128,0,1,float16,fp8,0,0.06263466676076253
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,128,16,1,128,0,1,float16,float16,0,0.051370665431022644
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,128,16,16,128,0,1,fp8,fp8,0,0.04505600035190582
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,128,16,1,128,0,1,float16,fp8,0,0.051029334465662636
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,128,16,1,128,0,1,fp8,fp8,0,0.040448000033696495
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,128,16,2,128,0,1,float16,float16,0,0.05239466826121012
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,128,16,2,128,0,1,float16,fp8,0,0.05222400029500326
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,128,16,2,128,0,1,fp8,fp8,0,0.04095999896526337
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,128,16,4,128,0,1,float16,float16,0,0.052906667192777
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,128,16,4,128,0,1,float16,fp8,0,0.052906667192777
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,128,16,4,128,0,1,fp8,fp8,0,0.04130133241415024
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,128,16,8,128,0,1,float16,float16,0,0.05597866574923197
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,128,16,8,128,0,1,float16,fp8,0,0.05461333195368449
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,128,16,8,128,0,1,fp8,fp8,0,0.04266666869322459
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,128,16,16,128,0,1,float16,float16,0,0.03532800078392029
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,128,16,16,128,0,1,float16,fp8,0,0.034474665919939675
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,128,16,16,128,0,1,fp8,fp8,0,0.027818667391935985
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,128,16,1,128,0,1,float16,float16,0,0.031061333914597828
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,128,16,1,128,0,1,float16,fp8,0,0.031061333914597828
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,128,16,1,128,0,1,fp8,fp8,0,0.025087999800841015
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,128,16,2,128,0,1,float16,float16,0,0.031231999397277832
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,128,16,2,128,0,1,fp8,fp8,0,0.025429333249727886
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,128,16,2,128,0,1,float16,fp8,0,0.0315733328461647
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,128,16,4,128,0,1,float16,float16,0,0.0315733328461647
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,128,16,4,128,0,1,float16,fp8,0,0.031744000812371574
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,128,16,4,128,0,1,fp8,fp8,0,0.025600001215934753
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,128,16,8,128,0,1,float16,float16,0,0.03293866664171219
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,128,16,8,128,0,1,float16,fp8,0,0.03276800115903219
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,128,16,8,128,0,1,fp8,fp8,0,0.02628266563018163
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,128,16,16,128,0,1,fp8,fp8,0,0.0170666662355264
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,128,16,16,128,0,1,float16,float16,0,0.021503999829292297
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,128,16,1,128,0,1,float16,float16,0,0.020138667275508244
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,128,16,16,128,0,1,float16,fp8,0,0.021333334346612293
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,128,16,1,128,0,1,float16,fp8,0,0.020309332758188248
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,128,16,1,128,0,1,fp8,fp8,0,0.016384000579516094
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,128,16,2,128,0,1,float16,float16,0,0.020138667275508244
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,128,16,2,128,0,1,float16,fp8,0,0.020479999482631683
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,128,16,2,128,0,1,fp8,fp8,0,0.01672533278663953
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,128,16,4,128,0,1,float16,fp8,0,0.02065066620707512
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,128,16,4,128,0,1,float16,float16,0,0.020309332758188248
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,128,16,4,128,0,1,fp8,fp8,0,0.016895999511082966
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,128,16,8,128,0,1,float16,float16,0,0.020821332931518555
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,128,16,8,128,0,1,float16,fp8,0,0.02065066620707512
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,128,16,8,128,0,1,fp8,fp8,0,0.016895999511082966
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,128,16,16,128,0,1,float16,float16,0,0.01570133368174235
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,128,16,16,128,0,1,float16,fp8,0,0.015189333508412043
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,128,16,16,128,0,1,fp8,fp8,0,0.012970666090647379
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,128,16,1,128,0,1,float16,float16,0,0.0145066666106383
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,128,16,1,128,0,1,fp8,fp8,0,0.012458667159080505
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,128,16,1,128,0,1,float16,fp8,0,0.0145066666106383
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,128,16,2,128,0,1,float16,float16,0,0.0145066666106383
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,128,16,2,128,0,1,float16,fp8,0,0.0145066666106383
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,128,16,2,128,0,1,fp8,fp8,0,0.012458667159080505
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,128,16,4,128,0,1,float16,float16,0,0.014677333335081736
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,128,16,4,128,0,1,float16,fp8,0,0.014677333335081736
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,128,16,4,128,0,1,fp8,fp8,0,0.012458667159080505
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,128,16,8,128,0,1,float16,float16,0,0.015018666783968607
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,128,16,8,128,0,1,fp8,fp8,0,0.012800000607967377
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,128,16,16,128,0,1,float16,float16,0,0.012970666090647379
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,128,16,8,128,0,1,float16,fp8,0,0.015018666783968607
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,128,16,16,128,0,1,float16,fp8,0,0.013994666437307993
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,128,16,16,128,0,1,fp8,fp8,0,0.011264000087976456
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,128,16,1,128,0,1,float16,float16,0,0.012800000607967377
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,128,16,1,128,0,1,float16,fp8,0,0.012970666090647379
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,128,16,1,128,0,1,fp8,fp8,0,0.010922666639089584
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,128,16,2,128,0,1,float16,float16,0,0.012800000607967377
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,128,16,2,128,0,1,float16,fp8,0,0.012800000607967377
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,128,16,2,128,0,1,fp8,fp8,0,0.010922666639089584
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,128,16,4,128,0,1,float16,float16,0,0.012800000607967377
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,128,16,4,128,0,1,float16,fp8,0,0.012970666090647379
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,128,16,4,128,0,1,fp8,fp8,0,0.01109333336353302
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,128,16,8,128,0,1,float16,float16,0,0.013141332815090815
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,128,16,8,128,0,1,float16,fp8,0,0.013141332815090815
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,128,16,8,128,0,1,fp8,fp8,0,0.01109333336353302
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,128,16,16,128,0,1,float16,float16,0,0.011776000261306763
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,128,16,16,128,0,1,float16,fp8,0,0.012117333710193634
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,128,16,16,128,0,1,fp8,fp8,0,0.010239999741315842
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,128,16,1,128,0,1,float16,float16,0,0.011946666985750198
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,128,16,1,128,0,1,float16,fp8,0,0.01228800043463707
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,128,16,1,128,0,1,fp8,fp8,0,0.010581333190202713
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,128,16,2,128,0,1,float16,float16,0,0.012117333710193634
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,128,16,2,128,0,1,fp8,fp8,0,0.010410666465759277
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,128,16,2,128,0,1,float16,fp8,0,0.01228800043463707
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,128,16,4,128,0,1,float16,fp8,0,0.01228800043463707
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,128,16,4,128,0,1,float16,float16,0,0.012117333710193634
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,128,16,4,128,0,1,fp8,fp8,0,0.010410666465759277
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,128,16,8,128,0,1,float16,float16,0,0.012117333710193634
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,128,16,8,128,0,1,float16,fp8,0,0.012458667159080505
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,128,16,8,128,0,1,fp8,fp8,0,0.010581333190202713
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,64,16,1,128,0,1,float16,float16,0,0.36642134189605713
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,64,16,1,128,0,1,float16,fp8,0,0.358570655186971
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,64,16,1,128,0,1,fp8,fp8,0,0.2481493353843689
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,64,16,2,128,0,1,float16,fp8,0,0.4077226718266805
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,64,16,2,128,0,1,float16,float16,0,0.42239999771118164
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,64,16,2,128,0,1,fp8,fp8,0,0.27477333943049115
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,64,16,4,128,0,1,float16,float16,0,0.5277013381322225
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,64,16,4,128,0,1,fp8,fp8,0,0.36061867078145343
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,64,16,8,128,0,1,float16,float16,0,0.7055359681447347
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,64,16,4,128,0,1,float16,fp8,0,0.5102933247884115
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,64,16,8,128,0,1,float16,fp8,0,0.6661119858423868
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,64,16,8,128,0,1,fp8,fp8,0,0.5012480020523071
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,64,16,16,128,0,1,float16,fp8,0,0.42854400475819904
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,64,16,16,128,0,1,float16,float16,0,0.4681386550267537
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,64,16,16,128,0,1,fp8,fp8,0,0.32733867565790814
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,64,16,1,128,0,1,float16,float16,0,0.08584533135096233
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,64,16,1,128,0,1,float16,fp8,0,0.08447999755541484
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,64,16,1,128,0,1,fp8,fp8,0,0.06587733328342438
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,64,16,2,128,0,1,float16,float16,0,0.09233066439628601
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,64,16,2,128,0,1,float16,fp8,0,0.08686932921409607
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,64,16,2,128,0,1,fp8,fp8,0,0.0653653343518575
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,64,16,4,128,0,1,float16,float16,0,0.11690666278203328
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,64,16,4,128,0,1,float16,fp8,0,0.10291199882825215
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,64,16,4,128,0,1,fp8,fp8,0,0.0766293356815974
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,64,16,8,128,0,1,float16,float16,0,0.2027519941329956
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,64,16,8,128,0,1,fp8,fp8,0,0.17476266622543335
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,64,16,16,128,0,1,float16,float16,0,0.0730453332265218
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,64,16,16,128,0,1,float16,fp8,0,0.05563733478387197
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,64,16,8,128,0,1,float16,fp8,0,0.16059733430544534
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,64,16,16,128,0,1,fp8,fp8,0,0.04147200038035711
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,64,16,1,128,0,1,float16,float16,0,0.04488533238569895
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,64,16,1,128,0,1,float16,fp8,0,0.04488533238569895
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,64,16,1,128,0,1,fp8,fp8,0,0.03583999971548716
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,64,16,2,128,0,1,float16,fp8,0,0.045567999283472695
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,64,16,2,128,0,1,fp8,fp8,0,0.0363520011305809
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,64,16,2,128,0,1,float16,float16,0,0.045567999283472695
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,64,16,4,128,0,1,float16,fp8,0,0.06075733403364817
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,64,16,4,128,0,1,float16,float16,0,0.04659200211366018
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,64,16,4,128,0,1,fp8,fp8,0,0.037205333511034645
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,64,16,8,128,0,1,float16,fp8,0,0.048469334840774536
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,64,16,8,128,0,1,float16,float16,0,0.04966400067011515
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,64,16,8,128,0,1,fp8,fp8,0,0.038912000755469
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,64,16,16,128,0,1,float16,float16,0,0.031061333914597828
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,64,16,16,128,0,1,float16,fp8,0,0.030037333567937214
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,64,16,16,128,0,1,fp8,fp8,0,0.02491733431816101
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,64,16,1,128,0,1,float16,fp8,0,0.026965332527955372
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,64,16,1,128,0,1,fp8,fp8,0,0.02184533327817917
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,64,16,1,128,0,1,float16,float16,0,0.02679466704527537
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,64,16,2,128,0,1,fp8,fp8,0,0.02184533327817917
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,64,16,2,128,0,1,float16,fp8,0,0.026965332527955372
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,64,16,2,128,0,1,float16,float16,0,0.027136000494162243
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,64,16,4,128,0,1,float16,float16,0,0.027306665976842243
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,64,16,4,128,0,1,float16,fp8,0,0.027477333943049114
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,64,16,4,128,0,1,fp8,fp8,0,0.022698665658632915
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,64,16,8,128,0,1,float16,float16,0,0.0288426677385966
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,64,16,8,128,0,1,fp8,fp8,0,0.023039999107519787
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,64,16,16,128,0,1,float16,float16,0,0.018944000204404194
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,64,16,16,128,0,1,float16,fp8,0,0.018432000031073887
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,64,16,8,128,0,1,float16,fp8,0,0.02867199977238973
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,64,16,16,128,0,1,fp8,fp8,0,0.015872000406185787
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,64,16,1,128,0,1,float16,float16,0,0.017407999684413273
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,64,16,1,128,0,1,fp8,fp8,0,0.014677333335081736
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,64,16,1,128,0,1,float16,fp8,0,0.01757866640885671
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,64,16,2,128,0,1,float16,float16,0,0.017407999684413273
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,64,16,2,128,0,1,float16,fp8,0,0.01757866640885671
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,64,16,2,128,0,1,fp8,fp8,0,0.014677333335081736
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,64,16,4,128,0,1,float16,fp8,0,0.01757866640885671
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,64,16,4,128,0,1,float16,float16,0,0.01757866640885671
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,64,16,4,128,0,1,fp8,fp8,0,0.015360000232855478
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,64,16,8,128,0,1,float16,float16,0,0.017749333133300144
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,64,16,8,128,0,1,fp8,fp8,0,0.015360000232855478
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,64,16,8,128,0,1,float16,fp8,0,0.018090666582187016
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,64,16,16,128,0,1,float16,float16,0,0.013653332988421122
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,64,16,16,128,0,1,fp8,fp8,0,0.011946666985750198
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,64,16,16,128,0,1,float16,fp8,0,0.013141332815090815
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,64,16,1,128,0,1,float16,float16,0,0.01228800043463707
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,64,16,1,128,0,1,float16,fp8,0,0.012629333883523941
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,64,16,1,128,0,1,fp8,fp8,0,0.01109333336353302
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,64,16,2,128,0,1,float16,float16,0,0.012458667159080505
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,64,16,2,128,0,1,fp8,fp8,0,0.011434666812419891
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,64,16,2,128,0,1,float16,fp8,0,0.012629333883523941
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,64,16,4,128,0,1,float16,float16,0,0.012800000607967377
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,64,16,4,128,0,1,float16,fp8,0,0.012629333883523941
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,64,16,4,128,0,1,fp8,fp8,0,0.011264000087976456
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,64,16,8,128,0,1,float16,float16,0,0.013141332815090815
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,64,16,8,128,0,1,float16,fp8,0,0.013141332815090815
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,64,16,8,128,0,1,fp8,fp8,0,0.011434666812419891
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,64,16,16,128,0,1,float16,float16,0,0.01109333336353302
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,64,16,16,128,0,1,float16,fp8,0,0.011264000087976456
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,64,16,16,128,0,1,fp8,fp8,0,0.009898666913310686
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,64,16,1,128,0,1,float16,float16,0,0.010751999914646149
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,64,16,1,128,0,1,fp8,fp8,0,0.009898666913310686
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,64,16,1,128,0,1,float16,fp8,0,0.010922666639089584
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,64,16,2,128,0,1,float16,fp8,0,0.01109333336353302
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,64,16,2,128,0,1,fp8,fp8,0,0.009898666913310686
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,64,16,2,128,0,1,float16,float16,0,0.010751999914646149
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,64,16,4,128,0,1,float16,float16,0,0.010922666639089584
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,64,16,4,128,0,1,float16,fp8,0,0.010922666639089584
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,64,16,4,128,0,1,fp8,fp8,0,0.009898666913310686
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,64,16,8,128,0,1,float16,float16,0,0.01109333336353302
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,64,16,8,128,0,1,fp8,fp8,0,0.010069333637754122
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,64,16,16,128,0,1,float16,float16,0,0.009898666913310686
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,64,16,8,128,0,1,float16,fp8,0,0.011264000087976456
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,64,16,16,128,0,1,float16,fp8,0,0.010239999741315842
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,64,16,16,128,0,1,fp8,fp8,0,0.00938666673998038
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,64,16,1,128,0,1,float16,float16,0,0.010069333637754122
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,64,16,1,128,0,1,float16,fp8,0,0.010239999741315842
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,64,16,1,128,0,1,fp8,fp8,0,0.009216000015536943
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,64,16,2,128,0,1,float16,float16,0,0.010069333637754122
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,64,16,2,128,0,1,float16,fp8,0,0.010069333637754122
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,64,16,4,128,0,1,float16,float16,0,0.010069333637754122
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,64,16,2,128,0,1,fp8,fp8,0,0.009216000015536943
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,64,16,4,128,0,1,float16,fp8,0,0.010410666465759277
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,64,16,4,128,0,1,fp8,fp8,0,0.010384000216921171
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,64,16,8,128,0,1,float16,float16,0,0.010069333637754122
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,64,16,8,128,0,1,fp8,fp8,0,0.00938666673998038
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,64,16,16,128,0,1,float16,float16,0,0.009557333464423815
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,64,16,8,128,0,1,float16,fp8,0,0.010239999741315842
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,64,16,16,128,0,1,float16,fp8,0,0.009898666913310686
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,64,16,1,128,0,1,float16,fp8,0,0.010239999741315842
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,64,16,1,128,0,1,float16,float16,0,0.009898666913310686
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,64,16,16,128,0,1,fp8,fp8,0,0.009045333291093508
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,64,16,1,128,0,1,fp8,fp8,0,0.009216000015536943
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,64,16,2,128,0,1,float16,float16,0,0.009898666913310686
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,64,16,2,128,0,1,float16,fp8,0,0.010069333637754122
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,64,16,2,128,0,1,fp8,fp8,0,0.009216000015536943
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,64,16,4,128,0,1,float16,float16,0,0.009898666913310686
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,64,16,4,128,0,1,float16,fp8,0,0.010239999741315842
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,64,16,4,128,0,1,fp8,fp8,0,0.009216000015536943
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,64,16,8,128,0,1,float16,fp8,0,0.010239999741315842
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,64,16,8,128,0,1,float16,float16,0,0.009898666913310686
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,64,16,8,128,0,1,fp8,fp8,0,0.009216000015536943
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,32,16,1,128,0,1,float16,float16,0,0.08738133311271667
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,32,16,1,128,0,1,float16,fp8,0,0.08874666690826416
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,32,16,1,128,0,1,fp8,fp8,0,0.07406933108965556
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,32,16,2,128,0,1,float16,float16,0,0.0942080020904541
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,32,16,2,128,0,1,float16,fp8,0,0.09062400460243225
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,32,16,2,128,0,1,fp8,fp8,0,0.07526400188604991
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,32,16,4,128,0,1,float16,float16,0,0.116565336783727
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,32,16,4,128,0,1,float16,fp8,0,0.105813334385554
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,32,16,4,128,0,1,fp8,fp8,0,0.08482133348782857
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,32,16,8,128,0,1,float16,fp8,0,0.16025599837303162
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,32,16,8,128,0,1,float16,float16,0,0.20241065820058188
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,32,16,8,128,0,1,fp8,fp8,0,0.17971199750900269
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,32,16,16,128,0,1,float16,float16,0,0.07935999830563863
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,32,16,16,128,0,1,float16,fp8,0,0.05836800237496694
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,32,16,16,128,0,1,fp8,fp8,0,0.04471466441949209
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,32,16,1,128,0,1,float16,float16,0,0.04727466901143392
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,32,16,1,128,0,1,float16,fp8,0,0.04744533201058706
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,32,16,1,128,0,1,fp8,fp8,0,0.03976533313592275
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,32,16,2,128,0,1,float16,float16,0,0.048298666874567665
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,32,16,2,128,0,1,float16,fp8,0,0.04761599997679392
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,32,16,2,128,0,1,fp8,fp8,0,0.040448000033696495
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,32,16,4,128,0,1,float16,float16,0,0.048469334840774536
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,32,16,4,128,0,1,fp8,fp8,0,0.04113066693147024
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,32,16,8,128,0,1,float16,float16,0,0.05205333232879639
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,32,16,4,128,0,1,float16,fp8,0,0.04863999783992767
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,32,16,8,128,0,1,float16,fp8,0,0.05017599960168203
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,32,16,8,128,0,1,fp8,fp8,0,0.042837331692377724
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,32,16,16,128,0,1,float16,float16,0,0.031914666295051575
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,32,16,16,128,0,1,float16,fp8,0,0.03054933249950409
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,32,16,16,128,0,1,fp8,fp8,0,0.025770666698614757
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,32,16,1,128,0,1,float16,float16,0,0.028160000840822857
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,32,16,1,128,0,1,float16,fp8,0,0.027989332874615986
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,32,16,1,128,0,1,fp8,fp8,0,0.023893333971500397
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,32,16,2,128,0,1,float16,float16,0,0.028330666323502857
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,32,16,2,128,0,1,float16,fp8,0,0.028501334289709728
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,32,16,2,128,0,1,fp8,fp8,0,0.0240639994541804
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,32,16,4,128,0,1,float16,float16,0,0.0288426677385966
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,32,16,4,128,0,1,float16,fp8,0,0.02867199977238973
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,32,16,4,128,0,1,fp8,fp8,0,0.02457600086927414
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,32,16,8,128,0,1,float16,float16,0,0.029866665601730347
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,32,16,8,128,0,1,fp8,fp8,0,0.025087999800841015
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,32,16,8,128,0,1,float16,fp8,0,0.02935466667016347
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,32,16,16,128,0,1,float16,float16,0,0.020309332758188248
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,32,16,16,128,0,1,float16,fp8,0,0.019285333653291065
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,32,16,1,128,0,1,float16,float16,0,0.018090666582187016
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,32,16,16,128,0,1,fp8,fp8,0,0.0170666662355264
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,32,16,1,128,0,1,float16,fp8,0,0.01826133330663045
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,32,16,1,128,0,1,fp8,fp8,0,0.01621333385507266
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,32,16,2,128,0,1,float16,float16,0,0.01826133330663045
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,32,16,2,128,0,1,float16,fp8,0,0.018602666755517323
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,32,16,2,128,0,1,fp8,fp8,0,0.016384000579516094
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,32,16,4,128,0,1,float16,float16,0,0.01877333347996076
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,32,16,4,128,0,1,float16,fp8,0,0.018602666755517323
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,32,16,4,128,0,1,fp8,fp8,0,0.016895999511082966
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,32,16,8,128,0,1,float16,float16,0,0.01911466692884763
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,32,16,8,128,0,1,float16,fp8,0,0.01911466692884763
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,32,16,8,128,0,1,fp8,fp8,0,0.016384000579516094
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,32,16,16,128,0,1,fp8,fp8,0,0.011434666812419891
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,32,16,16,128,0,1,float16,fp8,0,0.013141332815090815
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,32,16,16,128,0,1,float16,float16,0,0.01331199953953425
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,32,16,1,128,0,1,float16,float16,0,0.012117333710193634
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,32,16,1,128,0,1,float16,fp8,0,0.012458667159080505
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,32,16,1,128,0,1,fp8,fp8,0,0.011264000087976456
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,32,16,2,128,0,1,float16,float16,0,0.01228800043463707
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,32,16,2,128,0,1,float16,fp8,0,0.012117333710193634
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,32,16,2,128,0,1,fp8,fp8,0,0.011264000087976456
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,32,16,4,128,0,1,float16,float16,0,0.012458667159080505
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,32,16,4,128,0,1,fp8,fp8,0,0.011264000087976456
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,32,16,4,128,0,1,float16,fp8,0,0.012629333883523941
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,32,16,8,128,0,1,float16,float16,0,0.012800000607967377
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,32,16,8,128,0,1,float16,fp8,0,0.012629333883523941
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,32,16,8,128,0,1,fp8,fp8,0,0.011776000261306763
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,32,16,16,128,0,1,float16,float16,0,0.010069333637754122
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,32,16,16,128,0,1,float16,fp8,0,0.010239999741315842
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,32,16,16,128,0,1,fp8,fp8,0,0.00938666673998038
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,32,16,1,128,0,1,float16,float16,0,0.00972800018886725
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,32,16,1,128,0,1,fp8,fp8,0,0.009216000015536943
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,32,16,2,128,0,1,float16,float16,0,0.00972800018886725
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,32,16,1,128,0,1,float16,fp8,0,0.00972800018886725
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,32,16,2,128,0,1,float16,fp8,0,0.009898666913310686
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,32,16,2,128,0,1,fp8,fp8,0,0.009045333291093508
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,32,16,4,128,0,1,float16,float16,0,0.009898666913310686
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,32,16,4,128,0,1,fp8,fp8,0,0.009216000015536943
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,32,16,4,128,0,1,float16,fp8,0,0.010069333637754122
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,32,16,8,128,0,1,float16,fp8,0,0.010239999741315842
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,32,16,8,128,0,1,float16,float16,0,0.009898666913310686
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,32,16,16,128,0,1,float16,float16,0,0.008874666566650072
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,32,16,8,128,0,1,fp8,fp8,0,0.00938666673998038
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,32,16,16,128,0,1,float16,fp8,0,0.009216000015536943
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,32,16,16,128,0,1,fp8,fp8,0,0.0085333331177632
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,32,16,1,128,0,1,fp8,fp8,0,0.008703999842206636
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,32,16,1,128,0,1,float16,float16,0,0.008874666566650072
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,32,16,1,128,0,1,float16,fp8,0,0.009045333291093508
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,32,16,2,128,0,1,float16,float16,0,0.009045333291093508
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,32,16,2,128,0,1,float16,fp8,0,0.009045333291093508
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,32,16,2,128,0,1,fp8,fp8,0,0.008703999842206636
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,32,16,4,128,0,1,float16,float16,0,0.008874666566650072
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,32,16,4,128,0,1,float16,fp8,0,0.009216000015536943
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,32,16,4,128,0,1,fp8,fp8,0,0.008874666566650072
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,32,16,8,128,0,1,float16,float16,0,0.009045333291093508
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,32,16,16,128,0,1,float16,float16,0,0.008192000289758047
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,32,16,8,128,0,1,float16,fp8,0,0.00938666673998038
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,32,16,8,128,0,1,fp8,fp8,0,0.008703999842206636
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,32,16,16,128,0,1,float16,fp8,0,0.008703999842206636
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,32,16,16,128,0,1,fp8,fp8,0,0.008192000289758047
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,32,16,1,128,0,1,float16,float16,0,0.0085333331177632
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,32,16,1,128,0,1,float16,fp8,0,0.009045333291093508
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,32,16,1,128,0,1,fp8,fp8,0,0.008362666393319765
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,32,16,2,128,0,1,float16,float16,0,0.0085333331177632
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,32,16,2,128,0,1,fp8,fp8,0,0.0085333331177632
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,32,16,2,128,0,1,float16,fp8,0,0.009045333291093508
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,32,16,4,128,0,1,float16,float16,0,0.008703999842206636
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,32,16,4,128,0,1,float16,fp8,0,0.008874666566650072
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,32,16,4,128,0,1,fp8,fp8,0,0.008362666393319765
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,32,16,8,128,0,1,float16,float16,0,0.009045333291093508
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,32,16,8,128,0,1,float16,fp8,0,0.009882666791478792
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,32,16,8,128,0,1,fp8,fp8,0,0.008362666393319765
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,32,16,16,128,0,1,float16,float16,0,0.008362666393319765
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,32,16,16,128,0,1,float16,fp8,0,0.008703999842206636
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,32,16,16,128,0,1,fp8,fp8,0,0.008362666393319765
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,32,16,1,128,0,1,float16,float16,0,0.0085333331177632
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,32,16,1,128,0,1,fp8,fp8,0,0.008703999842206636
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,32,16,1,128,0,1,float16,fp8,0,0.009898666913310686
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,32,16,2,128,0,1,float16,fp8,0,0.008874666566650072
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,32,16,2,128,0,1,float16,float16,0,0.0085333331177632
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,32,16,2,128,0,1,fp8,fp8,0,0.0085333331177632
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,32,16,4,128,0,1,float16,float16,0,0.0085333331177632
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,32,16,4,128,0,1,fp8,fp8,0,0.0085333331177632
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,32,16,4,128,0,1,float16,fp8,0,0.008874666566650072
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,32,16,8,128,0,1,float16,float16,0,0.008703999842206636
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,32,16,8,128,0,1,float16,fp8,0,0.008874666566650072
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,32,16,8,128,0,1,fp8,fp8,0,0.00938666673998038
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,16,16,1,128,0,1,float16,float16,0,0.06638933221499126
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,16,16,1,128,0,1,float16,fp8,0,0.06553600231806438
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,16,16,1,128,0,1,fp8,fp8,0,0.053077335158983864
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,16,16,2,128,0,1,float16,float16,0,0.06638933221499126
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,16,16,2,128,0,1,fp8,fp8,0,0.053247998158137
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,16,16,4,128,0,1,float16,float16,0,0.06656000018119812
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,16,16,2,128,0,1,float16,fp8,0,0.06604800124963124
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,16,16,4,128,0,1,float16,fp8,0,0.06570666531721751
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,16,16,4,128,0,1,fp8,fp8,0,0.054272000988324486
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,16,16,8,128,0,1,float16,float16,0,0.07014399766921997
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,16,16,8,128,0,1,float16,fp8,0,0.06860800087451935
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,16,16,8,128,0,1,fp8,fp8,0,0.05529599885145823
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,16,16,16,128,0,1,float16,float16,0,0.038912000755469
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,16,16,16,128,0,1,float16,fp8,0,0.037717332442601524
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,16,16,1,128,0,1,float16,float16,0,0.03669333209594091
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,16,16,16,128,0,1,fp8,fp8,0,0.031744000812371574
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,16,16,1,128,0,1,float16,fp8,0,0.037205333511034645
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,16,16,1,128,0,1,fp8,fp8,0,0.030378667016824085
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,16,16,2,128,0,1,float16,float16,0,0.037205333511034645
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,16,16,2,128,0,1,float16,fp8,0,0.037205333511034645
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,16,16,2,128,0,1,fp8,fp8,0,0.03089066594839096
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,16,16,4,128,0,1,float16,fp8,0,0.037205333511034645
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,16,16,4,128,0,1,float16,float16,0,0.037717332442601524
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,16,16,8,128,0,1,float16,float16,0,0.03874133278926214
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,16,16,4,128,0,1,fp8,fp8,0,0.031061333914597828
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,16,16,8,128,0,1,float16,fp8,0,0.03839999934037527
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,16,16,8,128,0,1,fp8,fp8,0,0.031914666295051575
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,16,16,16,128,0,1,float16,float16,0,0.023210667073726654
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,16,16,16,128,0,1,float16,fp8,0,0.023039999107519787
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,16,16,16,128,0,1,fp8,fp8,0,0.019285333653291065
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,16,16,1,128,0,1,float16,float16,0,0.022698665658632915
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,16,16,1,128,0,1,float16,fp8,0,0.022357332209746044
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,16,16,1,128,0,1,fp8,fp8,0,0.01911466692884763
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,16,16,2,128,0,1,float16,float16,0,0.02252800017595291
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,16,16,2,128,0,1,float16,fp8,0,0.02252800017595291
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,16,16,4,128,0,1,float16,float16,0,0.022869333624839783
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,16,16,2,128,0,1,fp8,fp8,0,0.01911466692884763
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,16,16,4,128,0,1,float16,fp8,0,0.02252800017595291
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,16,16,4,128,0,1,fp8,fp8,0,0.019797333826621372
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,16,16,8,128,0,1,float16,float16,0,0.023210667073726654
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,16,16,8,128,0,1,float16,fp8,0,0.023039999107519787
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,16,16,8,128,0,1,fp8,fp8,0,0.019797333826621372
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,16,16,16,128,0,1,float16,float16,0,0.015018666783968607
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,16,16,16,128,0,1,float16,fp8,0,0.014848000059525171
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,16,16,16,128,0,1,fp8,fp8,0,0.012970666090647379
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,16,16,1,128,0,1,float16,float16,0,0.0145066666106383
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,16,16,1,128,0,1,float16,fp8,0,0.014335999886194864
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,16,16,1,128,0,1,fp8,fp8,0,0.012800000607967377
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,16,16,2,128,0,1,float16,fp8,0,0.014677333335081736
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,16,16,2,128,0,1,float16,float16,0,0.0145066666106383
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,16,16,2,128,0,1,fp8,fp8,0,0.012800000607967377
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,16,16,4,128,0,1,float16,float16,0,0.014677333335081736
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,16,16,4,128,0,1,float16,fp8,0,0.014677333335081736
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,16,16,4,128,0,1,fp8,fp8,0,0.012629333883523941
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,16,16,8,128,0,1,float16,float16,0,0.015018666783968607
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,16,16,8,128,0,1,float16,fp8,0,0.015018666783968607
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,16,16,8,128,0,1,fp8,fp8,0,0.013141332815090815
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,16,16,16,128,0,1,float16,float16,0,0.010410666465759277
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,16,16,16,128,0,1,float16,fp8,0,0.010410666465759277
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,16,16,16,128,0,1,fp8,fp8,0,0.010239999741315842
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,16,16,1,128,0,1,float16,float16,0,0.010069333637754122
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,16,16,1,128,0,1,float16,fp8,0,0.010239999741315842
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,16,16,1,128,0,1,fp8,fp8,0,0.009898666913310686
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,16,16,2,128,0,1,float16,float16,0,0.010239999741315842
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,16,16,2,128,0,1,float16,fp8,0,0.010410666465759277
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,16,16,4,128,0,1,float16,fp8,0,0.010751999914646149
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,16,16,4,128,0,1,float16,float16,0,0.010069333637754122
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,16,16,2,128,0,1,fp8,fp8,0,0.009557333464423815
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,16,16,8,128,0,1,float16,float16,0,0.010239999741315842
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,16,16,4,128,0,1,fp8,fp8,0,0.009557333464423815
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,16,16,8,128,0,1,float16,fp8,0,0.010581333190202713
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,16,16,8,128,0,1,fp8,fp8,0,0.010069333637754122
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,16,16,16,128,0,1,float16,float16,0,0.008874666566650072
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,16,16,16,128,0,1,float16,fp8,0,0.009216000015536943
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,16,16,16,128,0,1,fp8,fp8,0,0.008703999842206636
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,16,16,1,128,0,1,float16,fp8,0,0.009045333291093508
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,16,16,1,128,0,1,fp8,fp8,0,0.009216000015536943
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,16,16,1,128,0,1,float16,float16,0,0.009045333291093508
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,16,16,2,128,0,1,float16,float16,0,0.008874666566650072
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,16,16,2,128,0,1,float16,fp8,0,0.00938666673998038
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,16,16,2,128,0,1,fp8,fp8,0,0.008703999842206636
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,16,16,4,128,0,1,float16,float16,0,0.009045333291093508
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,16,16,4,128,0,1,fp8,fp8,0,0.0085333331177632
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,16,16,4,128,0,1,float16,fp8,0,0.009216000015536943
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,16,16,8,128,0,1,float16,float16,0,0.00938666673998038
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,16,16,8,128,0,1,float16,fp8,0,0.009216000015536943
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,16,16,8,128,0,1,fp8,fp8,0,0.009216000015536943
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,16,16,16,128,0,1,float16,float16,0,0.008362666393319765
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,16,16,16,128,0,1,float16,fp8,0,0.0085333331177632
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,16,16,16,128,0,1,fp8,fp8,0,0.008703999842206636
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,16,16,1,128,0,1,float16,float16,0,0.0085333331177632
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,16,16,1,128,0,1,float16,fp8,0,0.008874666566650072
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,16,16,1,128,0,1,fp8,fp8,0,0.009216000015536943
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,16,16,2,128,0,1,float16,float16,0,0.0085333331177632
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,16,16,2,128,0,1,float16,fp8,0,0.008874666566650072
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,16,16,2,128,0,1,fp8,fp8,0,0.010581333190202713
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,16,16,4,128,0,1,float16,fp8,0,0.008874666566650072
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,16,16,4,128,0,1,float16,float16,0,0.0085333331177632
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,16,16,4,128,0,1,fp8,fp8,0,0.008192000289758047
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,16,16,8,128,0,1,float16,float16,0,0.0085333331177632
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,16,16,8,128,0,1,float16,fp8,0,0.008874666566650072
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,16,16,16,128,0,1,float16,float16,0,0.0085333331177632
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,16,16,8,128,0,1,fp8,fp8,0,0.008874666566650072
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,16,16,16,128,0,1,float16,fp8,0,0.008362666393319765
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,16,16,16,128,0,1,fp8,fp8,0,0.008874666566650072
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,16,16,1,128,0,1,float16,fp8,0,0.008703999842206636
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,16,16,1,128,0,1,float16,float16,0,0.008362666393319765
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,16,16,2,128,0,1,float16,float16,0,0.008362666393319765
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,16,16,1,128,0,1,fp8,fp8,0,0.008192000289758047
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,16,16,2,128,0,1,float16,fp8,0,0.008703999842206636
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,16,16,2,128,0,1,fp8,fp8,0,0.008192000289758047
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,16,16,4,128,0,1,float16,float16,0,0.008362666393319765
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,16,16,4,128,0,1,float16,fp8,0,0.008703999842206636
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,16,16,4,128,0,1,fp8,fp8,0,0.008021333565314611
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,16,16,8,128,0,1,float16,float16,0,0.0085333331177632
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,16,16,8,128,0,1,float16,fp8,0,0.0085333331177632
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,16,16,8,128,0,1,fp8,fp8,0,0.0085333331177632
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,16,16,16,128,0,1,float16,float16,0,0.008703999842206636
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,16,16,16,128,0,1,fp8,fp8,0,0.008362666393319765
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,16,16,16,128,0,1,float16,fp8,0,0.008703999842206636
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,16,16,1,128,0,1,float16,float16,0,0.008703999842206636
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,16,16,1,128,0,1,float16,fp8,0,0.0085333331177632
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,16,16,1,128,0,1,fp8,fp8,0,0.008192000289758047
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,16,16,2,128,0,1,float16,float16,0,0.0085333331177632
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,16,16,2,128,0,1,float16,fp8,0,0.0085333331177632
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,16,16,2,128,0,1,fp8,fp8,0,0.008362666393319765
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,16,16,4,128,0,1,float16,float16,0,0.008362666393319765
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,16,16,4,128,0,1,float16,fp8,0,0.008703999842206636
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,16,16,4,128,0,1,fp8,fp8,0,0.008874666566650072
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,16,16,8,128,0,1,float16,float16,0,0.008362666393319765
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,16,16,8,128,0,1,float16,fp8,0,0.008703999842206636
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,16,16,8,128,0,1,fp8,fp8,0,0.008192000289758047
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,16384,12,1,128,0,1,fp8,fp8,0,24.486913045247395
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,16384,12,2,128,0,1,fp8,fp8,0,24.59204355875651
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,16384,12,1,128,0,1,float16,float16,0,42.30485280354818
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,16384,12,1,128,0,1,float16,fp8,0,40.754686991373696
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,16384,12,2,128,0,1,float16,fp8,0,41.024513244628906
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,16384,12,2,128,0,1,float16,float16,0,42.4089609781901
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,16384,12,4,128,0,1,float16,float16,0,42.328234354654946
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,16384,12,4,128,0,1,float16,fp8,0,42.248191833496094
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,16384,12,12,128,0,1,float16,float16,0,20.819114685058594
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,16384,12,4,128,0,1,fp8,fp8,0,25.30133310953776
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,16384,12,12,128,0,1,fp8,fp8,0,12.942677815755209
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,16384,12,1,128,0,1,fp8,fp8,0,12.292607625325521
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,16384,12,12,128,0,1,float16,fp8,0,21.855061848958332
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,16384,12,1,128,0,1,float16,float16,0,20.32861836751302
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,16384,12,1,128,0,1,float16,fp8,0,20.950698852539062
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,16384,12,2,128,0,1,float16,float16,0,20.292437235514324
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,16384,12,2,128,0,1,fp8,fp8,0,12.287317911783854
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,16384,12,2,128,0,1,float16,fp8,0,20.141738891601562
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,16384,12,4,128,0,1,float16,float16,0,19.92413838704427
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,16384,12,4,128,0,1,fp8,fp8,0,12.936021169026693
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,16384,12,12,128,0,1,float16,float16,0,10.66154670715332
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,16384,12,12,128,0,1,fp8,fp8,0,6.69320551554362
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,16384,12,12,128,0,1,float16,fp8,0,10.862421671549479
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,16384,12,4,128,0,1,float16,fp8,0,20.977493286132812
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,16384,12,1,128,0,1,float16,float16,0,9.97000503540039
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,16384,12,1,128,0,1,fp8,fp8,0,5.546154657999675
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,16384,12,1,128,0,1,float16,fp8,0,10.185386657714844
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,16384,12,2,128,0,1,fp8,fp8,0,5.8775895436604815
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,16384,12,2,128,0,1,float16,fp8,0,10.403839747111002
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,16384,12,2,128,0,1,float16,float16,0,10.442752202351889
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,16384,12,4,128,0,1,float16,float16,0,10.80507787068685
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,16384,12,4,128,0,1,float16,fp8,0,10.604031880696615
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,16384,12,4,128,0,1,fp8,fp8,0,5.972309112548828
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,16384,12,12,128,0,1,float16,float16,0,5.194581349690755
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,16384,12,12,128,0,1,float16,fp8,0,5.159082730611165
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,16384,12,12,128,0,1,fp8,fp8,0,3.2716798782348633
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,16384,12,1,128,0,1,float16,float16,0,4.93943468729655
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,16384,12,1,128,0,1,float16,fp8,0,5.034325281778972
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,16384,12,1,128,0,1,fp8,fp8,0,2.8704427083333335
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,16384,12,2,128,0,1,float16,float16,0,5.129727999369304
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,16384,12,2,128,0,1,float16,fp8,0,4.590421358744304
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,16384,12,2,128,0,1,fp8,fp8,0,2.878122647603353
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,16384,12,4,128,0,1,float16,float16,0,4.9919999440511065
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,16384,12,4,128,0,1,fp8,fp8,0,2.8883625666300454
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,16384,12,4,128,0,1,float16,fp8,0,4.8851626714070635
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,12288,12,1,128,0,1,fp8,fp8,0,14.298795064290365
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,12288,12,1,128,0,1,float16,float16,0,24.04846954345703
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,12288,12,2,128,0,1,fp8,fp8,0,14.401194254557291
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,12288,12,1,128,0,1,float16,fp8,0,24.5579096476237
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,12288,12,2,128,0,1,float16,float16,0,23.674367268880207
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,12288,12,2,128,0,1,float16,fp8,0,23.904256184895832
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,12288,12,4,128,0,1,float16,float16,0,23.968427022298176
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,12288,12,4,128,0,1,float16,fp8,0,24.065022786458332
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,12288,12,4,128,0,1,fp8,fp8,0,14.93930689493815
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,12288,12,12,128,0,1,fp8,fp8,0,7.762773513793945
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,12288,12,12,128,0,1,float16,float16,0,12.85751469930013
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,12288,12,12,128,0,1,float16,fp8,0,12.423168182373047
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,12288,12,1,128,0,1,float16,float16,0,11.555328369140625
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,12288,12,1,128,0,1,fp8,fp8,0,6.7693227132161455
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,12288,12,1,128,0,1,float16,fp8,0,11.898197174072266
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,12288,12,2,128,0,1,float16,float16,0,12.259668986002604
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,12288,12,2,128,0,1,fp8,fp8,0,7.038464228312175
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,12288,12,2,128,0,1,float16,fp8,0,11.8831787109375
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,12288,12,4,128,0,1,float16,float16,0,12.33083724975586
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,12288,12,4,128,0,1,float16,fp8,0,12.002474466959635
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,12288,12,4,128,0,1,fp8,fp8,0,6.930432001749675
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,12288,12,12,128,0,1,float16,float16,0,5.864277521769206
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,12288,12,12,128,0,1,float16,fp8,0,6.08460807800293
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,12288,12,12,128,0,1,fp8,fp8,0,3.799893379211426
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,12288,12,1,128,0,1,float16,float16,0,6.124885559082031
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,12288,12,1,128,0,1,float16,fp8,0,5.402111689249675
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,12288,12,1,128,0,1,fp8,fp8,0,3.292330741882324
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,12288,12,2,128,0,1,fp8,fp8,0,3.2373758951822915
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,12288,12,2,128,0,1,float16,float16,0,5.510997136433919
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,12288,12,2,128,0,1,float16,fp8,0,5.868031819661458
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,12288,12,4,128,0,1,float16,float16,0,5.655210494995117
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,12288,12,4,128,0,1,float16,fp8,0,5.691562652587891
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,12288,12,4,128,0,1,fp8,fp8,0,3.4095786412556968
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,12288,12,12,128,0,1,fp8,fp8,0,1.8875734011332195
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,12288,12,12,128,0,1,float16,float16,0,3.0511786142985025
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,12288,12,12,128,0,1,float16,fp8,0,3.0928214391072593
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,12288,12,1,128,0,1,float16,float16,0,2.7465387980143228
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,12288,12,1,128,0,1,float16,fp8,0,2.6888532638549805
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,12288,12,1,128,0,1,fp8,fp8,0,1.636522610982259
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,12288,12,2,128,0,1,float16,float16,0,2.799957275390625
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,12288,12,2,128,0,1,fp8,fp8,0,1.6295253435770671
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,12288,12,2,128,0,1,float16,fp8,0,2.7835734685262046
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,12288,12,4,128,0,1,float16,float16,0,2.724522590637207
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,12288,12,4,128,0,1,fp8,fp8,0,1.6655359268188477
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,12288,12,4,128,0,1,float16,fp8,0,2.7615572611490884
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,10240,12,1,128,0,1,fp8,fp8,0,10.118997573852539
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,10240,12,1,128,0,1,float16,float16,0,16.549888610839844
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,10240,12,1,128,0,1,float16,fp8,0,16.70912043253581
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,10240,12,2,128,0,1,fp8,fp8,0,10.050559997558594
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,10240,12,2,128,0,1,float16,float16,0,16.726868947347004
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,10240,12,2,128,0,1,float16,fp8,0,17.25542449951172
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,10240,12,4,128,0,1,float16,float16,0,16.78557840983073
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,10240,12,4,128,0,1,float16,fp8,0,17.14841588338216
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,10240,12,12,128,0,1,float16,float16,0,9.02553621927897
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,10240,12,4,128,0,1,fp8,fp8,0,10.602495829264322
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,10240,12,12,128,0,1,float16,fp8,0,8.648533503214518
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,10240,12,12,128,0,1,fp8,fp8,0,5.417472203572591
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,10240,12,1,128,0,1,float16,float16,0,8.067583719889322
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,10240,12,1,128,0,1,fp8,fp8,0,4.432213465372722
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,10240,12,1,128,0,1,float16,fp8,0,8.306517283121744
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,10240,12,2,128,0,1,float16,float16,0,8.419839859008789
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,10240,12,2,128,0,1,fp8,fp8,0,4.85973326365153
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,10240,12,2,128,0,1,float16,fp8,0,8.059733072916666
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,10240,12,4,128,0,1,float16,float16,0,8.630783716837565
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,10240,12,4,128,0,1,float16,fp8,0,8.12663459777832
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,10240,12,12,128,0,1,float16,float16,0,4.257280031840007
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,10240,12,4,128,0,1,fp8,fp8,0,4.673706690470378
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,10240,12,12,128,0,1,float16,fp8,0,4.313599904378255
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,10240,12,12,128,0,1,fp8,fp8,0,2.8422826131184897
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,10240,12,1,128,0,1,float16,float16,0,3.908437410990397
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,10240,12,1,128,0,1,fp8,fp8,0,2.2485334078470864
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,10240,12,1,128,0,1,float16,fp8,0,3.8029654820760093
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,10240,12,2,128,0,1,float16,float16,0,3.9109973907470703
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,10240,12,2,128,0,1,fp8,fp8,0,2.264575958251953
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,10240,12,2,128,0,1,float16,fp8,0,3.7737814585367837
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,10240,12,4,128,0,1,float16,fp8,0,3.9423999786376953
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,10240,12,4,128,0,1,float16,float16,0,3.9983787536621094
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,10240,12,4,128,0,1,fp8,fp8,0,2.476031939188639
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,10240,12,12,128,0,1,float16,float16,0,2.2203733126322427
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,10240,12,12,128,0,1,float16,fp8,0,2.136064052581787
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,10240,12,12,128,0,1,fp8,fp8,0,1.377621332804362
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,10240,12,1,128,0,1,float16,float16,0,1.9101012547810872
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,10240,12,1,128,0,1,float16,fp8,0,1.9070293108622234
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,10240,12,1,128,0,1,fp8,fp8,0,1.1764053503672283
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,10240,12,2,128,0,1,float16,float16,0,1.981269359588623
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,10240,12,2,128,0,1,float16,fp8,0,1.9587413469950359
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,10240,12,2,128,0,1,fp8,fp8,0,1.1717973550160725
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,10240,12,4,128,0,1,float16,float16,0,1.9288746515909831
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,10240,12,4,128,0,1,float16,fp8,0,1.9268266359965007
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,10240,12,4,128,0,1,fp8,fp8,0,1.18067200978597
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,8192,12,1,128,0,1,fp8,fp8,0,13.729962666829428
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,8192,12,1,128,0,1,float16,float16,0,21.946027119954426
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,8192,12,2,128,0,1,fp8,fp8,0,13.454847971598307
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,8192,12,1,128,0,1,float16,fp8,0,22.46826680501302
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,8192,12,2,128,0,1,float16,fp8,0,21.69531758626302
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,8192,12,2,128,0,1,float16,float16,0,21.94415028889974
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,8192,12,4,128,0,1,float16,float16,0,22.150484720865887
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,8192,12,4,128,0,1,float16,fp8,0,22.476287841796875
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,8192,12,12,128,0,1,float16,float16,0,11.87566884358724
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,8192,12,12,128,0,1,fp8,fp8,0,7.5943253835042315
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,8192,12,12,128,0,1,float16,fp8,0,11.802965799967447
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,8192,12,4,128,0,1,fp8,fp8,0,14.332586924235025
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,8192,12,1,128,0,1,float16,float16,0,10.897066752115885
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,8192,12,1,128,0,1,float16,fp8,0,11.059883117675781
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,8192,12,1,128,0,1,fp8,fp8,0,6.572714487711589
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,8192,12,2,128,0,1,float16,float16,0,11.054932912190756
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,8192,12,2,128,0,1,fp8,fp8,0,6.411263783772786
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,8192,12,2,128,0,1,float16,fp8,0,10.942464192708334
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,8192,12,4,128,0,1,float16,float16,0,11.070634206136068
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,8192,12,12,128,0,1,float16,float16,0,5.907456080118815
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,8192,12,12,128,0,1,float16,fp8,0,5.655040105183919
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,8192,12,4,128,0,1,fp8,fp8,0,6.580394744873047
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,8192,12,4,128,0,1,float16,fp8,0,11.496106465657553
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,8192,12,12,128,0,1,fp8,fp8,0,3.670186678568522
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,8192,12,1,128,0,1,float16,float16,0,5.042858759562175
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,8192,12,1,128,0,1,float16,fp8,0,5.017770767211914
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,8192,12,1,128,0,1,fp8,fp8,0,3.007317225138346
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,8192,12,2,128,0,1,fp8,fp8,0,3.052032152811686
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,8192,12,2,128,0,1,float16,float16,0,5.319338798522949
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,8192,12,2,128,0,1,float16,fp8,0,5.216085433959961
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,8192,12,4,128,0,1,float16,float16,0,5.187583923339844
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,8192,12,4,128,0,1,float16,fp8,0,5.390677134195964
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,8192,12,12,128,0,1,float16,float16,0,2.847231864929199
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,8192,12,4,128,0,1,fp8,fp8,0,3.241642634073893
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,8192,12,12,128,0,1,float16,fp8,0,2.825215975443522
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,8192,12,12,128,0,1,fp8,fp8,0,1.8868907292683919
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,8192,12,1,128,0,1,float16,float16,0,2.4934399922688804
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,8192,12,1,128,0,1,float16,fp8,0,2.4096426963806152
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,8192,12,1,128,0,1,fp8,fp8,0,1.4820693333943684
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,8192,12,2,128,0,1,float16,float16,0,2.4415574073791504
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,8192,12,2,128,0,1,float16,fp8,0,2.3947946230570474
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,8192,12,2,128,0,1,fp8,fp8,0,1.5486292839050293
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,8192,12,4,128,0,1,float16,float16,0,2.5231359799702964
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,8192,12,4,128,0,1,float16,fp8,0,2.5226240158081055
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,8192,12,12,128,0,1,float16,float16,0,1.3917867342631023
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,8192,12,4,128,0,1,fp8,fp8,0,1.6054612795511882
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,8192,12,12,128,0,1,float16,fp8,0,1.3637973467508953
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,8192,12,12,128,0,1,fp8,fp8,0,0.9016319910685221
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,8192,12,1,128,0,1,float16,float16,0,1.2849493026733398
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,8192,12,1,128,0,1,float16,fp8,0,1.3252267042795818
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,8192,12,1,128,0,1,fp8,fp8,0,0.7918933232625326
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,8192,12,2,128,0,1,float16,float16,0,1.296895980834961
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,8192,12,2,128,0,1,float16,fp8,0,1.2960426807403564
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,8192,12,2,128,0,1,fp8,fp8,0,0.7932586669921875
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,8192,12,4,128,0,1,float16,fp8,0,1.2811946868896484
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,8192,12,4,128,0,1,float16,float16,0,1.3257386684417725
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,8192,12,4,128,0,1,fp8,fp8,0,0.7852373123168945
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,6144,12,1,128,0,1,fp8,fp8,0,7.682218551635742
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,6144,12,2,128,0,1,fp8,fp8,0,7.831722895304362
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,6144,12,1,128,0,1,float16,float16,0,12.854955037434896
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,6144,12,1,128,0,1,float16,fp8,0,12.637184143066406
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,6144,12,2,128,0,1,float16,float16,0,12.714154561360678
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,6144,12,2,128,0,1,float16,fp8,0,13.034666697184244
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,6144,12,4,128,0,1,float16,float16,0,13.696170806884766
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,6144,12,4,128,0,1,float16,fp8,0,13.113855997721354
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,6144,12,12,128,0,1,float16,float16,0,6.749184290568034
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,6144,12,12,128,0,1,fp8,fp8,0,4.649642626444499
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,6144,12,4,128,0,1,fp8,fp8,0,8.20582389831543
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,6144,12,12,128,0,1,float16,fp8,0,6.933333079020183
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,6144,12,1,128,0,1,float16,float16,0,6.084778467814128
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,6144,12,1,128,0,1,float16,fp8,0,5.694122950236003
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,6144,12,1,128,0,1,fp8,fp8,0,3.6157439549764
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,6144,12,2,128,0,1,float16,float16,0,6.209023793538411
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,6144,12,2,128,0,1,fp8,fp8,0,3.653461456298828
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,6144,12,2,128,0,1,float16,fp8,0,6.1610666910807295
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,6144,12,12,128,0,1,float16,float16,0,3.5075413386027017
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,6144,12,4,128,0,1,fp8,fp8,0,3.845973332722982
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,6144,12,4,128,0,1,float16,float16,0,6.11515744527181
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,6144,12,4,128,0,1,float16,fp8,0,6.508202870686849
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,6144,12,12,128,0,1,float16,fp8,0,3.4401280085245767
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,6144,12,12,128,0,1,fp8,fp8,0,2.2724266052246094
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,6144,12,1,128,0,1,float16,float16,0,2.8069547017415366
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,6144,12,1,128,0,1,float16,fp8,0,2.80729611714681
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,6144,12,1,128,0,1,fp8,fp8,0,1.804800033569336
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,6144,12,2,128,0,1,fp8,fp8,0,1.8652159372965496
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,6144,12,2,128,0,1,float16,float16,0,2.9090134302775064
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,6144,12,2,128,0,1,float16,fp8,0,2.882730801900228
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,6144,12,4,128,0,1,float16,float16,0,3.0916268030802407
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,6144,12,4,128,0,1,float16,fp8,0,2.963285446166992
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,6144,12,4,128,0,1,fp8,fp8,0,1.9160745938618977
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,6144,12,12,128,0,1,float16,float16,0,1.6890880266825359
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,6144,12,12,128,0,1,float16,fp8,0,1.6969386736551921
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,6144,12,12,128,0,1,fp8,fp8,0,1.1719679832458496
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,6144,12,1,128,0,1,float16,float16,0,1.4221653938293457
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,6144,12,1,128,0,1,float16,fp8,0,1.4202879269917805
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,6144,12,1,128,0,1,fp8,fp8,0,0.8611839612325033
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,6144,12,2,128,0,1,float16,float16,0,1.4085119565327961
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,6144,12,2,128,0,1,fp8,fp8,0,0.8632319768269857
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,6144,12,2,128,0,1,float16,fp8,0,1.412266731262207
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,6144,12,4,128,0,1,fp8,fp8,0,0.9185280005137125
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,6144,12,4,128,0,1,float16,float16,0,1.4615893363952637
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,6144,12,4,128,0,1,float16,fp8,0,1.4675626754760742
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,6144,12,12,128,0,1,float16,float16,0,0.7975253264109293
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,6144,12,12,128,0,1,float16,fp8,0,0.8079360326131185
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,6144,12,12,128,0,1,fp8,fp8,0,0.5167786677678426
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,6144,12,1,128,0,1,float16,float16,0,0.7550293604532877
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,6144,12,1,128,0,1,float16,fp8,0,0.7639040152231852
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,6144,12,1,128,0,1,fp8,fp8,0,0.4819626808166504
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,6144,12,2,128,0,1,fp8,fp8,0,0.4864000082015991
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,6144,12,2,128,0,1,float16,float16,0,0.7847253481547037
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,6144,12,2,128,0,1,float16,fp8,0,0.7910400231679281
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,6144,12,4,128,0,1,float16,float16,0,0.794111967086792
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,6144,12,4,128,0,1,float16,fp8,0,0.7821653683980306
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,6144,12,4,128,0,1,fp8,fp8,0,0.4828159809112549
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,4096,12,1,128,0,1,fp8,fp8,0,7.694165547688802
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,4096,12,1,128,0,1,float16,float16,0,12.013226826985678
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,4096,12,2,128,0,1,fp8,fp8,0,7.987882614135742
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,4096,12,1,128,0,1,float16,fp8,0,12.459519704182943
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,4096,12,2,128,0,1,float16,float16,0,12.618240356445312
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,4096,12,2,128,0,1,float16,fp8,0,12.762282053629557
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,4096,12,4,128,0,1,float16,float16,0,12.419242858886719
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,4096,12,4,128,0,1,float16,fp8,0,12.418900807698568
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,4096,12,12,128,0,1,float16,float16,0,6.861482620239258
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,4096,12,12,128,0,1,float16,fp8,0,6.900394439697266
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,4096,12,12,128,0,1,fp8,fp8,0,4.902400016784668
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,4096,12,4,128,0,1,fp8,fp8,0,8.344746907552084
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,4096,12,1,128,0,1,float16,float16,0,5.622101465861003
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,4096,12,1,128,0,1,float16,fp8,0,5.887999852498372
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,4096,12,1,128,0,1,fp8,fp8,0,3.4570239384969077
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,4096,12,2,128,0,1,float16,float16,0,5.515434900919597
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,4096,12,2,128,0,1,fp8,fp8,0,3.7287254333496094
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,4096,12,4,128,0,1,fp8,fp8,0,4.040021260579427
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,4096,12,12,128,0,1,float16,float16,0,3.5182933807373047
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,4096,12,2,128,0,1,float16,fp8,0,5.73201052347819
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,4096,12,4,128,0,1,float16,float16,0,5.9999574025472
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,4096,12,12,128,0,1,float16,fp8,0,3.3768107096354165
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,4096,12,4,128,0,1,float16,fp8,0,6.007637023925781
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,4096,12,12,128,0,1,fp8,fp8,0,2.3811413447062173
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,4096,12,2,128,0,1,fp8,fp8,0,1.7879039446512859
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,4096,12,1,128,0,1,float16,float16,0,2.776576042175293
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,4096,12,1,128,0,1,fp8,fp8,0,1.773738702138265
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,4096,12,1,128,0,1,float16,fp8,0,2.763434727986654
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,4096,12,2,128,0,1,float16,fp8,0,2.7796481450398765
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,4096,12,2,128,0,1,float16,float16,0,2.7748692830403647
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,4096,12,4,128,0,1,float16,float16,0,2.9137919743855796
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,4096,12,4,128,0,1,float16,fp8,0,2.8898986180623374
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,4096,12,4,128,0,1,fp8,fp8,0,1.8973013559977214
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,4096,12,12,128,0,1,float16,float16,0,1.7269760767618816
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,4096,12,12,128,0,1,fp8,fp8,0,1.2144640286763508
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,4096,12,1,128,0,1,float16,float16,0,1.2980906963348389
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,4096,12,12,128,0,1,float16,fp8,0,1.6979626019795735
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,4096,12,1,128,0,1,float16,fp8,0,1.2898986339569092
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,4096,12,1,128,0,1,fp8,fp8,0,0.8232959906260172
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,4096,12,2,128,0,1,float16,float16,0,1.310378630956014
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,4096,12,2,128,0,1,float16,fp8,0,1.3052586714426677
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,4096,12,2,128,0,1,fp8,fp8,0,0.8447999954223633
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,4096,12,4,128,0,1,float16,float16,0,1.4291626612345378
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,4096,12,4,128,0,1,float16,fp8,0,1.3914453188578289
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,4096,12,12,128,0,1,float16,float16,0,0.7947946389516195
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,4096,12,4,128,0,1,fp8,fp8,0,0.9437867005666097
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,4096,12,12,128,0,1,float16,fp8,0,0.7512746651967367
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,4096,12,12,128,0,1,fp8,fp8,0,0.5476693312327067
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,4096,12,1,128,0,1,float16,float16,0,0.6835199991861979
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,4096,12,1,128,0,1,float16,fp8,0,0.6988800366719564
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,4096,12,1,128,0,1,fp8,fp8,0,0.4278613328933716
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,4096,12,2,128,0,1,float16,fp8,0,0.6891520023345947
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,4096,12,4,128,0,1,float16,float16,0,0.696832021077474
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,4096,12,2,128,0,1,fp8,fp8,0,0.43485867977142334
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,4096,12,4,128,0,1,float16,fp8,0,0.6898346741994222
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,4096,12,2,128,0,1,float16,float16,0,0.6927359898885092
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,4096,12,4,128,0,1,fp8,fp8,0,0.4275199969609578
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,4096,12,12,128,0,1,float16,float16,0,0.4060159921646118
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,4096,12,12,128,0,1,float16,fp8,0,0.40584532419840497
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,4096,12,12,128,0,1,fp8,fp8,0,0.26077866554260254
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,4096,12,1,128,0,1,float16,float16,0,0.3834880193074544
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,4096,12,1,128,0,1,fp8,fp8,0,0.25088000297546387
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,4096,12,1,128,0,1,float16,fp8,0,0.3916800022125244
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,4096,12,2,128,0,1,float16,float16,0,0.3819520076115926
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,4096,12,2,128,0,1,float16,fp8,0,0.38417065143585205
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,4096,12,2,128,0,1,fp8,fp8,0,0.2563413381576538
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,4096,12,4,128,0,1,float16,float16,0,0.389631986618042
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,4096,12,4,128,0,1,float16,fp8,0,0.3848533233006795
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,4096,12,4,128,0,1,fp8,fp8,0,0.2599253257115682
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,3072,12,1,128,0,1,float16,float16,0,6.959957122802734
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,3072,12,1,128,0,1,float16,fp8,0,7.003136316935222
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,3072,12,1,128,0,1,fp8,fp8,0,4.575402577718099
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,3072,12,2,128,0,1,float16,float16,0,7.045973459879558
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,3072,12,2,128,0,1,fp8,fp8,0,4.899840037027995
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,3072,12,2,128,0,1,float16,fp8,0,7.087786356608073
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,3072,12,4,128,0,1,float16,float16,0,7.3482240041097
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,3072,12,4,128,0,1,float16,fp8,0,7.269376118977864
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,3072,12,4,128,0,1,fp8,fp8,0,5.164373397827148
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,3072,12,12,128,0,1,float16,float16,0,4.383573214213054
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,3072,12,12,128,0,1,float16,fp8,0,4.248234748840332
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,3072,12,12,128,0,1,fp8,fp8,0,3.1544319788614907
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,3072,12,1,128,0,1,float16,float16,0,3.3356800079345703
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,3072,12,1,128,0,1,fp8,fp8,0,2.1370879809061685
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,3072,12,1,128,0,1,float16,fp8,0,3.311786651611328
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,3072,12,2,128,0,1,float16,float16,0,3.3887573877970376
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,3072,12,2,128,0,1,fp8,fp8,0,2.2676480611165366
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,3072,12,2,128,0,1,float16,fp8,0,3.314517339070638
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,3072,12,4,128,0,1,fp8,fp8,0,2.4605013529459634
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,3072,12,4,128,0,1,float16,fp8,0,3.5290454228719077
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,3072,12,12,128,0,1,float16,float16,0,2.1304319699605307
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,3072,12,12,128,0,1,float16,fp8,0,2.1239466667175293
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,3072,12,4,128,0,1,float16,float16,0,3.580927848815918
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,3072,12,12,128,0,1,fp8,fp8,0,1.5359999338785808
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,3072,12,1,128,0,1,float16,float16,0,1.6254293123881023
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,3072,12,1,128,0,1,float16,fp8,0,1.5617705980936687
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,3072,12,1,128,0,1,fp8,fp8,0,1.0414079825083415
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,3072,12,2,128,0,1,float16,float16,0,1.632256031036377
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,3072,12,2,128,0,1,float16,fp8,0,1.6201386451721191
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,3072,12,2,128,0,1,fp8,fp8,0,1.083562692006429
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,3072,12,4,128,0,1,float16,fp8,0,1.7346560160319011
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,3072,12,4,128,0,1,float16,float16,0,1.754111925760905
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,3072,12,12,128,0,1,float16,float16,0,1.0333866278330486
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,3072,12,4,128,0,1,fp8,fp8,0,1.2028586864471436
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,3072,12,12,128,0,1,fp8,fp8,0,0.756223996480306
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,3072,12,12,128,0,1,float16,fp8,0,1.0226346651713054
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,3072,12,1,128,0,1,float16,float16,0,0.7661226590474447
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,3072,12,1,128,0,1,float16,fp8,0,0.78438401222229
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,3072,12,1,128,0,1,fp8,fp8,0,0.4751360019048055
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,3072,12,2,128,0,1,float16,float16,0,0.7709013621012369
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,3072,12,2,128,0,1,float16,fp8,0,0.7649280230204264
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,3072,12,2,128,0,1,fp8,fp8,0,0.49237334728240967
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,3072,12,4,128,0,1,float16,float16,0,0.7920640309651693
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,3072,12,4,128,0,1,float16,fp8,0,0.8062293529510498
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,3072,12,4,128,0,1,fp8,fp8,0,0.5336746772130331
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,3072,12,12,128,0,1,float16,float16,0,0.4478293259938558
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,3072,12,12,128,0,1,float16,fp8,0,0.442197322845459
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,3072,12,12,128,0,1,fp8,fp8,0,0.2964479923248291
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,3072,12,1,128,0,1,float16,float16,0,0.40516265233357746
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,3072,12,1,128,0,1,float16,fp8,0,0.4212053219477336
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,3072,12,1,128,0,1,fp8,fp8,0,0.26436267296473187
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,3072,12,2,128,0,1,float16,fp8,0,0.41250133514404297
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,3072,12,2,128,0,1,float16,float16,0,0.4242773453394572
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,3072,12,2,128,0,1,fp8,fp8,0,0.26282666126887005
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,3072,12,4,128,0,1,float16,float16,0,0.42905600865681964
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,3072,12,4,128,0,1,fp8,fp8,0,0.26231465737024945
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,3072,12,4,128,0,1,float16,fp8,0,0.4242773453394572
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,3072,12,12,128,0,1,float16,float16,0,0.2681173284848531
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,3072,12,12,128,0,1,float16,fp8,0,0.25975465774536133
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,3072,12,12,128,0,1,fp8,fp8,0,0.17373865842819214
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,3072,12,1,128,0,1,float16,float16,0,0.25497599442799884
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,3072,12,1,128,0,1,float16,fp8,0,0.250709335009257
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,3072,12,1,128,0,1,fp8,fp8,0,0.17203199863433838
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,3072,12,2,128,0,1,float16,float16,0,0.24883200724919638
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,3072,12,2,128,0,1,fp8,fp8,0,0.17612799008687338
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,3072,12,2,128,0,1,float16,fp8,0,0.25309866666793823
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,3072,12,4,128,0,1,float16,float16,0,0.2513920068740845
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,3072,12,4,128,0,1,float16,fp8,0,0.2500266631444295
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,3072,12,4,128,0,1,fp8,fp8,0,0.17203199863433838
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,2048,12,1,128,0,1,float16,float16,0,7.213055928548177
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,2048,12,1,128,0,1,float16,fp8,0,7.131989161173503
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,2048,12,1,128,0,1,fp8,fp8,0,4.877994537353516
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,2048,12,2,128,0,1,fp8,fp8,0,5.18229325612386
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,2048,12,2,128,0,1,float16,float16,0,7.5828908284505205
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,2048,12,2,128,0,1,float16,fp8,0,7.376895904541016
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,2048,12,4,128,0,1,float16,float16,0,7.666858673095703
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,2048,12,4,128,0,1,float16,fp8,0,7.796394983927409
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,2048,12,4,128,0,1,fp8,fp8,0,5.754367828369141
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,2048,12,12,128,0,1,float16,float16,0,4.59775988260905
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,2048,12,12,128,0,1,float16,fp8,0,4.578133265177409
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,2048,12,12,128,0,1,fp8,fp8,0,3.560789426167806
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,2048,12,1,128,0,1,float16,float16,0,3.3976319630940757
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,2048,12,1,128,0,1,fp8,fp8,0,2.270890712738037
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,2048,12,1,128,0,1,float16,fp8,0,3.3114452362060547
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,2048,12,2,128,0,1,float16,float16,0,3.498154640197754
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,2048,12,2,128,0,1,fp8,fp8,0,2.4232959747314453
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,2048,12,4,128,0,1,float16,float16,0,3.657557487487793
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,2048,12,2,128,0,1,float16,fp8,0,3.459413210550944
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,2048,12,4,128,0,1,float16,fp8,0,3.6377598444620767
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,2048,12,4,128,0,1,fp8,fp8,0,2.7216211954752603
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,2048,12,12,128,0,1,float16,float16,0,2.284202734629313
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,2048,12,12,128,0,1,fp8,fp8,0,1.7338026364644368
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,2048,12,12,128,0,1,float16,fp8,0,2.242389361063639
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,2048,12,1,128,0,1,float16,float16,0,1.6093866030375164
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,2048,12,1,128,0,1,fp8,fp8,0,1.0781013170878093
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,2048,12,1,128,0,1,float16,fp8,0,1.5875412623087566
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,2048,12,2,128,0,1,float16,float16,0,1.6663893063863118
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,2048,12,2,128,0,1,float16,fp8,0,1.6887466112772624
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,2048,12,2,128,0,1,fp8,fp8,0,1.1327146689097087
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,2048,12,4,128,0,1,float16,float16,0,1.82476806640625
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,2048,12,4,128,0,1,float16,fp8,0,1.771349271138509
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,2048,12,4,128,0,1,fp8,fp8,0,1.2625919977823894
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,2048,12,12,128,0,1,float16,fp8,0,1.0746880372365315
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,2048,12,12,128,0,1,float16,float16,0,1.1274240016937256
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,2048,12,12,128,0,1,fp8,fp8,0,0.839680035909017
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,2048,12,1,128,0,1,float16,fp8,0,0.7285760243733724
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,2048,12,1,128,0,1,float16,float16,0,0.7488853136698405
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,2048,12,1,128,0,1,fp8,fp8,0,0.48554666837056476
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,2048,12,2,128,0,1,float16,float16,0,0.7691946824391683
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,2048,12,2,128,0,1,float16,fp8,0,0.7540053526560465
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,2048,12,2,128,0,1,fp8,fp8,0,0.505514661471049
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,2048,12,4,128,0,1,float16,float16,0,0.8579413096110026
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,2048,12,4,128,0,1,float16,fp8,0,0.8238080342610677
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,2048,12,4,128,0,1,fp8,fp8,0,0.6016000111897787
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,2048,12,12,128,0,1,float16,float16,0,0.5113173325856527
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,2048,12,12,128,0,1,float16,fp8,0,0.45073068141937256
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,2048,12,12,128,0,1,fp8,fp8,0,0.37853864828745526
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,2048,12,1,128,0,1,float16,fp8,0,0.37717334429423016
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,2048,12,1,128,0,1,float16,float16,0,0.3860479990641276
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,2048,12,1,128,0,1,fp8,fp8,0,0.23040000597635904
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,2048,12,2,128,0,1,float16,float16,0,0.3863893349965413
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,2048,12,2,128,0,1,float16,fp8,0,0.38280534744262695
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,2048,12,2,128,0,1,fp8,fp8,0,0.24081067244211832
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,2048,12,4,128,0,1,float16,float16,0,0.3957759936650594
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,2048,12,4,128,0,1,fp8,fp8,0,0.23773866891860962
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,2048,12,12,128,0,1,float16,float16,0,0.23057067394256592
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,2048,12,4,128,0,1,float16,fp8,0,0.38929065068562824
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,2048,12,12,128,0,1,float16,fp8,0,0.22613332668940225
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,2048,12,12,128,0,1,fp8,fp8,0,0.14574933052062988
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,2048,12,1,128,0,1,float16,float16,0,0.20906666914621988
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,2048,12,1,128,0,1,float16,fp8,0,0.21538132429122925
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,2048,12,1,128,0,1,fp8,fp8,0,0.14353066682815552
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,2048,12,2,128,0,1,float16,float16,0,0.2106026609738668
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,2048,12,2,128,0,1,float16,fp8,0,0.2135039965311686
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,2048,12,4,128,0,1,float16,float16,0,0.21623466412226358
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,2048,12,4,128,0,1,float16,fp8,0,0.2141866683959961
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,2048,12,2,128,0,1,fp8,fp8,0,0.13823999961217245
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,2048,12,4,128,0,1,fp8,fp8,0,0.14216533303260803
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,2048,12,12,128,0,1,float16,float16,0,0.13704533378283182
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,2048,12,12,128,0,1,float16,fp8,0,0.13960533340771994
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,2048,12,12,128,0,1,fp8,fp8,0,0.09591466188430786
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,2048,12,1,128,0,1,float16,float16,0,0.14148267110188803
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,2048,12,1,128,0,1,fp8,fp8,0,0.09762133161226909
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,2048,12,1,128,0,1,float16,fp8,0,0.13755733768145242
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,2048,12,2,128,0,1,float16,float16,0,0.1353386640548706
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,2048,12,2,128,0,1,float16,fp8,0,0.13619200388590494
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,2048,12,2,128,0,1,fp8,fp8,0,0.09864532947540283
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,2048,12,4,128,0,1,float16,float16,0,0.1397760013739268
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,2048,12,4,128,0,1,float16,fp8,0,0.1367039978504181
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,2048,12,4,128,0,1,fp8,fp8,0,0.09898666540781657
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,1536,12,1,128,0,1,float16,float16,0,4.353365262349446
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,1536,12,1,128,0,1,float16,fp8,0,4.435968081156413
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,1536,12,1,128,0,1,fp8,fp8,0,3.058346748352051
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,1536,12,2,128,0,1,float16,float16,0,4.511573473612468
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,1536,12,2,128,0,1,float16,fp8,0,4.580693244934082
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,1536,12,2,128,0,1,fp8,fp8,0,3.3030827840169272
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,1536,12,4,128,0,1,float16,float16,0,4.7878828048706055
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,1536,12,4,128,0,1,float16,fp8,0,4.807509422302246
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,1536,12,4,128,0,1,fp8,fp8,0,3.624277432759603
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,1536,12,12,128,0,1,float16,float16,0,3.0426454544067383
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,1536,12,12,128,0,1,fp8,fp8,0,2.364586671193441
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,1536,12,12,128,0,1,float16,fp8,0,2.9550933837890625
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,1536,12,1,128,0,1,float16,float16,0,2.0677973429361978
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,1536,12,1,128,0,1,float16,fp8,0,2.059434731801351
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,1536,12,1,128,0,1,fp8,fp8,0,1.3858133951822917
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,1536,12,2,128,0,1,float16,float16,0,2.133845329284668
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,1536,12,2,128,0,1,float16,fp8,0,2.0935680071512857
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,1536,12,2,128,0,1,fp8,fp8,0,1.5100587209065754
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,1536,12,4,128,0,1,float16,float16,0,2.3082666397094727
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,1536,12,12,128,0,1,float16,float16,0,1.5022080739339192
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,1536,12,4,128,0,1,fp8,fp8,0,1.7080319722493489
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,1536,12,4,128,0,1,float16,fp8,0,2.2821547190348306
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,1536,12,12,128,0,1,float16,fp8,0,1.4489599863688152
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,1536,12,12,128,0,1,fp8,fp8,0,1.1386880079905193
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,1536,12,1,128,0,1,float16,float16,0,0.9623893102010092
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,1536,12,1,128,0,1,float16,fp8,0,0.9698987007141113
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,1536,12,1,128,0,1,fp8,fp8,0,0.6620159943898519
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,1536,12,2,128,0,1,float16,float16,0,1.028608004252116
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,1536,12,2,128,0,1,float16,fp8,0,1.02348796526591
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,1536,12,2,128,0,1,fp8,fp8,0,0.7156053384145101
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,1536,12,4,128,0,1,float16,float16,0,1.1141119798024495
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,1536,12,4,128,0,1,float16,fp8,0,1.1067732969919841
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,1536,12,4,128,0,1,fp8,fp8,0,0.8099839687347412
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,1536,12,12,128,0,1,float16,float16,0,0.7108266353607178
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,1536,12,12,128,0,1,float16,fp8,0,0.6664533217748007
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,1536,12,12,128,0,1,fp8,fp8,0,0.5633706649144491
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,1536,12,1,128,0,1,float16,float16,0,0.4500480095545451
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,1536,12,1,128,0,1,fp8,fp8,0,0.2786986629168193
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,1536,12,1,128,0,1,float16,fp8,0,0.43383467197418213
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,1536,12,2,128,0,1,float16,float16,0,0.4471466541290283
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,1536,12,2,128,0,1,float16,fp8,0,0.442197322845459
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,1536,12,2,128,0,1,fp8,fp8,0,0.28091732660929364
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,1536,12,4,128,0,1,float16,float16,0,0.47121067841847736
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,1536,12,4,128,0,1,float16,fp8,0,0.4705280065536499
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,1536,12,12,128,0,1,float16,float16,0,0.2650453249613444
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,1536,12,4,128,0,1,fp8,fp8,0,0.35601067543029785
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,1536,12,12,128,0,1,float16,fp8,0,0.24678399165471396
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,1536,12,12,128,0,1,fp8,fp8,0,0.187391996383667
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,1536,12,1,128,0,1,float16,float16,0,0.2315946618715922
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,1536,12,1,128,0,1,float16,fp8,0,0.22425599892934164
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,1536,12,1,128,0,1,fp8,fp8,0,0.1508693297704061
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,1536,12,2,128,0,1,float16,float16,0,0.2387626568476359
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,1536,12,2,128,0,1,float16,fp8,0,0.22681599855422974
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,1536,12,2,128,0,1,fp8,fp8,0,0.15308800339698792
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,1536,12,4,128,0,1,float16,float16,0,0.23995733261108398
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,1536,12,4,128,0,1,fp8,fp8,0,0.1565013329188029
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,1536,12,4,128,0,1,float16,fp8,0,0.24012800057729086
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,1536,12,12,128,0,1,float16,float16,0,0.145578662554423
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,1536,12,12,128,0,1,float16,fp8,0,0.14711466431617737
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,1536,12,12,128,0,1,fp8,fp8,0,0.10103467106819153
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,1536,12,1,128,0,1,float16,float16,0,0.1397760013739268
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,1536,12,1,128,0,1,float16,fp8,0,0.14148267110188803
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,1536,12,2,128,0,1,float16,float16,0,0.14199466506640115
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,1536,12,1,128,0,1,fp8,fp8,0,0.09983999530474345
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,1536,12,2,128,0,1,float16,fp8,0,0.1443839967250824
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,1536,12,2,128,0,1,fp8,fp8,0,0.09847467144330342
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,1536,12,4,128,0,1,float16,float16,0,0.14353066682815552
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,1536,12,4,128,0,1,fp8,fp8,0,0.10001066327095032
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,1536,12,4,128,0,1,float16,fp8,0,0.14523733655611673
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,1536,12,12,128,0,1,float16,float16,0,0.08703999718030293
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,1536,12,12,128,0,1,float16,fp8,0,0.08567466338475545
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,1536,12,12,128,0,1,fp8,fp8,0,0.05836800237496694
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,1536,12,1,128,0,1,float16,float16,0,0.09062400460243225
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,1536,12,1,128,0,1,float16,fp8,0,0.08550399541854858
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,1536,12,1,128,0,1,fp8,fp8,0,0.05734399954477946
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,1536,12,2,128,0,1,float16,float16,0,0.08482133348782857
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,1536,12,2,128,0,1,float16,fp8,0,0.08703999718030293
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,1536,12,2,128,0,1,fp8,fp8,0,0.0576853354771932
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,1536,12,4,128,0,1,float16,float16,0,0.08772266904513042
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,1536,12,4,128,0,1,fp8,fp8,0,0.057002668579419456
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,1536,12,4,128,0,1,float16,fp8,0,0.08601599931716919
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,1024,12,1,128,0,1,float16,float16,0,4.525397300720215
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,1024,12,1,128,0,1,float16,fp8,0,4.471978823343913
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,1024,12,1,128,0,1,fp8,fp8,0,3.1820799509684243
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,1024,12,2,128,0,1,float16,float16,0,4.780032157897949
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,1024,12,2,128,0,1,float16,fp8,0,4.741632143656413
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,1024,12,2,128,0,1,fp8,fp8,0,3.4602667490641275
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,1024,12,4,128,0,1,float16,float16,0,5.1843414306640625
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,1024,12,4,128,0,1,float16,fp8,0,5.020671844482422
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,1024,12,4,128,0,1,fp8,fp8,0,3.9485438664754233
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,1024,12,12,128,0,1,float16,float16,0,3.3653761545817056
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,1024,12,12,128,0,1,float16,fp8,0,3.1825920740763345
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,1024,12,12,128,0,1,fp8,fp8,0,2.8149760564168296
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,1024,12,1,128,0,1,float16,float16,0,2.2452905972798667
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,1024,12,1,128,0,1,float16,fp8,0,2.1734399795532227
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,1024,12,1,128,0,1,fp8,fp8,0,1.5994879404703777
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,1024,12,2,128,0,1,float16,float16,0,2.3092907269795737
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,1024,12,2,128,0,1,float16,fp8,0,2.293077309926351
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,1024,12,2,128,0,1,fp8,fp8,0,1.703935941060384
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,1024,12,4,128,0,1,float16,float16,0,2.535253365834554
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,1024,12,4,128,0,1,fp8,fp8,0,1.9544746081034343
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,1024,12,4,128,0,1,float16,fp8,0,2.516138712565104
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,1024,12,12,128,0,1,float16,float16,0,1.6701439221700032
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,1024,12,12,128,0,1,float16,fp8,0,1.5935146013895671
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,1024,12,12,128,0,1,fp8,fp8,0,1.3289813200632732
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,1024,12,1,128,0,1,float16,float16,0,1.04857603708903
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,1024,12,1,128,0,1,float16,fp8,0,1.0287786324818928
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,1024,12,1,128,0,1,fp8,fp8,0,0.754688024520874
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,1024,12,2,128,0,1,fp8,fp8,0,0.8053759733835856
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,1024,12,2,128,0,1,float16,float16,0,1.126570701599121
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,1024,12,2,128,0,1,float16,fp8,0,1.097215970357259
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,1024,12,4,128,0,1,float16,float16,0,1.233237346013387
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,1024,12,4,128,0,1,float16,fp8,0,1.1915946801503499
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,1024,12,4,128,0,1,fp8,fp8,0,0.9105066458384196
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,1024,12,12,128,0,1,float16,float16,0,0.8171520233154297
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,1024,12,12,128,0,1,float16,fp8,0,0.7666347026824951
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,1024,12,12,128,0,1,fp8,fp8,0,0.6626986662546793
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,1024,12,1,128,0,1,float16,float16,0,0.4514133135477702
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,1024,12,1,128,0,1,float16,fp8,0,0.4485119978586833
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,1024,12,1,128,0,1,fp8,fp8,0,0.3251199920972188
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,1024,12,2,128,0,1,float16,float16,0,0.48878931999206543
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,1024,12,2,128,0,1,float16,fp8,0,0.4689919948577881
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,1024,12,2,128,0,1,fp8,fp8,0,0.35072000821431476
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,1024,12,4,128,0,1,float16,float16,0,0.5591040054957072
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,1024,12,4,128,0,1,float16,fp8,0,0.5358933210372925
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,1024,12,4,128,0,1,fp8,fp8,0,0.43008001645406085
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,1024,12,12,128,0,1,float16,float16,0,0.34833065668741864
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,1024,12,12,128,0,1,float16,fp8,0,0.2959360082944234
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,1024,12,1,128,0,1,float16,float16,0,0.21947733561197916
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,1024,12,1,128,0,1,float16,fp8,0,0.21964800357818604
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,1024,12,1,128,0,1,fp8,fp8,0,0.13414399822553
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,1024,12,12,128,0,1,fp8,fp8,0,0.28279467423756915
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,1024,12,2,128,0,1,float16,float16,0,0.2208426594734192
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,1024,12,2,128,0,1,float16,fp8,0,0.22476800282796225
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,1024,12,2,128,0,1,fp8,fp8,0,0.13414399822553
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,1024,12,4,128,0,1,float16,float16,0,0.22613332668940225
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,1024,12,12,128,0,1,float16,float16,0,0.1302186648050944
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,1024,12,4,128,0,1,float16,fp8,0,0.2249386707941691
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,1024,12,4,128,0,1,fp8,fp8,0,0.13755733768145242
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,1024,12,12,128,0,1,fp8,fp8,0,0.08635733524958293
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,1024,12,12,128,0,1,float16,fp8,0,0.12902399897575378
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,1024,12,1,128,0,1,float16,float16,0,0.1220266620318095
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,1024,12,1,128,0,1,float16,fp8,0,0.1220266620318095
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,1024,12,1,128,0,1,fp8,fp8,0,0.08925867080688477
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,1024,12,2,128,0,1,float16,float16,0,0.11912533640861511
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,1024,12,2,128,0,1,float16,fp8,0,0.12032000223795573
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,1024,12,2,128,0,1,fp8,fp8,0,0.08721066514650981
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,1024,12,4,128,0,1,float16,float16,0,0.12458667159080505
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,1024,12,4,128,0,1,float16,fp8,0,0.12117333213488261
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,1024,12,4,128,0,1,fp8,fp8,0,0.08686932921409607
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,1024,12,12,128,0,1,float16,float16,0,0.0885759989420573
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,1024,12,12,128,0,1,float16,fp8,0,0.08567466338475545
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,1024,12,12,128,0,1,fp8,fp8,0,0.049322664737701416
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,1024,12,1,128,0,1,float16,float16,0,0.07628799974918365
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,1024,12,1,128,0,1,fp8,fp8,0,0.048298666874567665
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,1024,12,1,128,0,1,float16,fp8,0,0.0820906658967336
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,1024,12,2,128,0,1,float16,float16,0,0.0766293356815974
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,1024,12,2,128,0,1,float16,fp8,0,0.08601599931716919
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,1024,12,2,128,0,1,fp8,fp8,0,0.04915200173854828
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,1024,12,4,128,0,1,float16,float16,0,0.08584533135096233
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,1024,12,4,128,0,1,float16,fp8,0,0.07645866771539052
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,1024,12,12,128,0,1,float16,float16,0,0.052906667192777
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,1024,12,4,128,0,1,fp8,fp8,0,0.05000533163547516
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,1024,12,12,128,0,1,float16,fp8,0,0.052906667192777
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,1024,12,1,128,0,1,float16,float16,0,0.05273599922657013
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,1024,12,12,128,0,1,fp8,fp8,0,0.038058665891488395
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,1024,12,1,128,0,1,float16,fp8,0,0.05256533126036326
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,1024,12,1,128,0,1,fp8,fp8,0,0.03754666695992152
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,1024,12,2,128,0,1,float16,float16,0,0.051882664362589516
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,1024,12,2,128,0,1,float16,fp8,0,0.05239466826121012
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,1024,12,2,128,0,1,fp8,fp8,0,0.03737599899371465
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,1024,12,4,128,0,1,float16,float16,0,0.05239466826121012
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,1024,12,4,128,0,1,float16,fp8,0,0.05273599922657013
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,1024,12,4,128,0,1,fp8,fp8,0,0.03754666695992152
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,512,12,1,128,0,1,float16,float16,0,3.411797205607096
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,512,12,1,128,0,1,float16,fp8,0,3.3363625208536782
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,512,12,1,128,0,1,fp8,fp8,0,2.536959966023763
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,512,12,2,128,0,1,float16,float16,0,3.5899734497070312
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,512,12,2,128,0,1,fp8,fp8,0,2.825215975443522
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,512,12,2,128,0,1,float16,fp8,0,3.5604480107625327
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,512,12,4,128,0,1,float16,float16,0,4.097877184549968
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,512,12,4,128,0,1,float16,fp8,0,3.9190187454223633
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,512,12,4,128,0,1,fp8,fp8,0,3.3035945892333984
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,512,12,12,128,0,1,float16,float16,0,3.115690549214681
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,512,12,12,128,0,1,float16,fp8,0,2.9585065841674805
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,512,12,1,128,0,1,float16,float16,0,1.6143360137939453
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,512,12,12,128,0,1,fp8,fp8,0,2.6280959447224936
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,512,12,1,128,0,1,float16,fp8,0,1.595392068227132
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,512,12,1,128,0,1,fp8,fp8,0,1.2864853541056316
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,512,12,2,128,0,1,float16,float16,0,1.7512106895446777
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,512,12,2,128,0,1,float16,fp8,0,1.7447253863016765
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,512,12,2,128,0,1,fp8,fp8,0,1.369258721669515
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,512,12,4,128,0,1,float16,float16,0,1.9855359395345051
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,512,12,4,128,0,1,float16,fp8,0,1.9290453592936199
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,512,12,12,128,0,1,float16,float16,0,1.5807147026062012
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,512,12,4,128,0,1,fp8,fp8,0,1.6354986826578777
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,512,12,12,128,0,1,float16,fp8,0,1.5015254020690918
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,512,12,12,128,0,1,fp8,fp8,0,1.2714666525522869
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,512,12,1,128,0,1,float16,float16,0,0.7850666840871176
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,512,12,1,128,0,1,float16,fp8,0,0.7671466668446859
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,512,12,1,128,0,1,fp8,fp8,0,0.5956266721089681
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,512,12,2,128,0,1,float16,float16,0,0.8362666765848795
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,512,12,2,128,0,1,float16,fp8,0,0.8267093499501547
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,512,12,2,128,0,1,fp8,fp8,0,0.6637226740519205
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,512,12,4,128,0,1,float16,float16,0,0.9478826522827148
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,512,12,4,128,0,1,float16,fp8,0,0.9082880020141602
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,512,12,4,128,0,1,fp8,fp8,0,0.7581013043721517
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,512,12,12,128,0,1,float16,float16,0,0.7434240182240804
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,512,12,12,128,0,1,float16,fp8,0,0.6915413538614908
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,512,12,12,128,0,1,fp8,fp8,0,0.5736106634140015
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,512,12,1,128,0,1,float16,fp8,0,0.30003199974695843
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,512,12,1,128,0,1,float16,float16,0,0.30805333455403644
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,512,12,1,128,0,1,fp8,fp8,0,0.24302933613459268
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,512,12,2,128,0,1,float16,float16,0,0.3479893207550049
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,512,12,2,128,0,1,float16,fp8,0,0.33740798632303876
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,512,12,2,128,0,1,fp8,fp8,0,0.26828799645106
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,512,12,4,128,0,1,float16,float16,0,0.4189866781234741
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,512,12,4,128,0,1,float16,fp8,0,0.39628799756368
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,512,12,4,128,0,1,fp8,fp8,0,0.33382399876912433
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,512,12,12,128,0,1,float16,float16,0,0.26470400889714557
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,512,12,12,128,0,1,float16,fp8,0,0.2167466680208842
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,512,12,1,128,0,1,float16,float16,0,0.14131200313568115
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,512,12,1,128,0,1,float16,fp8,0,0.14131200313568115
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,512,12,12,128,0,1,fp8,fp8,0,0.2392746607462565
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,512,12,1,128,0,1,fp8,fp8,0,0.09096533060073853
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,512,12,2,128,0,1,float16,fp8,0,0.13943466544151306
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,512,12,2,128,0,1,float16,float16,0,0.14131200313568115
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,512,12,2,128,0,1,fp8,fp8,0,0.091648002465566
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,512,12,4,128,0,1,float16,float16,0,0.1474560002485911
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,512,12,4,128,0,1,float16,fp8,0,0.1460906664530436
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,512,12,4,128,0,1,fp8,fp8,0,0.09215999643007915
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,512,12,12,128,0,1,float16,float16,0,0.091648002465566
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,512,12,12,128,0,1,float16,fp8,0,0.08635733524958293
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,512,12,12,128,0,1,fp8,fp8,0,0.05563733478387197
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,512,12,1,128,0,1,float16,float16,0,0.07799466451009114
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,512,12,1,128,0,1,float16,fp8,0,0.07867733140786488
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,512,12,1,128,0,1,fp8,fp8,0,0.05341866612434387
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,512,12,2,128,0,1,float16,float16,0,0.08106666803359985
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,512,12,2,128,0,1,float16,fp8,0,0.07765333354473114
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,512,12,4,128,0,1,float16,float16,0,0.07970133423805237
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,512,12,2,128,0,1,fp8,fp8,0,0.05222400029500326
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,512,12,4,128,0,1,float16,fp8,0,0.08004266520341237
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,512,12,4,128,0,1,fp8,fp8,0,0.05358933409055074
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,512,12,12,128,0,1,float16,float16,0,0.05017599960168203
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,512,12,12,128,0,1,float16,fp8,0,0.048810665806134544
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,512,12,12,128,0,1,fp8,fp8,0,0.0341333324710528
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,512,12,1,128,0,1,float16,float16,0,0.04625066618124644
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,512,12,1,128,0,1,float16,fp8,0,0.04778666794300079
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,512,12,1,128,0,1,fp8,fp8,0,0.03379199902216593
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,512,12,2,128,0,1,float16,float16,0,0.04539733131726583
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,512,12,2,128,0,1,float16,fp8,0,0.04744533201058706
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,512,12,2,128,0,1,fp8,fp8,0,0.03379199902216593
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,512,12,4,128,0,1,float16,float16,0,0.046762665112813316
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,512,12,4,128,0,1,float16,fp8,0,0.04710400104522705
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,512,12,4,128,0,1,fp8,fp8,0,0.0339626669883728
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,512,12,12,128,0,1,float16,float16,0,0.03362133353948593
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,512,12,12,128,0,1,fp8,fp8,0,0.02491733431816101
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,512,12,12,128,0,1,float16,fp8,0,0.03379199902216593
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,512,12,1,128,0,1,float16,float16,0,0.03242666771014532
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,512,12,1,128,0,1,float16,fp8,0,0.03293866664171219
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,512,12,1,128,0,1,fp8,fp8,0,0.02457600086927414
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,512,12,2,128,0,1,float16,float16,0,0.03276800115903219
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,512,12,2,128,0,1,fp8,fp8,0,0.02457600086927414
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,512,12,2,128,0,1,float16,fp8,0,0.03293866664171219
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,512,12,4,128,0,1,float16,float16,0,0.03310933212439219
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,512,12,4,128,0,1,fp8,fp8,0,0.024746666351954143
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,512,12,4,128,0,1,float16,fp8,0,0.03345066557327906
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,512,12,12,128,0,1,float16,float16,0,0.02611200014750163
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,512,12,12,128,0,1,float16,fp8,0,0.025941332181294758
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,512,12,1,128,0,1,float16,float16,0,0.025600001215934753
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,512,12,12,128,0,1,fp8,fp8,0,0.01877333347996076
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,512,12,1,128,0,1,float16,fp8,0,0.025429333249727886
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,512,12,1,128,0,1,fp8,fp8,0,0.018432000031073887
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,512,12,2,128,0,1,float16,float16,0,0.025258667767047882
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,512,12,2,128,0,1,float16,fp8,0,0.025600001215934753
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,512,12,2,128,0,1,fp8,fp8,0,0.018602666755517323
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,512,12,4,128,0,1,float16,float16,0,0.025941332181294758
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,512,12,4,128,0,1,float16,fp8,0,0.025941332181294758
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,512,12,4,128,0,1,fp8,fp8,0,0.01877333347996076
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,256,12,1,128,0,1,float16,float16,0,1.4506667455037434
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,256,12,1,128,0,1,float16,fp8,0,1.4271146456400554
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,256,12,1,128,0,1,fp8,fp8,0,1.1091626485188801
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,256,12,2,128,0,1,float16,float16,0,1.6076800028483074
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,256,12,2,128,0,1,float16,fp8,0,1.5831039746602376
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,256,12,2,128,0,1,fp8,fp8,0,1.234773317972819
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,256,12,4,128,0,1,float16,float16,0,1.9618132909138997
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,256,12,4,128,0,1,float16,fp8,0,1.8894507090250652
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,256,12,12,128,0,1,float16,float16,0,1.5709865887959797
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,256,12,4,128,0,1,fp8,fp8,0,1.519445260365804
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,256,12,12,128,0,1,float16,fp8,0,1.492479960123698
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,256,12,1,128,0,1,float16,float16,0,0.6541653474171957
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,256,12,12,128,0,1,fp8,fp8,0,1.2619093259175618
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,256,12,1,128,0,1,float16,fp8,0,0.6732800006866455
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,256,12,1,128,0,1,fp8,fp8,0,0.5254826545715332
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,256,12,2,128,0,1,float16,float16,0,0.7418879667917887
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,256,12,2,128,0,1,float16,fp8,0,0.7210666338602701
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,256,12,2,128,0,1,fp8,fp8,0,0.5860693454742432
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,256,12,4,128,0,1,float16,float16,0,0.9212586879730225
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,256,12,4,128,0,1,float16,fp8,0,0.8891733487447103
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,256,12,4,128,0,1,fp8,fp8,0,0.7051946322123209
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,256,12,12,128,0,1,float16,float16,0,0.7497386932373047
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,256,12,12,128,0,1,float16,fp8,0,0.6934186617533366
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,256,12,12,128,0,1,fp8,fp8,0,0.5857280095418295
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,256,12,1,128,0,1,float16,float16,0,0.24064000447591147
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,256,12,1,128,0,1,float16,fp8,0,0.23449599742889404
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,256,12,1,128,0,1,fp8,fp8,0,0.19473065932591757
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,256,12,2,128,0,1,float16,float16,0,0.26897066831588745
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,256,12,2,128,0,1,float16,fp8,0,0.2539520064989726
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,256,12,2,128,0,1,fp8,fp8,0,0.22425599892934164
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,256,12,4,128,0,1,float16,float16,0,0.3797333240509033
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,256,12,4,128,0,1,float16,fp8,0,0.3490133285522461
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,256,12,4,128,0,1,fp8,fp8,0,0.29764266808827716
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,256,12,12,128,0,1,float16,float16,0,0.24593067169189453
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,256,12,12,128,0,1,float16,fp8,0,0.1812480092048645
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,256,12,12,128,0,1,fp8,fp8,0,0.21094399690628052
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,256,12,1,128,0,1,float16,float16,0,0.09523199995358785
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,256,12,1,128,0,1,float16,fp8,0,0.09727999567985535
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,256,12,1,128,0,1,fp8,fp8,0,0.06724266707897186
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,256,12,2,128,0,1,float16,float16,0,0.09608532985051473
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,256,12,2,128,0,1,float16,fp8,0,0.09454933802286784
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,256,12,2,128,0,1,fp8,fp8,0,0.06758399804433186
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,256,12,4,128,0,1,float16,float16,0,0.10052266716957092
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,256,12,4,128,0,1,float16,fp8,0,0.09779199957847595
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,256,12,4,128,0,1,fp8,fp8,0,0.06843733290831248
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,256,12,12,128,0,1,float16,float16,0,0.06229333579540253
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,256,12,12,128,0,1,float16,fp8,0,0.059903999169667564
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,256,12,12,128,0,1,fp8,fp8,0,0.042837331692377724
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,256,12,1,128,0,1,float16,float16,0,0.05273599922657013
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,256,12,1,128,0,1,float16,fp8,0,0.052906667192777
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,256,12,1,128,0,1,fp8,fp8,0,0.03942399968703588
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,256,12,2,128,0,1,float16,float16,0,0.054272000988324486
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,256,12,2,128,0,1,float16,fp8,0,0.053247998158137
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,256,12,2,128,0,1,fp8,fp8,0,0.039936001102129616
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,256,12,4,128,0,1,float16,float16,0,0.054272000988324486
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,256,12,4,128,0,1,fp8,fp8,0,0.040106666584809623
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,256,12,4,128,0,1,float16,fp8,0,0.05444266895453135
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,256,12,12,128,0,1,float16,float16,0,0.03583999971548716
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,256,12,12,128,0,1,float16,fp8,0,0.03532800078392029
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,256,12,12,128,0,1,fp8,fp8,0,0.027477333943049114
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,256,12,1,128,0,1,float16,fp8,0,0.034645333886146545
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,256,12,1,128,0,1,float16,float16,0,0.03328000009059906
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,256,12,1,128,0,1,fp8,fp8,0,0.0264533335963885
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,256,12,2,128,0,1,float16,float16,0,0.03328000009059906
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,256,12,2,128,0,1,float16,fp8,0,0.0341333324710528
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,256,12,2,128,0,1,fp8,fp8,0,0.02628266563018163
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,256,12,4,128,0,1,float16,float16,0,0.034474665919939675
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,256,12,4,128,0,1,float16,fp8,0,0.034304000437259674
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,256,12,4,128,0,1,fp8,fp8,0,0.026965332527955372
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,256,12,12,128,0,1,float16,fp8,0,0.0240639994541804
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,256,12,12,128,0,1,float16,float16,0,0.024234667420387268
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,256,12,12,128,0,1,fp8,fp8,0,0.018944000204404194
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,256,12,1,128,0,1,float16,float16,0,0.023039999107519787
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,256,12,1,128,0,1,float16,fp8,0,0.023381332556406658
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,256,12,1,128,0,1,fp8,fp8,0,0.018090666582187016
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,256,12,2,128,0,1,float16,float16,0,0.023210667073726654
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,256,12,2,128,0,1,float16,fp8,0,0.023210667073726654
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,256,12,2,128,0,1,fp8,fp8,0,0.018090666582187016
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,256,12,4,128,0,1,float16,fp8,0,0.023552000522613525
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,256,12,4,128,0,1,float16,float16,0,0.023381332556406658
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,256,12,4,128,0,1,fp8,fp8,0,0.01877333347996076
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,256,12,12,128,0,1,float16,float16,0,0.01826133330663045
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,256,12,12,128,0,1,float16,fp8,0,0.01791999985774358
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,256,12,12,128,0,1,fp8,fp8,0,0.014165333161751429
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,256,12,1,128,0,1,float16,float16,0,0.01757866640885671
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,256,12,1,128,0,1,float16,fp8,0,0.017749333133300144
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,256,12,1,128,0,1,fp8,fp8,0,0.013823999712864557
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,256,12,2,128,0,1,float16,float16,0,0.01757866640885671
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,256,12,2,128,0,1,float16,fp8,0,0.017749333133300144
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,256,12,2,128,0,1,fp8,fp8,0,0.013994666437307993
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,256,12,4,128,0,1,float16,float16,0,0.017749333133300144
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,256,12,4,128,0,1,float16,fp8,0,0.017749333133300144
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,256,12,4,128,0,1,fp8,fp8,0,0.014165333161751429
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,256,12,12,128,0,1,float16,fp8,0,0.016554666062196095
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,256,12,12,128,0,1,float16,float16,0,0.016384000579516094
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,256,12,1,128,0,1,float16,float16,0,0.01621333385507266
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,256,12,12,128,0,1,fp8,fp8,0,0.013141332815090815
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,256,12,1,128,0,1,float16,fp8,0,0.016554666062196095
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,256,12,1,128,0,1,fp8,fp8,0,0.03345066557327906
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,256,12,2,128,0,1,float16,float16,0,0.016384000579516094
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,256,12,2,128,0,1,fp8,fp8,0,0.012970666090647379
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,256,12,2,128,0,1,float16,fp8,0,0.016895999511082966
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,256,12,4,128,0,1,float16,float16,0,0.016384000579516094
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,256,12,4,128,0,1,float16,fp8,0,0.01672533278663953
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,256,12,4,128,0,1,fp8,fp8,0,0.013141332815090815
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,128,12,1,128,0,1,float16,float16,0,0.66594131787618
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,128,12,1,128,0,1,float16,fp8,0,0.6458026568094889
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,128,12,1,128,0,1,fp8,fp8,0,0.47257598241170246
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,128,12,2,128,0,1,float16,float16,0,0.754858652750651
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,128,12,2,128,0,1,float16,fp8,0,0.7296000321706136
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,128,12,2,128,0,1,fp8,fp8,0,0.5452800194422404
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,128,12,4,128,0,1,float16,float16,0,0.9351999759674072
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,128,12,4,128,0,1,float16,fp8,0,0.8936106363932291
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,128,12,12,128,0,1,float16,float16,0,0.7656106948852539
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,128,12,4,128,0,1,fp8,fp8,0,0.7031466960906982
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,128,12,12,128,0,1,float16,fp8,0,0.7058773040771484
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,128,12,12,128,0,1,fp8,fp8,0,0.5918720165888468
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,128,12,1,128,0,1,float16,fp8,0,0.19387733936309814
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,128,12,1,128,0,1,float16,float16,0,0.20548266172409058
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,128,12,1,128,0,1,fp8,fp8,0,0.16145066420237222
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,128,12,2,128,0,1,float16,float16,0,0.2616320053736369
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,128,12,2,128,0,1,float16,fp8,0,0.24576000372568765
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,128,12,2,128,0,1,fp8,fp8,0,0.20155733823776245
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,128,12,4,128,0,1,float16,float16,0,0.39048532644907635
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,128,12,4,128,0,1,float16,fp8,0,0.3684693177541097
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,128,12,4,128,0,1,fp8,fp8,0,0.27989333868026733
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,128,12,12,128,0,1,float16,float16,0,0.24320000410079956
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,128,12,12,128,0,1,float16,fp8,0,0.17988266547520956
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,128,12,12,128,0,1,fp8,fp8,0,0.20053333044052124
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,128,12,1,128,0,1,float16,float16,0,0.07441066702206929
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,128,12,1,128,0,1,float16,fp8,0,0.07355733215808868
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,128,12,1,128,0,1,fp8,fp8,0,0.055125330885251365
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,128,12,2,128,0,1,float16,float16,0,0.07628799974918365
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,128,12,2,128,0,1,float16,fp8,0,0.07611733178297679
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,128,12,2,128,0,1,fp8,fp8,0,0.05597866574923197
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,128,12,4,128,0,1,float16,float16,0,0.08721066514650981
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,128,12,4,128,0,1,float16,fp8,0,0.07901866734027863
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,128,12,4,128,0,1,fp8,fp8,0,0.057002668579419456
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,128,12,12,128,0,1,float16,float16,0,0.048298666874567665
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,128,12,12,128,0,1,float16,fp8,0,0.04727466901143392
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,128,12,12,128,0,1,fp8,fp8,0,0.03583999971548716
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,128,12,1,128,0,1,float16,float16,0,0.04130133241415024
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,128,12,1,128,0,1,float16,fp8,0,0.04113066693147024
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,128,12,1,128,0,1,fp8,fp8,0,0.03242666771014532
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,128,12,2,128,0,1,float16,float16,0,0.04130133241415024
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,128,12,2,128,0,1,float16,fp8,0,0.04164266586303711
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,128,12,2,128,0,1,fp8,fp8,0,0.03293866664171219
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,128,12,4,128,0,1,float16,float16,0,0.043007999658584595
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,128,12,4,128,0,1,float16,fp8,0,0.04164266586303711
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,128,12,4,128,0,1,fp8,fp8,0,0.03345066557327906
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,128,12,12,128,0,1,float16,float16,0,0.029525332152843475
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,128,12,12,128,0,1,float16,fp8,0,0.0288426677385966
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,128,12,1,128,0,1,float16,float16,0,0.027306665976842243
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,128,12,12,128,0,1,fp8,fp8,0,0.023210667073726654
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,128,12,1,128,0,1,float16,fp8,0,0.027306665976842243
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,128,12,1,128,0,1,fp8,fp8,0,0.022357332209746044
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,128,12,2,128,0,1,float16,float16,0,0.0266239990790685
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,128,12,2,128,0,1,float16,fp8,0,0.027477333943049114
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,128,12,2,128,0,1,fp8,fp8,0,0.022357332209746044
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,128,12,4,128,0,1,float16,float16,0,0.027477333943049114
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,128,12,4,128,0,1,float16,fp8,0,0.027647999425729115
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,128,12,4,128,0,1,fp8,fp8,0,0.022698665658632915
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,128,12,12,128,0,1,float16,float16,0,0.019626667102177937
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,128,12,12,128,0,1,float16,fp8,0,0.019626667102177937
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,128,12,12,128,0,1,fp8,fp8,0,0.016554666062196095
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,128,12,1,128,0,1,float16,float16,0,0.01877333347996076
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,128,12,1,128,0,1,float16,fp8,0,0.018944000204404194
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,128,12,1,128,0,1,fp8,fp8,0,0.01570133368174235
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,128,12,2,128,0,1,float16,float16,0,0.01877333347996076
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,128,12,2,128,0,1,fp8,fp8,0,0.016042667130629223
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,128,12,2,128,0,1,float16,fp8,0,0.018944000204404194
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,128,12,4,128,0,1,float16,float16,0,0.018944000204404194
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,128,12,4,128,0,1,float16,fp8,0,0.018944000204404194
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,128,12,4,128,0,1,fp8,fp8,0,0.016384000579516094
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,128,12,12,128,0,1,float16,float16,0,0.014335999886194864
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,128,12,12,128,0,1,float16,fp8,0,0.014165333161751429
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,128,12,12,128,0,1,fp8,fp8,0,0.011776000261306763
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,128,12,1,128,0,1,float16,float16,0,0.013823999712864557
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,128,12,1,128,0,1,float16,fp8,0,0.013823999712864557
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,128,12,2,128,0,1,float16,float16,0,0.013482666263977686
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,128,12,1,128,0,1,fp8,fp8,0,0.011605333536863327
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,128,12,2,128,0,1,float16,fp8,0,0.013823999712864557
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,128,12,2,128,0,1,fp8,fp8,0,0.011605333536863327
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,128,12,4,128,0,1,float16,float16,0,0.013823999712864557
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,128,12,4,128,0,1,float16,fp8,0,0.013823999712864557
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,128,12,4,128,0,1,fp8,fp8,0,0.011946666985750198
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,128,12,12,128,0,1,float16,float16,0,0.012458667159080505
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,128,12,12,128,0,1,fp8,fp8,0,0.010751999914646149
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,128,12,12,128,0,1,float16,fp8,0,0.012458667159080505
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,128,12,1,128,0,1,float16,float16,0,0.012458667159080505
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,128,12,1,128,0,1,float16,fp8,0,0.012629333883523941
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,128,12,1,128,0,1,fp8,fp8,0,0.010751999914646149
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,128,12,2,128,0,1,float16,float16,0,0.01228800043463707
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,128,12,2,128,0,1,float16,fp8,0,0.012800000607967377
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,128,12,2,128,0,1,fp8,fp8,0,0.010751999914646149
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,128,12,4,128,0,1,float16,fp8,0,0.012629333883523941
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,128,12,4,128,0,1,float16,float16,0,0.012629333883523941
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,128,12,4,128,0,1,fp8,fp8,0,0.010922666639089584
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,128,12,12,128,0,1,float16,float16,0,0.011776000261306763
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,128,12,12,128,0,1,float16,fp8,0,0.012117333710193634
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,128,12,12,128,0,1,fp8,fp8,0,0.010069333637754122
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,128,12,1,128,0,1,float16,float16,0,0.011946666985750198
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,128,12,1,128,0,1,fp8,fp8,0,0.010239999741315842
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,128,12,1,128,0,1,float16,fp8,0,0.01228800043463707
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,128,12,2,128,0,1,float16,float16,0,0.011946666985750198
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,128,12,2,128,0,1,float16,fp8,0,0.01228800043463707
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,128,12,2,128,0,1,fp8,fp8,0,0.010410666465759277
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,128,12,4,128,0,1,float16,float16,0,0.011946666985750198
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,128,12,4,128,0,1,float16,fp8,0,0.01228800043463707
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,128,12,4,128,0,1,fp8,fp8,0,0.010410666465759277
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,64,12,1,128,0,1,float16,float16,0,0.20736000935236612
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,64,12,1,128,0,1,float16,fp8,0,0.20070399840672812
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,64,12,1,128,0,1,fp8,fp8,0,0.15103999773661295
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,64,12,2,128,0,1,float16,float16,0,0.26282666126887005
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,64,12,2,128,0,1,float16,fp8,0,0.24797866741816202
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,64,12,2,128,0,1,fp8,fp8,0,0.1889280080795288
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,64,12,4,128,0,1,float16,float16,0,0.38280534744262695
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,64,12,4,128,0,1,float16,fp8,0,0.35601067543029785
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,64,12,4,128,0,1,fp8,fp8,0,0.26794666051864624
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,64,12,12,128,0,1,float16,fp8,0,0.17322667439778647
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,64,12,12,128,0,1,float16,float16,0,0.24183466037114462
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,64,12,12,128,0,1,fp8,fp8,0,0.19524266322453818
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,64,12,1,128,0,1,float16,float16,0,0.0628053347269694
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,64,12,1,128,0,1,float16,fp8,0,0.06263466676076253
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,64,12,1,128,0,1,fp8,fp8,0,0.04898133377234141
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,64,12,2,128,0,1,float16,float16,0,0.0631466656923294
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,64,12,2,128,0,1,float16,fp8,0,0.06263466676076253
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,64,12,2,128,0,1,fp8,fp8,0,0.05000533163547516
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,64,12,4,128,0,1,float16,float16,0,0.07645866771539052
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,64,12,4,128,0,1,float16,fp8,0,0.06673066814740498
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,64,12,4,128,0,1,fp8,fp8,0,0.05171200136343638
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,64,12,12,128,0,1,float16,float16,0,0.04266666869322459
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,64,12,12,128,0,1,float16,fp8,0,0.04147200038035711
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,64,12,12,128,0,1,fp8,fp8,0,0.03242666771014532
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,64,12,1,128,0,1,float16,float16,0,0.03566933423280716
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,64,12,1,128,0,1,float16,fp8,0,0.03601066768169403
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,64,12,1,128,0,1,fp8,fp8,0,0.0288426677385966
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,64,12,2,128,0,1,float16,float16,0,0.0365226666132609
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,64,12,2,128,0,1,float16,fp8,0,0.03703466554482778
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,64,12,2,128,0,1,fp8,fp8,0,0.0288426677385966
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,64,12,4,128,0,1,float16,float16,0,0.037717332442601524
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,64,12,4,128,0,1,float16,fp8,0,0.037205333511034645
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,64,12,4,128,0,1,fp8,fp8,0,0.029525332152843475
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,64,12,12,128,0,1,float16,float16,0,0.0264533335963885
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,64,12,12,128,0,1,float16,fp8,0,0.02611200014750163
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,64,12,12,128,0,1,fp8,fp8,0,0.021162666380405426
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,64,12,1,128,0,1,float16,float16,0,0.024405332903067272
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,64,12,1,128,0,1,float16,fp8,0,0.024234667420387268
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,64,12,1,128,0,1,fp8,fp8,0,0.020309332758188248
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,64,12,2,128,0,1,float16,float16,0,0.0240639994541804
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,64,12,2,128,0,1,float16,fp8,0,0.024234667420387268
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,64,12,2,128,0,1,fp8,fp8,0,0.020479999482631683
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,64,12,4,128,0,1,float16,float16,0,0.02457600086927414
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,64,12,4,128,0,1,float16,fp8,0,0.024746666351954143
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,64,12,4,128,0,1,fp8,fp8,0,0.021333334346612293
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,64,12,12,128,0,1,float16,float16,0,0.01672533278663953
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,64,12,12,128,0,1,float16,fp8,0,0.01672533278663953
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,64,12,1,128,0,1,float16,float16,0,0.016042667130629223
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,64,12,12,128,0,1,fp8,fp8,0,0.014848000059525171
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,64,12,1,128,0,1,float16,fp8,0,0.016042667130629223
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,64,12,2,128,0,1,float16,float16,0,0.015872000406185787
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,64,12,1,128,0,1,fp8,fp8,0,0.0145066666106383
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,64,12,2,128,0,1,float16,fp8,0,0.016042667130629223
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,64,12,2,128,0,1,fp8,fp8,0,0.014165333161751429
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,64,12,4,128,0,1,float16,float16,0,0.01621333385507266
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,64,12,4,128,0,1,float16,fp8,0,0.016554666062196095
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,64,12,4,128,0,1,fp8,fp8,0,0.014848000059525171
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,64,12,12,128,0,1,float16,float16,0,0.012117333710193634
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,64,12,12,128,0,1,float16,fp8,0,0.012117333710193634
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,64,12,12,128,0,1,fp8,fp8,0,0.010922666639089584
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,64,12,1,128,0,1,float16,float16,0,0.011434666812419891
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,64,12,1,128,0,1,float16,fp8,0,0.011605333536863327
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,64,12,1,128,0,1,fp8,fp8,0,0.010239999741315842
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,64,12,2,128,0,1,float16,float16,0,0.011605333536863327
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,64,12,2,128,0,1,float16,fp8,0,0.011776000261306763
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,64,12,2,128,0,1,fp8,fp8,0,0.010410666465759277
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,64,12,4,128,0,1,float16,float16,0,0.011776000261306763
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,64,12,4,128,0,1,float16,fp8,0,0.011946666985750198
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,64,12,4,128,0,1,fp8,fp8,0,0.010581333190202713
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,64,12,12,128,0,1,float16,float16,0,0.010581333190202713
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,64,12,12,128,0,1,float16,fp8,0,0.010751999914646149
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,64,12,12,128,0,1,fp8,fp8,0,0.009557333464423815
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,64,12,1,128,0,1,float16,float16,0,0.010410666465759277
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,64,12,1,128,0,1,float16,fp8,0,0.010581333190202713
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,64,12,1,128,0,1,fp8,fp8,0,0.00972800018886725
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,64,12,2,128,0,1,float16,float16,0,0.010410666465759277
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,64,12,2,128,0,1,float16,fp8,0,0.010751999914646149
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,64,12,2,128,0,1,fp8,fp8,0,0.009557333464423815
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,64,12,4,128,0,1,float16,float16,0,0.010410666465759277
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,64,12,4,128,0,1,float16,fp8,0,0.010751999914646149
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,64,12,4,128,0,1,fp8,fp8,0,0.00972800018886725
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,64,12,12,128,0,1,float16,float16,0,0.00972800018886725
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,64,12,12,128,0,1,float16,fp8,0,0.010069333637754122
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,64,12,12,128,0,1,fp8,fp8,0,0.009045333291093508
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,64,12,1,128,0,1,float16,float16,0,0.009898666913310686
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,64,12,1,128,0,1,float16,fp8,0,0.010239999741315842
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,64,12,1,128,0,1,fp8,fp8,0,0.009216000015536943
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,64,12,2,128,0,1,float16,float16,0,0.009898666913310686
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,64,12,2,128,0,1,float16,fp8,0,0.010239999741315842
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,64,12,2,128,0,1,fp8,fp8,0,0.00938666673998038
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,64,12,4,128,0,1,float16,float16,0,0.010069333637754122
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,64,12,4,128,0,1,fp8,fp8,0,0.00938666673998038
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,64,12,4,128,0,1,float16,fp8,0,0.010239999741315842
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,64,12,12,128,0,1,float16,float16,0,0.009557333464423815
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,64,12,12,128,0,1,float16,fp8,0,0.00972800018886725
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,64,12,12,128,0,1,fp8,fp8,0,0.008874666566650072
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,64,12,1,128,0,1,float16,float16,0,0.009898666913310686
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,64,12,1,128,0,1,float16,fp8,0,0.010069333637754122
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,64,12,1,128,0,1,fp8,fp8,0,0.009216000015536943
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,64,12,2,128,0,1,float16,float16,0,0.009898666913310686
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,64,12,2,128,0,1,fp8,fp8,0,0.009216000015536943
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,64,12,4,128,0,1,float16,float16,0,0.009898666913310686
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,64,12,2,128,0,1,float16,fp8,0,0.010069333637754122
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,64,12,4,128,0,1,float16,fp8,0,0.010239999741315842
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,64,12,4,128,0,1,fp8,fp8,0,0.009216000015536943
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,32,12,1,128,0,1,float16,float16,0,0.06724266707897186
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,32,12,1,128,0,1,float16,fp8,0,0.06690133114655812
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,32,12,1,128,0,1,fp8,fp8,0,0.055125330885251365
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,32,12,2,128,0,1,float16,float16,0,0.06758399804433186
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,32,12,2,128,0,1,float16,fp8,0,0.06673066814740498
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,32,12,4,128,0,1,float16,float16,0,0.07918933530648549
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,32,12,2,128,0,1,fp8,fp8,0,0.05580799778302511
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,32,12,4,128,0,1,float16,fp8,0,0.06980266670385997
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,32,12,4,128,0,1,fp8,fp8,0,0.057855998476346336
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,32,12,12,128,0,1,float16,float16,0,0.04386133452256521
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,32,12,12,128,0,1,float16,fp8,0,0.04215466479460398
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,32,12,12,128,0,1,fp8,fp8,0,0.034304000437259674
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,32,12,1,128,0,1,float16,float16,0,0.03788800040880839
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,32,12,1,128,0,1,float16,fp8,0,0.03754666695992152
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,32,12,1,128,0,1,fp8,fp8,0,0.032255999743938446
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,32,12,2,128,0,1,float16,float16,0,0.03822933385769526
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,32,12,2,128,0,1,float16,fp8,0,0.03839999934037527
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,32,12,2,128,0,1,fp8,fp8,0,0.032255999743938446
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,32,12,4,128,0,1,float16,float16,0,0.03976533313592275
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,32,12,12,128,0,1,float16,float16,0,0.025770666698614757
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,32,12,4,128,0,1,float16,fp8,0,0.03908266623814901
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,32,12,4,128,0,1,fp8,fp8,0,0.03242666771014532
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,32,12,12,128,0,1,float16,fp8,0,0.025258667767047882
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,32,12,12,128,0,1,fp8,fp8,0,0.021333334346612293
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,32,12,1,128,0,1,float16,float16,0,0.023893333971500397
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,32,12,1,128,0,1,float16,fp8,0,0.023552000522613525
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,32,12,2,128,0,1,float16,float16,0,0.02372266600529353
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,32,12,1,128,0,1,fp8,fp8,0,0.021333334346612293
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,32,12,2,128,0,1,fp8,fp8,0,0.020479999482631683
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,32,12,2,128,0,1,float16,fp8,0,0.023381332556406658
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,32,12,4,128,0,1,float16,float16,0,0.024405332903067272
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,32,12,4,128,0,1,float16,fp8,0,0.023893333971500397
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,32,12,4,128,0,1,fp8,fp8,0,0.021333334346612293
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,32,12,12,128,0,1,float16,float16,0,0.018090666582187016
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,32,12,12,128,0,1,float16,fp8,0,0.018090666582187016
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,32,12,12,128,0,1,fp8,fp8,0,0.016384000579516094
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,32,12,1,128,0,1,float16,float16,0,0.017407999684413273
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,32,12,1,128,0,1,float16,fp8,0,0.017407999684413273
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,32,12,1,128,0,1,fp8,fp8,0,0.015872000406185787
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,32,12,2,128,0,1,float16,float16,0,0.017237332959969837
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,32,12,2,128,0,1,float16,fp8,0,0.01757866640885671
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,32,12,2,128,0,1,fp8,fp8,0,0.015530666957298914
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,32,12,4,128,0,1,float16,float16,0,0.01791999985774358
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,32,12,4,128,0,1,fp8,fp8,0,0.015872000406185787
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,32,12,4,128,0,1,float16,fp8,0,0.018090666582187016
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,32,12,12,128,0,1,float16,float16,0,0.011605333536863327
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,32,12,12,128,0,1,float16,fp8,0,0.011605333536863327
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,32,12,12,128,0,1,fp8,fp8,0,0.010410666465759277
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,32,12,1,128,0,1,float16,float16,0,0.01109333336353302
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,32,12,1,128,0,1,float16,fp8,0,0.01109333336353302
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,32,12,1,128,0,1,fp8,fp8,0,0.010239999741315842
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,32,12,2,128,0,1,float16,float16,0,0.010922666639089584
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,32,12,2,128,0,1,float16,fp8,0,0.011264000087976456
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,32,12,2,128,0,1,fp8,fp8,0,0.010069333637754122
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,32,12,4,128,0,1,float16,float16,0,0.011434666812419891
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,32,12,4,128,0,1,float16,fp8,0,0.011434666812419891
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,32,12,4,128,0,1,fp8,fp8,0,0.010410666465759277
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,32,12,12,128,0,1,float16,float16,0,0.009557333464423815
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,32,12,12,128,0,1,float16,fp8,0,0.009557333464423815
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,32,12,12,128,0,1,fp8,fp8,0,0.008874666566650072
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,32,12,1,128,0,1,float16,fp8,0,0.00972800018886725
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,32,12,1,128,0,1,float16,float16,0,0.009557333464423815
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,32,12,1,128,0,1,fp8,fp8,0,0.009045333291093508
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,32,12,2,128,0,1,float16,float16,0,0.00938666673998038
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,32,12,2,128,0,1,float16,fp8,0,0.009557333464423815
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,32,12,2,128,0,1,fp8,fp8,0,0.009045333291093508
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,32,12,4,128,0,1,float16,float16,0,0.00938666673998038
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,32,12,4,128,0,1,float16,fp8,0,0.00972800018886725
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,32,12,4,128,0,1,fp8,fp8,0,0.009045333291093508
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,32,12,12,128,0,1,float16,float16,0,0.0085333331177632
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,32,12,12,128,0,1,float16,fp8,0,0.008874666566650072
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,32,12,12,128,0,1,fp8,fp8,0,0.0085333331177632
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,32,12,1,128,0,1,float16,fp8,0,0.009216000015536943
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,32,12,1,128,0,1,float16,float16,0,0.008703999842206636
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,32,12,1,128,0,1,fp8,fp8,0,0.0085333331177632
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,32,12,2,128,0,1,float16,float16,0,0.008703999842206636
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,32,12,2,128,0,1,float16,fp8,0,0.009045333291093508
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,32,12,2,128,0,1,fp8,fp8,0,0.0085333331177632
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,32,12,4,128,0,1,float16,float16,0,0.008874666566650072
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,32,12,4,128,0,1,float16,fp8,0,0.009216000015536943
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,32,12,4,128,0,1,fp8,fp8,0,0.008874666566650072
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,32,12,12,128,0,1,float16,float16,0,0.008362666393319765
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,32,12,12,128,0,1,float16,fp8,0,0.008703999842206636
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,32,12,12,128,0,1,fp8,fp8,0,0.008874666566650072
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,32,12,1,128,0,1,float16,float16,0,0.0085333331177632
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,32,12,1,128,0,1,float16,fp8,0,0.008874666566650072
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,32,12,1,128,0,1,fp8,fp8,0,0.0085333331177632
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,32,12,2,128,0,1,float16,float16,0,0.0085333331177632
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,32,12,2,128,0,1,float16,fp8,0,0.009045333291093508
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,32,12,4,128,0,1,float16,float16,0,0.008703999842206636
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,32,12,2,128,0,1,fp8,fp8,0,0.008703999842206636
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,32,12,4,128,0,1,float16,fp8,0,0.008874666566650072
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,32,12,4,128,0,1,fp8,fp8,0,0.008362666393319765
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,32,12,12,128,0,1,float16,float16,0,0.008362666393319765
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,32,12,12,128,0,1,float16,fp8,0,0.0085333331177632
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,32,12,12,128,0,1,fp8,fp8,0,0.009216000015536943
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,32,12,1,128,0,1,float16,float16,0,0.0085333331177632
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,32,12,1,128,0,1,float16,fp8,0,0.008703999842206636
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,32,12,1,128,0,1,fp8,fp8,0,0.008362666393319765
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,32,12,2,128,0,1,float16,float16,0,0.0085333331177632
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,32,12,2,128,0,1,float16,fp8,0,0.008874666566650072
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,32,12,2,128,0,1,fp8,fp8,0,0.008362666393319765
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,32,12,4,128,0,1,float16,float16,0,0.0085333331177632
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,32,12,4,128,0,1,float16,fp8,0,0.009216000015536943
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,32,12,4,128,0,1,fp8,fp8,0,0.008703999842206636
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,16,12,1,128,0,1,float16,float16,0,0.050517335534095764
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,16,12,1,128,0,1,float16,fp8,0,0.051370665431022644
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,16,12,1,128,0,1,fp8,fp8,0,0.04147200038035711
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,16,12,2,128,0,1,float16,float16,0,0.051029334465662636
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,16,12,2,128,0,1,float16,fp8,0,0.051541333397229515
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,16,12,4,128,0,1,float16,float16,0,0.05222400029500326
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,16,12,2,128,0,1,fp8,fp8,0,0.04164266586303711
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,16,12,4,128,0,1,float16,fp8,0,0.05239466826121012
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,16,12,4,128,0,1,fp8,fp8,0,0.04232533276081085
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,16,12,12,128,0,1,float16,float16,0,0.031061333914597828
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,16,12,12,128,0,1,float16,fp8,0,0.030378667016824085
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,16,12,12,128,0,1,fp8,fp8,0,0.025429333249727886
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,16,12,1,128,0,1,float16,float16,0,0.03054933249950409
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,16,12,1,128,0,1,float16,fp8,0,0.030037333567937214
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,16,12,1,128,0,1,fp8,fp8,0,0.025600001215934753
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,16,12,2,128,0,1,float16,fp8,0,0.029866665601730347
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,16,12,2,128,0,1,float16,float16,0,0.030378667016824085
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,16,12,2,128,0,1,fp8,fp8,0,0.025429333249727886
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,16,12,4,128,0,1,float16,float16,0,0.03089066594839096
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,16,12,4,128,0,1,float16,fp8,0,0.03054933249950409
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,16,12,4,128,0,1,fp8,fp8,0,0.02611200014750163
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,16,12,12,128,0,1,float16,float16,0,0.019968000551064808
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,16,12,12,128,0,1,float16,fp8,0,0.019626667102177937
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,16,12,12,128,0,1,fp8,fp8,0,0.0170666662355264
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,16,12,1,128,0,1,float16,float16,0,0.019626667102177937
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,16,12,1,128,0,1,float16,fp8,0,0.0194560003777345
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,16,12,1,128,0,1,fp8,fp8,0,0.0170666662355264
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,16,12,2,128,0,1,float16,float16,0,0.019285333653291065
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,16,12,2,128,0,1,float16,fp8,0,0.019285333653291065
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,16,12,2,128,0,1,fp8,fp8,0,0.0170666662355264
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,16,12,4,128,0,1,float16,fp8,0,0.019797333826621372
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,16,12,4,128,0,1,float16,float16,0,0.019797333826621372
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,16,12,4,128,0,1,fp8,fp8,0,0.01757866640885671
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,16,12,12,128,0,1,float16,float16,0,0.013653332988421122
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,16,12,12,128,0,1,float16,fp8,0,0.013823999712864557
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,16,12,12,128,0,1,fp8,fp8,0,0.011946666985750198
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,16,12,1,128,0,1,float16,float16,0,0.013482666263977686
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,16,12,1,128,0,1,float16,fp8,0,0.013482666263977686
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,16,12,1,128,0,1,fp8,fp8,0,0.011776000261306763
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,16,12,2,128,0,1,float16,float16,0,0.013482666263977686
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,16,12,2,128,0,1,float16,fp8,0,0.013823999712864557
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,16,12,2,128,0,1,fp8,fp8,0,0.011946666985750198
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,16,12,4,128,0,1,float16,float16,0,0.013823999712864557
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,16,12,4,128,0,1,float16,fp8,0,0.013994666437307993
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,16,12,4,128,0,1,fp8,fp8,0,0.011946666985750198
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,16,12,12,128,0,1,float16,fp8,0,0.009898666913310686
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,16,12,12,128,0,1,float16,float16,0,0.009898666913310686
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,16,12,1,128,0,1,float16,float16,0,0.009898666913310686
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,16,12,12,128,0,1,fp8,fp8,0,0.009216000015536943
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,16,12,1,128,0,1,float16,fp8,0,0.010069333637754122
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,16,12,1,128,0,1,fp8,fp8,0,0.009557333464423815
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,16,12,2,128,0,1,float16,float16,0,0.00972800018886725
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,16,12,2,128,0,1,float16,fp8,0,0.009898666913310686
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,16,12,2,128,0,1,fp8,fp8,0,0.009216000015536943
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,16,12,4,128,0,1,float16,float16,0,0.03601066768169403
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,16,12,4,128,0,1,float16,fp8,0,0.010239999741315842
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,16,12,4,128,0,1,fp8,fp8,0,0.00938666673998038
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,16,12,12,128,0,1,float16,float16,0,0.008362666393319765
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,16,12,12,128,0,1,float16,fp8,0,0.008703999842206636
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,16,12,12,128,0,1,fp8,fp8,0,0.0085333331177632
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,16,12,1,128,0,1,float16,float16,0,0.008703999842206636
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,16,12,1,128,0,1,float16,fp8,0,0.008703999842206636
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,16,12,1,128,0,1,fp8,fp8,0,0.008703999842206636
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,16,12,2,128,0,1,float16,float16,0,0.008703999842206636
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,16,12,2,128,0,1,float16,fp8,0,0.008874666566650072
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,16,12,2,128,0,1,fp8,fp8,0,0.008362666393319765
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,16,12,4,128,0,1,float16,float16,0,0.008874666566650072
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,16,12,4,128,0,1,float16,fp8,0,0.009045333291093508
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,16,12,4,128,0,1,fp8,fp8,0,0.0085333331177632
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,16,12,12,128,0,1,float16,float16,0,0.008192000289758047
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,16,12,12,128,0,1,float16,fp8,0,0.009216000015536943
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,16,12,12,128,0,1,fp8,fp8,0,0.009045333291093508
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,16,12,1,128,0,1,float16,float16,0,0.008362666393319765
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,16,12,1,128,0,1,float16,fp8,0,0.008874666566650072
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,16,12,1,128,0,1,fp8,fp8,0,0.0085333331177632
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,16,12,2,128,0,1,float16,float16,0,0.008362666393319765
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,16,12,2,128,0,1,float16,fp8,0,0.008874666566650072
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,16,12,2,128,0,1,fp8,fp8,0,0.008192000289758047
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,16,12,4,128,0,1,float16,float16,0,0.0085333331177632
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,16,12,4,128,0,1,float16,fp8,0,0.010069333637754122
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,16,12,4,128,0,1,fp8,fp8,0,0.008874666566650072
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,16,12,12,128,0,1,float16,float16,0,0.0085333331177632
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,16,12,12,128,0,1,float16,fp8,0,0.008362666393319765
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,16,12,12,128,0,1,fp8,fp8,0,0.008192000289758047
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,16,12,1,128,0,1,float16,float16,0,0.008362666393319765
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,16,12,1,128,0,1,float16,fp8,0,0.008703999842206636
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,16,12,1,128,0,1,fp8,fp8,0,0.009178666397929192
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,16,12,2,128,0,1,float16,float16,0,0.008874666566650072
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,16,12,2,128,0,1,fp8,fp8,0,0.008192000289758047
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,16,12,2,128,0,1,float16,fp8,0,0.008703999842206636
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,16,12,4,128,0,1,float16,float16,0,0.008362666393319765
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,16,12,4,128,0,1,float16,fp8,0,0.00938666673998038
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,16,12,4,128,0,1,fp8,fp8,0,0.0085333331177632
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,16,12,12,128,0,1,float16,float16,0,0.008021333565314611
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,16,12,12,128,0,1,fp8,fp8,0,0.008874666566650072
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,16,12,12,128,0,1,float16,fp8,0,0.008362666393319765
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,16,12,1,128,0,1,float16,float16,0,0.008362666393319765
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,16,12,1,128,0,1,float16,fp8,0,0.008703999842206636
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,16,12,1,128,0,1,fp8,fp8,0,0.008362666393319765
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,16,12,2,128,0,1,float16,float16,0,0.008362666393319765
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,16,12,2,128,0,1,float16,fp8,0,0.008703999842206636
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,16,12,2,128,0,1,fp8,fp8,0,0.008362666393319765
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,16,12,4,128,0,1,float16,float16,0,0.008362666393319765
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,16,12,4,128,0,1,float16,fp8,0,0.0085333331177632
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,16,12,4,128,0,1,fp8,fp8,0,0.008362666393319765
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,16384,8,1,128,0,1,fp8,fp8,0,16.678229014078777
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,16384,8,2,128,0,1,fp8,fp8,0,16.265727996826172
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,16384,8,1,128,0,1,float16,float16,0,27.623936971028645
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,16384,8,1,128,0,1,float16,fp8,0,27.378005981445312
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,16384,8,2,128,0,1,float16,fp8,0,27.79869842529297
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,16384,8,2,128,0,1,float16,float16,0,28.031829833984375
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,16384,8,4,128,0,1,float16,float16,0,27.830785115559895
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,16384,8,4,128,0,1,float16,fp8,0,28.92987823486328
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,16384,8,4,128,0,1,fp8,fp8,0,16.99566904703776
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,16384,8,8,128,0,1,fp8,fp8,0,8.475818634033203
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,16384,8,8,128,0,1,float16,float16,0,14.136319478352865
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,16384,8,8,128,0,1,float16,fp8,0,14.45358912150065
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,16384,8,1,128,0,1,float16,float16,0,13.680981953938803
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,16384,8,1,128,0,1,fp8,fp8,0,8.057856241861979
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,16384,8,1,128,0,1,float16,fp8,0,13.889536539713541
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,16384,8,2,128,0,1,float16,float16,0,14.369621276855469
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,16384,8,2,128,0,1,fp8,fp8,0,8.25275739034017
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,16384,8,2,128,0,1,float16,fp8,0,13.90011723836263
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,16384,8,8,128,0,1,float16,float16,0,6.807210922241211
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,16384,8,8,128,0,1,float16,fp8,0,7.065429051717122
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,16384,8,4,128,0,1,float16,float16,0,13.774677276611328
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,16384,8,4,128,0,1,fp8,fp8,0,8.567466735839844
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,16384,8,4,128,0,1,float16,fp8,0,13.8700803120931
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,16384,8,8,128,0,1,fp8,fp8,0,4.228608131408691
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,16384,8,1,128,0,1,float16,float16,0,6.520149230957031
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,16384,8,1,128,0,1,fp8,fp8,0,3.725482622782389
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,16384,8,2,128,0,1,fp8,fp8,0,3.866623878479004
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,16384,8,1,128,0,1,float16,fp8,0,6.804821650187175
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,16384,8,2,128,0,1,float16,float16,0,7.104170481363933
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,16384,8,2,128,0,1,float16,fp8,0,6.859093348185222
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,16384,8,4,128,0,1,float16,float16,0,7.107242584228516
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,16384,8,4,128,0,1,float16,fp8,0,7.144106547037761
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,16384,8,4,128,0,1,fp8,fp8,0,4.013567924499512
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,16384,8,8,128,0,1,float16,float16,0,3.567786534627279
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,16384,8,8,128,0,1,float16,fp8,0,3.431935946146647
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,16384,8,8,128,0,1,fp8,fp8,0,2.203989346822103
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,16384,8,1,128,0,1,float16,float16,0,3.2744105656941733
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,16384,8,1,128,0,1,float16,fp8,0,3.1476052602132163
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,16384,8,1,128,0,1,fp8,fp8,0,1.9073707262674968
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,16384,8,2,128,0,1,float16,float16,0,3.3215147654215493
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,16384,8,2,128,0,1,float16,fp8,0,3.1892480850219727
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,16384,8,2,128,0,1,fp8,fp8,0,1.9189759890238445
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,16384,8,4,128,0,1,float16,float16,0,3.2114346822102866
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,16384,8,4,128,0,1,float16,fp8,0,3.327829360961914
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,16384,8,4,128,0,1,fp8,fp8,0,2.0133546193440757
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,12288,8,1,128,0,1,float16,float16,0,15.518549601236979
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,12288,8,1,128,0,1,fp8,fp8,0,9.531733194986979
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,12288,8,1,128,0,1,float16,fp8,0,15.681877136230469
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,12288,8,2,128,0,1,fp8,fp8,0,9.504085540771484
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,12288,8,2,128,0,1,float16,float16,0,15.862443288167318
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,12288,8,2,128,0,1,float16,fp8,0,15.747071584065756
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,12288,8,4,128,0,1,float16,float16,0,16.18346659342448
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,12288,8,4,128,0,1,float16,fp8,0,15.935829162597656
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,12288,8,4,128,0,1,fp8,fp8,0,9.548458735148111
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,12288,8,8,128,0,1,float16,float16,0,8.247637430826822
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,12288,8,8,128,0,1,fp8,fp8,0,5.0104319254557295
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,12288,8,8,128,0,1,float16,fp8,0,8.300714492797852
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,12288,8,1,128,0,1,float16,float16,0,7.762773513793945
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,12288,8,1,128,0,1,fp8,fp8,0,4.460202534993489
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,12288,8,1,128,0,1,float16,fp8,0,7.896234512329102
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,12288,8,2,128,0,1,float16,float16,0,7.9148375193278
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,12288,8,2,128,0,1,fp8,fp8,0,4.664661407470703
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,12288,8,2,128,0,1,float16,fp8,0,7.761578877766927
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,12288,8,4,128,0,1,float16,float16,0,7.672661463419597
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,12288,8,4,128,0,1,float16,fp8,0,7.833941141764323
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,12288,8,4,128,0,1,fp8,fp8,0,4.684288024902344
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,12288,8,8,128,0,1,float16,float16,0,4.065962791442871
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,12288,8,8,128,0,1,float16,fp8,0,3.9787521362304688
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,12288,8,8,128,0,1,fp8,fp8,0,2.589695930480957
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,12288,8,1,128,0,1,float16,float16,0,3.6841812133789062
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,12288,8,1,128,0,1,fp8,fp8,0,2.1015893618265786
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,12288,8,1,128,0,1,float16,fp8,0,3.5886081059773765
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,12288,8,2,128,0,1,float16,float16,0,3.719850540161133
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,12288,8,2,128,0,1,fp8,fp8,0,2.2529706954956055
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,12288,8,2,128,0,1,float16,fp8,0,3.61028258005778
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,12288,8,4,128,0,1,float16,float16,0,3.818496068318685
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,12288,8,4,128,0,1,float16,fp8,0,3.7795839309692383
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,12288,8,8,128,0,1,float16,float16,0,1.9563520749409993
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,12288,8,4,128,0,1,fp8,fp8,0,2.323626677195231
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,12288,8,8,128,0,1,float16,fp8,0,1.8897919654846191
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,12288,8,8,128,0,1,fp8,fp8,0,1.2571307023366292
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,12288,8,1,128,0,1,float16,float16,0,1.8990079561869304
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,12288,8,1,128,0,1,fp8,fp8,0,1.1415893236796062
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,12288,8,1,128,0,1,float16,fp8,0,1.8682880401611328
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,12288,8,2,128,0,1,float16,float16,0,1.9351894060770671
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,12288,8,2,128,0,1,float16,fp8,0,1.857877254486084
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,12288,8,2,128,0,1,fp8,fp8,0,1.1496106783548992
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,12288,8,4,128,0,1,float16,float16,0,1.8490026791890461
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,12288,8,4,128,0,1,float16,fp8,0,1.8621439933776855
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,12288,8,4,128,0,1,fp8,fp8,0,1.167359987894694
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,10240,8,1,128,0,1,fp8,fp8,0,6.4337921142578125
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,10240,8,1,128,0,1,float16,float16,0,11.307178497314453
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,10240,8,1,128,0,1,float16,fp8,0,10.807637532552084
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,10240,8,2,128,0,1,fp8,fp8,0,6.506666819254558
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,10240,8,2,128,0,1,float16,float16,0,11.283968607584635
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,10240,8,2,128,0,1,float16,fp8,0,11.61898676554362
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,10240,8,4,128,0,1,float16,float16,0,11.687594095865885
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,10240,8,4,128,0,1,float16,fp8,0,11.499691009521484
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,10240,8,8,128,0,1,float16,float16,0,5.548032124837239
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,10240,8,8,128,0,1,float16,fp8,0,5.63865598042806
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,10240,8,4,128,0,1,fp8,fp8,0,6.837247848510742
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,10240,8,8,128,0,1,fp8,fp8,0,3.567445437113444
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,10240,8,1,128,0,1,float16,float16,0,4.887893358866374
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,10240,8,1,128,0,1,float16,fp8,0,5.07477347056071
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,10240,8,1,128,0,1,fp8,fp8,0,3.0254081090291343
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,10240,8,2,128,0,1,float16,float16,0,5.442560195922852
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,10240,8,2,128,0,1,fp8,fp8,0,3.1235411961873374
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,10240,8,2,128,0,1,float16,fp8,0,5.385557174682617
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,10240,8,4,128,0,1,float16,float16,0,5.361152013142903
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,10240,8,4,128,0,1,float16,fp8,0,5.365589141845703
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,10240,8,4,128,0,1,fp8,fp8,0,3.250346819559733
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,10240,8,8,128,0,1,float16,float16,0,2.8984320958455405
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,10240,8,8,128,0,1,float16,fp8,0,2.80729611714681
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,10240,8,8,128,0,1,fp8,fp8,0,1.874773343404134
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,10240,8,1,128,0,1,float16,float16,0,2.471423943837484
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,10240,8,1,128,0,1,float16,fp8,0,2.57041072845459
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,10240,8,1,128,0,1,fp8,fp8,0,1.5542613665262859
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,10240,8,2,128,0,1,float16,float16,0,2.5552213986714682
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,10240,8,2,128,0,1,float16,fp8,0,2.493951956431071
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,10240,8,4,128,0,1,float16,float16,0,2.61734406153361
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,10240,8,2,128,0,1,fp8,fp8,0,1.5523840586344402
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,10240,8,4,128,0,1,float16,fp8,0,2.658986727396647
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,10240,8,4,128,0,1,fp8,fp8,0,1.6320853233337402
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,10240,8,8,128,0,1,float16,float16,0,1.3788159688313801
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,10240,8,8,128,0,1,float16,fp8,0,1.386837323506673
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,10240,8,8,128,0,1,fp8,fp8,0,0.8934400081634521
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,10240,8,1,128,0,1,float16,float16,0,1.3341013590494792
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,10240,8,1,128,0,1,float16,fp8,0,1.3388800621032715
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,10240,8,2,128,0,1,float16,float16,0,1.382912000020345
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,10240,8,1,128,0,1,fp8,fp8,0,0.8362666765848795
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,10240,8,2,128,0,1,fp8,fp8,0,0.8260266780853271
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,10240,8,4,128,0,1,float16,float16,0,1.3783040046691895
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,10240,8,2,128,0,1,float16,fp8,0,1.3453653653462727
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,10240,8,4,128,0,1,float16,fp8,0,1.3298346996307373
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,10240,8,4,128,0,1,fp8,fp8,0,0.8301226298014323
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,8192,8,2,128,0,1,fp8,fp8,0,9.004543940226236
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,8192,8,1,128,0,1,float16,float16,0,14.841173807779947
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,8192,8,1,128,0,1,fp8,fp8,0,9.01034673055013
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,8192,8,1,128,0,1,float16,fp8,0,14.71658706665039
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,8192,8,2,128,0,1,float16,float16,0,15.04904556274414
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,8192,8,2,128,0,1,float16,fp8,0,14.889984130859375
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,8192,8,4,128,0,1,float16,float16,0,15.08676274617513
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,8192,8,4,128,0,1,float16,fp8,0,15.006890614827475
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,8192,8,8,128,0,1,fp8,fp8,0,5.054293314615886
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,8192,8,8,128,0,1,float16,float16,0,7.542954762776692
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,8192,8,4,128,0,1,fp8,fp8,0,9.200810750325521
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,8192,8,8,128,0,1,float16,fp8,0,7.8755842844645185
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,8192,8,1,128,0,1,float16,float16,0,6.8980051676432295
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,8192,8,1,128,0,1,fp8,fp8,0,4.146346728006999
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,8192,8,1,128,0,1,float16,fp8,0,7.455573399861653
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,8192,8,2,128,0,1,float16,float16,0,6.850730895996094
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,8192,8,2,128,0,1,fp8,fp8,0,4.209834734598796
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,8192,8,2,128,0,1,float16,fp8,0,7.189162572224935
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,8192,8,8,128,0,1,float16,float16,0,3.799893379211426
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,8192,8,8,128,0,1,float16,fp8,0,3.871744155883789
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,8192,8,4,128,0,1,fp8,fp8,0,4.536831855773926
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,8192,8,8,128,0,1,fp8,fp8,0,2.45196803410848
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,8192,8,4,128,0,1,float16,float16,0,7.482367833455403
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,8192,8,4,128,0,1,float16,fp8,0,7.745706558227539
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,8192,8,1,128,0,1,float16,float16,0,3.2863572438557944
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,8192,8,1,128,0,1,float16,fp8,0,3.307861328125
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,8192,8,1,128,0,1,fp8,fp8,0,1.9677866299947102
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,8192,8,2,128,0,1,fp8,fp8,0,2.051413377126058
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,8192,8,2,128,0,1,float16,float16,0,3.4858665466308594
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,8192,8,2,128,0,1,float16,fp8,0,3.4167467753092446
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,8192,8,4,128,0,1,float16,float16,0,3.493376096089681
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,8192,8,4,128,0,1,float16,fp8,0,3.5206826527913413
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,8192,8,8,128,0,1,float16,float16,0,1.914197285970052
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,8192,8,8,128,0,1,float16,fp8,0,1.836714744567871
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,8192,8,4,128,0,1,fp8,fp8,0,2.203989346822103
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,8192,8,8,128,0,1,fp8,fp8,0,1.2470613320668538
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,8192,8,1,128,0,1,float16,float16,0,1.6853334108988445
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,8192,8,1,128,0,1,float16,fp8,0,1.6529067357381184
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,8192,8,1,128,0,1,fp8,fp8,0,0.9953280289967855
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,8192,8,2,128,0,1,float16,float16,0,1.6853334108988445
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,8192,8,2,128,0,1,fp8,fp8,0,1.0019839604695637
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,8192,8,2,128,0,1,float16,fp8,0,1.6723626454671223
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,8192,8,4,128,0,1,float16,fp8,0,1.6539306640625
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,8192,8,4,128,0,1,float16,float16,0,1.693013350168864
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,8192,8,4,128,0,1,fp8,fp8,0,1.1048959891001384
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,8192,8,8,128,0,1,float16,float16,0,0.9248426755269369
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,8192,8,8,128,0,1,float16,fp8,0,0.9202346801757812
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,8192,8,8,128,0,1,fp8,fp8,0,0.5806080102920532
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,8192,8,1,128,0,1,float16,float16,0,0.8861013253529867
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,8192,8,1,128,0,1,fp8,fp8,0,0.5620053211847941
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,8192,8,1,128,0,1,float16,fp8,0,0.9113600254058838
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,8192,8,2,128,0,1,float16,fp8,0,0.927232027053833
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,8192,8,2,128,0,1,fp8,fp8,0,0.5713920195897421
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,8192,8,2,128,0,1,float16,float16,0,0.9065813223520914
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,8192,8,4,128,0,1,float16,float16,0,0.9118719895680746
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,8192,8,4,128,0,1,float16,fp8,0,0.9239892959594727
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,8192,8,4,128,0,1,fp8,fp8,0,0.5672959884007772
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,6144,8,1,128,0,1,fp8,fp8,0,5.16266663869222
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,6144,8,2,128,0,1,fp8,fp8,0,5.176149368286133
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,6144,8,1,128,0,1,float16,float16,0,8.428885142008463
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,6144,8,1,128,0,1,float16,fp8,0,8.265727996826172
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,6144,8,2,128,0,1,float16,float16,0,8.494250615437826
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,6144,8,2,128,0,1,float16,fp8,0,8.372906366984049
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,6144,8,4,128,0,1,float16,fp8,0,8.691370646158854
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,6144,8,4,128,0,1,float16,float16,0,8.6560427347819
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,6144,8,8,128,0,1,float16,float16,0,4.537173271179199
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,6144,8,8,128,0,1,fp8,fp8,0,3.102378527323405
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,6144,8,4,128,0,1,fp8,fp8,0,5.711189270019531
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,6144,8,8,128,0,1,float16,fp8,0,4.458837191263835
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,6144,8,1,128,0,1,float16,float16,0,3.8888107935587564
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,6144,8,1,128,0,1,float16,fp8,0,3.970730781555176
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,6144,8,1,128,0,1,fp8,fp8,0,2.353834629058838
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,6144,8,2,128,0,1,fp8,fp8,0,2.5518080393473306
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,6144,8,2,128,0,1,float16,fp8,0,3.94871457417806
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,6144,8,2,128,0,1,float16,float16,0,3.991210619608561
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,6144,8,4,128,0,1,fp8,fp8,0,2.7106987635294595
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,6144,8,4,128,0,1,float16,fp8,0,4.110335985819499
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,6144,8,8,128,0,1,float16,float16,0,2.273109277089437
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,6144,8,4,128,0,1,float16,float16,0,4.276394526163737
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,6144,8,8,128,0,1,float16,fp8,0,2.2103039423624673
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,6144,8,8,128,0,1,fp8,fp8,0,1.544533411661784
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,6144,8,1,128,0,1,float16,float16,0,1.859242598215739
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,6144,8,1,128,0,1,fp8,fp8,0,1.1441493034362793
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,6144,8,2,128,0,1,fp8,fp8,0,1.2351146539052327
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,6144,8,2,128,0,1,float16,float16,0,1.9278507232666016
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,6144,8,2,128,0,1,float16,fp8,0,1.8971306482950847
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,6144,8,1,128,0,1,float16,fp8,0,1.895253340403239
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,6144,8,4,128,0,1,float16,float16,0,2.0222293535868325
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,6144,8,8,128,0,1,fp8,fp8,0,0.73198930422465
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,6144,8,4,128,0,1,fp8,fp8,0,1.3163519700368245
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,6144,8,4,128,0,1,float16,fp8,0,2.0377599398295083
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,6144,8,8,128,0,1,float16,float16,0,1.0956799983978271
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,6144,8,8,128,0,1,float16,fp8,0,1.0584746996561687
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,6144,8,1,128,0,1,float16,float16,0,0.9809919993082682
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,6144,8,1,128,0,1,float16,fp8,0,0.9992533524831136
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,6144,8,1,128,0,1,fp8,fp8,0,0.5935786565144857
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,6144,8,2,128,0,1,float16,float16,0,0.9832106431325277
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,6144,8,2,128,0,1,fp8,fp8,0,0.6220800081888834
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,6144,8,2,128,0,1,float16,fp8,0,0.9869653383890787
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,6144,8,4,128,0,1,float16,float16,0,0.9953280289967855
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,6144,8,4,128,0,1,float16,fp8,0,1.0112000306447346
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,6144,8,4,128,0,1,fp8,fp8,0,0.6162773370742798
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,6144,8,8,128,0,1,float16,float16,0,0.5724159876505533
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,6144,8,8,128,0,1,float16,fp8,0,0.556714653968811
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,6144,8,8,128,0,1,fp8,fp8,0,0.35652267932891846
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,6144,8,1,128,0,1,float16,fp8,0,0.5502293507258097
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,6144,8,1,128,0,1,float16,float16,0,0.5468159914016724
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,6144,8,2,128,0,1,float16,float16,0,0.5568853219350179
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,6144,8,1,128,0,1,fp8,fp8,0,0.35942399501800537
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,6144,8,2,128,0,1,float16,fp8,0,0.5471573273340861
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,6144,8,2,128,0,1,fp8,fp8,0,0.358570655186971
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,6144,8,4,128,0,1,float16,float16,0,0.5437440077463785
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,6144,8,4,128,0,1,float16,fp8,0,0.5459626515706381
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,6144,8,4,128,0,1,fp8,fp8,0,0.3604480028152466
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,4096,8,1,128,0,1,fp8,fp8,0,5.153621355692546
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,4096,8,1,128,0,1,float16,float16,0,7.889919916788737
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,4096,8,1,128,0,1,float16,fp8,0,8.133290608723959
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,4096,8,2,128,0,1,float16,float16,0,8.359082539876303
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,4096,8,2,128,0,1,float16,fp8,0,8.393215815226236
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,4096,8,2,128,0,1,fp8,fp8,0,5.382485071818034
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,4096,8,4,128,0,1,float16,float16,0,8.442026774088541
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,4096,8,4,128,0,1,float16,fp8,0,8.323925018310547
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,4096,8,4,128,0,1,fp8,fp8,0,5.848063786824544
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,4096,8,8,128,0,1,fp8,fp8,0,3.223210652669271
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,4096,8,8,128,0,1,float16,float16,0,4.642304102579753
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,4096,8,8,128,0,1,float16,fp8,0,4.625237464904785
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,4096,8,1,128,0,1,float16,fp8,0,3.6918614705403647
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,4096,8,1,128,0,1,float16,float16,0,3.788458824157715
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,4096,8,1,128,0,1,fp8,fp8,0,2.3801172574361167
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,4096,8,2,128,0,1,float16,float16,0,3.8121814727783203
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,4096,8,2,128,0,1,fp8,fp8,0,2.53439998626709
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,4096,8,2,128,0,1,float16,fp8,0,3.8292481104532876
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,4096,8,4,128,0,1,float16,float16,0,4.164608001708984
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,4096,8,4,128,0,1,float16,fp8,0,4.145834604899089
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,4096,8,4,128,0,1,fp8,fp8,0,2.7460266749064126
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,4096,8,8,128,0,1,float16,fp8,0,2.2275412877400718
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,4096,8,8,128,0,1,float16,float16,0,2.294271945953369
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,4096,8,8,128,0,1,fp8,fp8,0,1.5822505950927734
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,4096,8,1,128,0,1,float16,float16,0,1.785685380299886
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,4096,8,1,128,0,1,float16,fp8,0,1.767082691192627
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,4096,8,1,128,0,1,fp8,fp8,0,1.1622400283813477
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,4096,8,2,128,0,1,float16,float16,0,1.90720001856486
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,4096,8,2,128,0,1,float16,fp8,0,1.8252800305684407
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,4096,8,2,128,0,1,fp8,fp8,0,1.2178773085276287
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,4096,8,4,128,0,1,float16,float16,0,2.0253012975056968
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,4096,8,4,128,0,1,float16,fp8,0,1.9664212862650554
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,4096,8,4,128,0,1,fp8,fp8,0,1.3341013590494792
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,4096,8,8,128,0,1,float16,float16,0,1.1091626485188801
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,4096,8,8,128,0,1,float16,fp8,0,1.110357364018758
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,4096,8,8,128,0,1,fp8,fp8,0,0.7864320278167725
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,4096,8,1,128,0,1,float16,float16,0,0.8796160221099854
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,4096,8,1,128,0,1,float16,fp8,0,0.8898560206095377
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,4096,8,1,128,0,1,fp8,fp8,0,0.5394773483276367
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,4096,8,2,128,0,1,float16,float16,0,0.8871253331502279
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,4096,8,2,128,0,1,fp8,fp8,0,0.5623466571172079
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,4096,8,2,128,0,1,float16,fp8,0,0.8891733487447103
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,4096,8,4,128,0,1,float16,float16,0,0.9431040287017822
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,4096,8,4,128,0,1,float16,fp8,0,0.9330346584320068
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,4096,8,4,128,0,1,fp8,fp8,0,0.6193493207295736
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,4096,8,8,128,0,1,float16,float16,0,0.508074680964152
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,4096,8,8,128,0,1,fp8,fp8,0,0.3203413287798564
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,4096,8,8,128,0,1,float16,fp8,0,0.5114880005518595
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,4096,8,1,128,0,1,float16,float16,0,0.482474684715271
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,4096,8,1,128,0,1,float16,fp8,0,0.478549321492513
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,4096,8,1,128,0,1,fp8,fp8,0,0.3123199939727783
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,4096,8,2,128,0,1,float16,fp8,0,0.4776959816614787
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,4096,8,2,128,0,1,float16,float16,0,0.4957866668701172
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,4096,8,2,128,0,1,fp8,fp8,0,0.30617600679397583
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,4096,8,4,128,0,1,float16,float16,0,0.48793598016103107
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,4096,8,4,128,0,1,float16,fp8,0,0.4991999864578247
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,4096,8,4,128,0,1,fp8,fp8,0,0.311296006043752
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,4096,8,8,128,0,1,float16,fp8,0,0.3126613299051921
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,4096,8,8,128,0,1,float16,float16,0,0.30532266696294147
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,4096,8,8,128,0,1,fp8,fp8,0,0.1996799906094869
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,4096,8,1,128,0,1,float16,float16,0,0.30617600679397583
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,4096,8,1,128,0,1,float16,fp8,0,0.29730133215586346
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,4096,8,1,128,0,1,fp8,fp8,0,0.20121600230534872
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,4096,8,2,128,0,1,float16,float16,0,0.2988373239835103
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,4096,8,2,128,0,1,float16,fp8,0,0.30429865916570026
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,4096,8,2,128,0,1,fp8,fp8,0,0.2032639980316162
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,4096,8,4,128,0,1,float16,float16,0,0.2954240043958028
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,4096,8,4,128,0,1,float16,fp8,0,0.2954240043958028
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,4096,8,4,128,0,1,fp8,fp8,0,0.20684800545374551
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,3072,8,1,128,0,1,float16,float16,0,4.542805353800456
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,3072,8,1,128,0,1,float16,fp8,0,4.473002751668294
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,3072,8,1,128,0,1,fp8,fp8,0,3.1713279088338218
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,3072,8,2,128,0,1,fp8,fp8,0,3.337216059366862
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,3072,8,2,128,0,1,float16,float16,0,4.721834818522136
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,3072,8,2,128,0,1,float16,fp8,0,4.803413391113281
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,3072,8,4,128,0,1,float16,float16,0,5.1024214426676435
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,3072,8,4,128,0,1,float16,fp8,0,5.175125439961751
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,3072,8,4,128,0,1,fp8,fp8,0,3.6753066380818686
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,3072,8,8,128,0,1,float16,float16,0,2.8562774658203125
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,3072,8,8,128,0,1,fp8,fp8,0,2.049877325693766
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,3072,8,1,128,0,1,float16,float16,0,2.2033066749572754
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,3072,8,1,128,0,1,float16,fp8,0,2.2043306032816568
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,3072,8,8,128,0,1,float16,fp8,0,2.8427947362264
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,3072,8,1,128,0,1,fp8,fp8,0,1.4283092816670735
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,3072,8,2,128,0,1,float16,float16,0,2.3514453570048013
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,3072,8,2,128,0,1,float16,fp8,0,2.2502400080362954
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,3072,8,2,128,0,1,fp8,fp8,0,1.542143980662028
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,3072,8,8,128,0,1,float16,fp8,0,1.3637973467508953
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,3072,8,4,128,0,1,fp8,fp8,0,1.729706605275472
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,3072,8,4,128,0,1,float16,fp8,0,2.4917333920796714
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,3072,8,4,128,0,1,float16,float16,0,2.487295945485433
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,3072,8,8,128,0,1,float16,float16,0,1.4165333112080891
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,3072,8,8,128,0,1,fp8,fp8,0,1.0315093199412029
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,3072,8,1,128,0,1,float16,float16,0,1.01256529490153
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,3072,8,1,128,0,1,float16,fp8,0,1.008128007253011
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,3072,8,1,128,0,1,fp8,fp8,0,0.6422186692555746
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,3072,8,2,128,0,1,float16,float16,0,1.1008000373840332
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,3072,8,2,128,0,1,fp8,fp8,0,0.7159466743469238
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,3072,8,2,128,0,1,float16,fp8,0,1.0658133029937744
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,3072,8,4,128,0,1,float16,float16,0,1.2187306880950928
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,3072,8,4,128,0,1,float16,fp8,0,1.198421319325765
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,3072,8,4,128,0,1,fp8,fp8,0,0.8248319625854492
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,3072,8,8,128,0,1,float16,float16,0,0.6377813418706259
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,3072,8,8,128,0,1,float16,fp8,0,0.5841919978459676
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,3072,8,8,128,0,1,fp8,fp8,0,0.4710400104522705
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,3072,8,1,128,0,1,float16,float16,0,0.5312853256861368
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,3072,8,1,128,0,1,float16,fp8,0,0.5376000006993612
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,3072,8,1,128,0,1,fp8,fp8,0,0.3304106593132019
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,3072,8,2,128,0,1,float16,float16,0,0.5585920015970866
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,3072,8,2,128,0,1,float16,fp8,0,0.5312853256861368
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,3072,8,2,128,0,1,fp8,fp8,0,0.33177600304285687
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,3072,8,4,128,0,1,float16,float16,0,0.5387946764628092
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,3072,8,4,128,0,1,float16,fp8,0,0.5522773265838623
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,3072,8,4,128,0,1,fp8,fp8,0,0.33638401826222736
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,3072,8,8,128,0,1,float16,float16,0,0.31539199749628705
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,3072,8,8,128,0,1,float16,fp8,0,0.3099306623140971
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,3072,8,1,128,0,1,float16,float16,0,0.2949120004971822
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,3072,8,8,128,0,1,fp8,fp8,0,0.202239990234375
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,3072,8,1,128,0,1,float16,fp8,0,0.29320534070332843
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,3072,8,1,128,0,1,fp8,fp8,0,0.1889280080795288
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,3072,8,2,128,0,1,float16,float16,0,0.2998613317807515
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,3072,8,2,128,0,1,fp8,fp8,0,0.19575466712315878
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,3072,8,2,128,0,1,float16,fp8,0,0.29286400477091473
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,3072,8,4,128,0,1,float16,fp8,0,0.29337600866953534
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,3072,8,4,128,0,1,fp8,fp8,0,0.19933867454528809
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,3072,8,4,128,0,1,float16,float16,0,0.3007146716117859
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,3072,8,8,128,0,1,float16,float16,0,0.18995199600855509
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,3072,8,8,128,0,1,float16,fp8,0,0.18346667289733887
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,3072,8,8,128,0,1,fp8,fp8,0,0.12834133704503378
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,3072,8,1,128,0,1,float16,float16,0,0.19234132766723633
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,3072,8,1,128,0,1,float16,fp8,0,0.1879040002822876
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,3072,8,1,128,0,1,fp8,fp8,0,0.12919466694196066
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,3072,8,2,128,0,1,float16,float16,0,0.19114667177200317
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,3072,8,2,128,0,1,float16,fp8,0,0.19165867567062378
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,3072,8,2,128,0,1,fp8,fp8,0,0.1264639993508657
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,3072,8,4,128,0,1,float16,float16,0,0.18602667252222696
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,3072,8,4,128,0,1,float16,fp8,0,0.187391996383667
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,3072,8,4,128,0,1,fp8,fp8,0,0.12987732887268066
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,2048,8,1,128,0,1,float16,float16,0,4.801024119059245
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,2048,8,1,128,0,1,float16,fp8,0,4.762965202331543
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,2048,8,1,128,0,1,fp8,fp8,0,3.405311902364095
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,2048,8,2,128,0,1,float16,float16,0,5.122901280721028
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,2048,8,2,128,0,1,float16,fp8,0,5.032106717427571
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,2048,8,2,128,0,1,fp8,fp8,0,3.5858774185180664
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,2048,8,4,128,0,1,float16,float16,0,5.395968119303386
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,2048,8,4,128,0,1,float16,fp8,0,5.346815745035808
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,2048,8,8,128,0,1,float16,float16,0,3.0417919158935547
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,2048,8,4,128,0,1,fp8,fp8,0,3.975338617960612
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,2048,8,8,128,0,1,float16,fp8,0,3.0667092005411782
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,2048,8,1,128,0,1,float16,float16,0,2.225663979848226
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,2048,8,1,128,0,1,float16,fp8,0,2.1828266779581704
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,2048,8,8,128,0,1,fp8,fp8,0,2.3232852617899575
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,2048,8,1,128,0,1,fp8,fp8,0,1.5716692606608074
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,2048,8,2,128,0,1,float16,float16,0,2.3466666539510093
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,2048,8,2,128,0,1,float16,fp8,0,2.312191963195801
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,2048,8,2,128,0,1,fp8,fp8,0,1.6788479487101238
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,2048,8,4,128,0,1,float16,float16,0,2.63645871480306
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,2048,8,4,128,0,1,fp8,fp8,0,1.8906453450520833
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,2048,8,4,128,0,1,float16,fp8,0,2.5664854049682617
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,2048,8,8,128,0,1,float16,float16,0,1.5218346913655598
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,2048,8,8,128,0,1,float16,fp8,0,1.4817280769348145
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,2048,8,8,128,0,1,fp8,fp8,0,1.1180373032887776
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,2048,8,1,128,0,1,float16,float16,0,1.049770673116048
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,2048,8,1,128,0,1,float16,fp8,0,1.0369706948598225
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,2048,8,1,128,0,1,fp8,fp8,0,0.7412052949269613
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,2048,8,2,128,0,1,float16,float16,0,1.1209386984507244
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,2048,8,2,128,0,1,float16,fp8,0,1.110357364018758
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,2048,8,2,128,0,1,fp8,fp8,0,0.7874560356140137
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,2048,8,4,128,0,1,float16,float16,0,1.2747093041737874
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,2048,8,4,128,0,1,float16,fp8,0,1.2397226492563884
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,2048,8,4,128,0,1,fp8,fp8,0,0.9074347019195557
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,2048,8,8,128,0,1,float16,float16,0,0.7135573228200277
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,2048,8,8,128,0,1,fp8,fp8,0,0.5328213373819987
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,2048,8,8,128,0,1,float16,fp8,0,0.6910293102264404
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,2048,8,1,128,0,1,float16,float16,0,0.4930560191472371
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,2048,8,1,128,0,1,float16,fp8,0,0.49698134263356525
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,2048,8,1,128,0,1,fp8,fp8,0,0.306005338827769
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,2048,8,2,128,0,1,float16,float16,0,0.5017600059509277
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,2048,8,2,128,0,1,float16,fp8,0,0.4978346824645996
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,2048,8,2,128,0,1,fp8,fp8,0,0.3094186584154765
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,2048,8,4,128,0,1,float16,float16,0,0.5447680155436198
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,2048,8,4,128,0,1,float16,fp8,0,0.5353813171386719
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,2048,8,4,128,0,1,fp8,fp8,0,0.40379734834035236
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,2048,8,8,128,0,1,float16,float16,0,0.2855253418286641
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,2048,8,8,128,0,1,float16,fp8,0,0.2916693290074666
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,2048,8,8,128,0,1,fp8,fp8,0,0.18568533658981323
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,2048,8,1,128,0,1,float16,float16,0,0.26026666164398193
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,2048,8,1,128,0,1,float16,fp8,0,0.26180267333984375
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,2048,8,1,128,0,1,fp8,fp8,0,0.16964266697565714
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,2048,8,2,128,0,1,float16,fp8,0,0.27084799607594806
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,2048,8,2,128,0,1,float16,float16,0,0.27852799495061237
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,2048,8,2,128,0,1,fp8,fp8,0,0.16964266697565714
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,2048,8,4,128,0,1,float16,float16,0,0.2797226707140605
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,2048,8,4,128,0,1,float16,fp8,0,0.27989333868026733
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,2048,8,4,128,0,1,fp8,fp8,0,0.17783466974894205
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,2048,8,8,128,0,1,float16,fp8,0,0.16520532965660095
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,2048,8,8,128,0,1,float16,float16,0,0.16947199900945029
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,2048,8,8,128,0,1,fp8,fp8,0,0.10939733187357585
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,2048,8,1,128,0,1,float16,float16,0,0.16827734311421713
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,2048,8,1,128,0,1,float16,fp8,0,0.16605866948763529
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,2048,8,1,128,0,1,fp8,fp8,0,0.10922666390736897
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,2048,8,2,128,0,1,float16,float16,0,0.1693013310432434
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,2048,8,2,128,0,1,float16,fp8,0,0.16725333531697592
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,2048,8,2,128,0,1,fp8,fp8,0,0.10905599594116211
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,2048,8,4,128,0,1,float16,float16,0,0.16554666558901468
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,2048,8,4,128,0,1,float16,fp8,0,0.16657066345214844
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,2048,8,4,128,0,1,fp8,fp8,0,0.11178666353225708
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,2048,8,8,128,0,1,float16,fp8,0,0.10205866893132527
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,2048,8,8,128,0,1,float16,float16,0,0.09745066364606221
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,2048,8,8,128,0,1,fp8,fp8,0,0.06297599772612254
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,2048,8,1,128,0,1,float16,float16,0,0.1013759970664978
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,2048,8,1,128,0,1,float16,fp8,0,0.0981333355108897
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,2048,8,1,128,0,1,fp8,fp8,0,0.06263466676076253
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,2048,8,2,128,0,1,float16,float16,0,0.10120532910029094
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,2048,8,2,128,0,1,float16,fp8,0,0.10359467069307964
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,2048,8,2,128,0,1,fp8,fp8,0,0.06263466676076253
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,2048,8,4,128,0,1,float16,float16,0,0.0981333355108897
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,2048,8,4,128,0,1,fp8,fp8,0,0.06263466676076253
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,2048,8,4,128,0,1,float16,fp8,0,0.1032533347606659
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,1536,8,1,128,0,1,float16,float16,0,2.9470720291137695
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,1536,8,1,128,0,1,fp8,fp8,0,2.1017600695292153
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,1536,8,1,128,0,1,float16,fp8,0,2.9224958419799805
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,1536,8,2,128,0,1,float16,float16,0,3.136512120564779
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,1536,8,2,128,0,1,float16,fp8,0,3.1081813176472983
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,1536,8,2,128,0,1,fp8,fp8,0,2.2621866861979165
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,1536,8,4,128,0,1,float16,float16,0,3.387392044067383
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,1536,8,4,128,0,1,float16,fp8,0,3.3914880752563477
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,1536,8,4,128,0,1,fp8,fp8,0,2.5279146830240884
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,1536,8,8,128,0,1,float16,float16,0,1.9974826176961262
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,1536,8,8,128,0,1,float16,fp8,0,1.961301326751709
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,1536,8,8,128,0,1,fp8,fp8,0,1.5223466555277507
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,1536,8,1,128,0,1,float16,float16,0,1.3839359283447266
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,1536,8,1,128,0,1,fp8,fp8,0,0.9620479742685953
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,1536,8,1,128,0,1,float16,fp8,0,1.3590186436971028
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,1536,8,2,128,0,1,float16,float16,0,1.469098726908366
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,1536,8,2,128,0,1,float16,fp8,0,1.4324053128560383
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,1536,8,2,128,0,1,fp8,fp8,0,1.0559146404266357
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,1536,8,4,128,0,1,float16,float16,0,1.6511999766031902
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,1536,8,4,128,0,1,float16,fp8,0,1.6489814122517903
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,1536,8,4,128,0,1,fp8,fp8,0,1.2218026320139568
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,1536,8,8,128,0,1,float16,float16,0,0.9704106648763021
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,1536,8,8,128,0,1,float16,fp8,0,0.934058666229248
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,1536,8,8,128,0,1,fp8,fp8,0,0.7524693012237549
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,1536,8,1,128,0,1,float16,float16,0,0.596992015838623
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,1536,8,1,128,0,1,float16,fp8,0,0.5860693454742432
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,1536,8,1,128,0,1,fp8,fp8,0,0.4060159921646118
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,1536,8,2,128,0,1,float16,float16,0,0.6772053241729736
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,1536,8,2,128,0,1,fp8,fp8,0,0.466261347134908
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,1536,8,2,128,0,1,float16,fp8,0,0.6570666631062826
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,1536,8,4,128,0,1,float16,fp8,0,0.7635626792907715
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,1536,8,4,128,0,1,float16,float16,0,0.7842133045196533
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,1536,8,8,128,0,1,float16,float16,0,0.4256426493326823
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,1536,8,4,128,0,1,fp8,fp8,0,0.5727573235829672
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,1536,8,8,128,0,1,float16,fp8,0,0.3676160176595052
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,1536,8,8,128,0,1,fp8,fp8,0,0.33740798632303876
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,1536,8,1,128,0,1,float16,float16,0,0.29815467198689777
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,1536,8,1,128,0,1,float16,fp8,0,0.30139732360839844
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,1536,8,1,128,0,1,fp8,fp8,0,0.18346667289733887
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,1536,8,2,128,0,1,float16,float16,0,0.30446932713190716
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,1536,8,2,128,0,1,fp8,fp8,0,0.18175999323527017
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,1536,8,2,128,0,1,float16,fp8,0,0.3020799954732259
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,1536,8,4,128,0,1,float16,float16,0,0.3107840021451314
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,1536,8,4,128,0,1,fp8,fp8,0,0.20616533358891806
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,1536,8,4,128,0,1,float16,fp8,0,0.31641600529352826
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,1536,8,8,128,0,1,float16,float16,0,0.17988266547520956
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,1536,8,8,128,0,1,float16,fp8,0,0.17749333381652832
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,1536,8,8,128,0,1,fp8,fp8,0,0.1104213297367096
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,1536,8,1,128,0,1,float16,float16,0,0.16537599762280783
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,1536,8,1,128,0,1,float16,fp8,0,0.1713493267695109
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,1536,8,1,128,0,1,fp8,fp8,0,0.11434666315714519
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,1536,8,2,128,0,1,float16,float16,0,0.16776533921559653
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,1536,8,2,128,0,1,float16,fp8,0,0.1693013310432434
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,1536,8,2,128,0,1,fp8,fp8,0,0.11264000336329143
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,1536,8,4,128,0,1,float16,float16,0,0.17390932639439902
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,1536,8,4,128,0,1,float16,fp8,0,0.16793600718180338
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,1536,8,4,128,0,1,fp8,fp8,0,0.11110400160153706
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,1536,8,8,128,0,1,float16,float16,0,0.11161599556605022
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,1536,8,8,128,0,1,float16,fp8,0,0.1114453375339508
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,1536,8,8,128,0,1,fp8,fp8,0,0.0773119976123174
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,1536,8,1,128,0,1,float16,float16,0,0.11246933539708455
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,1536,8,1,128,0,1,float16,fp8,0,0.11229866743087769
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,1536,8,1,128,0,1,fp8,fp8,0,0.07987200220425923
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,1536,8,2,128,0,1,float16,fp8,0,0.1083733340104421
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,1536,8,2,128,0,1,float16,float16,0,0.11229866743087769
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,1536,8,2,128,0,1,fp8,fp8,0,0.07918933530648549
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,1536,8,4,128,0,1,float16,float16,0,0.1109333336353302
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,1536,8,4,128,0,1,float16,fp8,0,0.11025066177050273
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,1536,8,4,128,0,1,fp8,fp8,0,0.07884799937407176
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,1536,8,8,128,0,1,float16,float16,0,0.07372800012429555
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,1536,8,8,128,0,1,float16,fp8,0,0.07355733215808868
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,1536,8,8,128,0,1,fp8,fp8,0,0.051882664362589516
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,1536,8,1,128,0,1,float16,fp8,0,0.07389866809050243
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,1536,8,1,128,0,1,float16,float16,0,0.07389866809050243
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,1536,8,1,128,0,1,fp8,fp8,0,0.051882664362589516
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,1536,8,2,128,0,1,float16,float16,0,0.07389866809050243
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,1536,8,2,128,0,1,fp8,fp8,0,0.051541333397229515
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,1536,8,2,128,0,1,float16,fp8,0,0.07458133498827617
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,1536,8,4,128,0,1,float16,float16,0,0.07406933108965556
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,1536,8,4,128,0,1,float16,fp8,0,0.07338666419188182
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,1536,8,4,128,0,1,fp8,fp8,0,0.05171200136343638
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,1024,8,1,128,0,1,float16,float16,0,2.9156694412231445
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,1024,8,1,128,0,1,fp8,fp8,0,1.9244373639424641
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,1024,8,1,128,0,1,float16,fp8,0,2.8504746754964194
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,1024,8,2,128,0,1,float16,float16,0,3.178325335184733
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,1024,8,2,128,0,1,float16,fp8,0,3.1127894719441733
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,1024,8,2,128,0,1,fp8,fp8,0,2.1548372904459634
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,1024,8,4,128,0,1,float16,float16,0,3.6258134841918945
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,1024,8,4,128,0,1,float16,fp8,0,3.467434565226237
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,1024,8,8,128,0,1,float16,float16,0,2.2055253982543945
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,1024,8,4,128,0,1,fp8,fp8,0,2.6338987350463867
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,1024,8,8,128,0,1,float16,fp8,0,2.094933350880941
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,1024,8,8,128,0,1,fp8,fp8,0,1.6638293266296387
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,1024,8,1,128,0,1,float16,float16,0,1.4801920255025227
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,1024,8,1,128,0,1,float16,fp8,0,1.4457173347473145
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,1024,8,1,128,0,1,fp8,fp8,0,0.957098642985026
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,1024,8,2,128,0,1,float16,float16,0,1.557162602742513
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,1024,8,2,128,0,1,float16,fp8,0,1.5237119992574055
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,1024,8,2,128,0,1,fp8,fp8,0,1.057792027791341
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,1024,8,4,128,0,1,float16,float16,0,1.7780052820841472
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,1024,8,4,128,0,1,float16,fp8,0,1.7256107330322266
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,1024,8,4,128,0,1,fp8,fp8,0,1.2547413508097331
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,1024,8,8,128,0,1,float16,float16,0,1.1042133172353108
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,1024,8,8,128,0,1,float16,fp8,0,1.0361173152923584
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,1024,8,8,128,0,1,fp8,fp8,0,0.7936000029246012
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,1024,8,1,128,0,1,float16,float16,0,0.696832021077474
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,1024,8,1,128,0,1,fp8,fp8,0,0.4869120121002197
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,1024,8,1,128,0,1,float16,fp8,0,0.687274694442749
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,1024,8,2,128,0,1,float16,float16,0,0.7869439919789633
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,1024,8,2,128,0,1,fp8,fp8,0,0.5261653264363607
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,1024,8,2,128,0,1,float16,fp8,0,0.7719253698984782
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,1024,8,4,128,0,1,float16,float16,0,0.8797866503397623
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,1024,8,4,128,0,1,float16,fp8,0,0.8488960266113281
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,1024,8,4,128,0,1,fp8,fp8,0,0.605183998743693
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,1024,8,8,128,0,1,float16,float16,0,0.5398186842600504
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,1024,8,8,128,0,1,float16,fp8,0,0.5077333450317383
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,1024,8,8,128,0,1,fp8,fp8,0,0.39099733034769696
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,1024,8,1,128,0,1,float16,float16,0,0.28859732548395794
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,1024,8,1,128,0,1,float16,fp8,0,0.2916693290074666
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,1024,8,1,128,0,1,fp8,fp8,0,0.17271467049916586
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,1024,8,2,128,0,1,float16,float16,0,0.2935466567675273
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,1024,8,2,128,0,1,float16,fp8,0,0.29047467311223346
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,1024,8,2,128,0,1,fp8,fp8,0,0.18466132879257202
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,1024,8,4,128,0,1,float16,float16,0,0.3433813254038493
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,1024,8,4,128,0,1,float16,fp8,0,0.32767999172210693
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,1024,8,8,128,0,1,float16,fp8,0,0.16110933820406595
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,1024,8,4,128,0,1,fp8,fp8,0,0.2892799973487854
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,1024,8,8,128,0,1,float16,float16,0,0.17476266622543335
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,1024,8,8,128,0,1,fp8,fp8,0,0.1032533347606659
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,1024,8,1,128,0,1,float16,float16,0,0.15223466356595358
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,1024,8,1,128,0,1,float16,fp8,0,0.14916266997655234
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,1024,8,2,128,0,1,float16,float16,0,0.1629866659641266
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,1024,8,1,128,0,1,fp8,fp8,0,0.10069333513577779
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,1024,8,2,128,0,1,float16,fp8,0,0.15223466356595358
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,1024,8,2,128,0,1,fp8,fp8,0,0.10564266641934712
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,1024,8,4,128,0,1,float16,float16,0,0.16366933782895407
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,1024,8,4,128,0,1,float16,fp8,0,0.15923200050989786
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,1024,8,4,128,0,1,fp8,fp8,0,0.1013759970664978
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,1024,8,8,128,0,1,float16,float16,0,0.10257066289583842
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,1024,8,8,128,0,1,float16,fp8,0,0.1032533347606659
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,1024,8,1,128,0,1,float16,float16,0,0.09642666578292847
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,1024,8,8,128,0,1,fp8,fp8,0,0.062463998794555664
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,1024,8,1,128,0,1,float16,fp8,0,0.1013759970664978
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,1024,8,1,128,0,1,fp8,fp8,0,0.06144000093142191
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,1024,8,2,128,0,1,float16,float16,0,0.0988159974416097
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,1024,8,2,128,0,1,float16,fp8,0,0.10018133123715718
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,1024,8,2,128,0,1,fp8,fp8,0,0.05819733440876007
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,1024,8,4,128,0,1,float16,float16,0,0.10086400310198466
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,1024,8,4,128,0,1,float16,fp8,0,0.09642666578292847
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,1024,8,4,128,0,1,fp8,fp8,0,0.06331733365853627
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,1024,8,8,128,0,1,float16,float16,0,0.05529599885145823
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,1024,8,8,128,0,1,float16,fp8,0,0.05529599885145823
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,1024,8,8,128,0,1,fp8,fp8,0,0.03857066730658213
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,1024,8,1,128,0,1,float16,float16,0,0.054272000988324486
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,1024,8,1,128,0,1,float16,fp8,0,0.05444266895453135
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,1024,8,2,128,0,1,float16,float16,0,0.054272000988324486
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,1024,8,1,128,0,1,fp8,fp8,0,0.038058665891488395
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,1024,8,2,128,0,1,fp8,fp8,0,0.038058665891488395
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,1024,8,2,128,0,1,float16,fp8,0,0.05461333195368449
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,1024,8,4,128,0,1,float16,float16,0,0.05461333195368449
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,1024,8,4,128,0,1,float16,fp8,0,0.05461333195368449
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,1024,8,4,128,0,1,fp8,fp8,0,0.03839999934037527
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,1024,8,8,128,0,1,float16,float16,0,0.04334933559099833
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,1024,8,8,128,0,1,float16,fp8,0,0.043178667624791466
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,1024,8,8,128,0,1,fp8,fp8,0,0.02918400118748347
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,1024,8,1,128,0,1,float16,float16,0,0.04266666869322459
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,1024,8,1,128,0,1,float16,fp8,0,0.042837331692377724
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,1024,8,1,128,0,1,fp8,fp8,0,0.0288426677385966
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,1024,8,2,128,0,1,float16,float16,0,0.043178667624791466
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,1024,8,2,128,0,1,float16,fp8,0,0.042837331692377724
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,1024,8,4,128,0,1,float16,float16,0,0.043007999658584595
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,1024,8,2,128,0,1,fp8,fp8,0,0.02867199977238973
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,1024,8,4,128,0,1,float16,fp8,0,0.042837331692377724
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,1024,8,4,128,0,1,fp8,fp8,0,0.0288426677385966
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,512,8,1,128,0,1,fp8,fp8,0,1.4812159538269043
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,512,8,1,128,0,1,float16,float16,0,2.150229295094808
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,512,8,1,128,0,1,float16,fp8,0,2.1195093790690103
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,512,8,2,128,0,1,float16,float16,0,2.3386452992757163
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,512,8,2,128,0,1,float16,fp8,0,2.2804479598999023
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,512,8,2,128,0,1,fp8,fp8,0,1.6868693033854167
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,512,8,4,128,0,1,float16,float16,0,2.9544105529785156
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,512,8,4,128,0,1,float16,fp8,0,2.7709439595540366
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,512,8,8,128,0,1,float16,float16,0,2.0671146710713706
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,512,8,8,128,0,1,float16,fp8,0,1.9655680656433105
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,512,8,4,128,0,1,fp8,fp8,0,2.2026240030924478
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,512,8,8,128,0,1,fp8,fp8,0,1.5254185994466145
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,512,8,1,128,0,1,float16,float16,0,1.0605226357777913
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,512,8,1,128,0,1,float16,fp8,0,1.0528426965077717
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,512,8,1,128,0,1,fp8,fp8,0,0.7837013403574625
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,512,8,2,128,0,1,float16,float16,0,1.1760640144348145
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,512,8,2,128,0,1,float16,fp8,0,1.1520000298817952
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,512,8,2,128,0,1,fp8,fp8,0,0.8282453219095866
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,512,8,4,128,0,1,float16,fp8,0,1.3750613530476887
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,512,8,4,128,0,1,float16,float16,0,1.4417920112609863
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,512,8,4,128,0,1,fp8,fp8,0,1.040554682413737
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,512,8,8,128,0,1,float16,float16,0,1.0328746636708577
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,512,8,8,128,0,1,float16,fp8,0,0.9755307038625082
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,512,8,8,128,0,1,fp8,fp8,0,0.7330133120218912
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,512,8,1,128,0,1,fp8,fp8,0,0.3563520113627116
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,512,8,1,128,0,1,float16,fp8,0,0.508074680964152
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,512,8,1,128,0,1,float16,float16,0,0.5128533442815145
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,512,8,2,128,0,1,float16,float16,0,0.5809493462244669
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,512,8,2,128,0,1,float16,fp8,0,0.5708800156911215
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,512,8,2,128,0,1,fp8,fp8,0,0.4073813358942668
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,512,8,4,128,0,1,float16,float16,0,0.7200427055358887
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,512,8,4,128,0,1,float16,fp8,0,0.6918826897939047
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,512,8,4,128,0,1,fp8,fp8,0,0.5089279810587565
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,512,8,8,128,0,1,fp8,fp8,0,0.33023999134699505
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,512,8,8,128,0,1,float16,fp8,0,0.4556800127029419
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,512,8,8,128,0,1,float16,float16,0,0.49664000670115155
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,512,8,1,128,0,1,float16,float16,0,0.18858667214711508
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,512,8,1,128,0,1,float16,fp8,0,0.18824533621470133
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,512,8,1,128,0,1,fp8,fp8,0,0.11793067057927449
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,512,8,2,128,0,1,float16,float16,0,0.19729065895080566
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,512,8,2,128,0,1,float16,fp8,0,0.19438934326171875
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,512,8,2,128,0,1,fp8,fp8,0,0.13738666971524557
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,512,8,4,128,0,1,float16,float16,0,0.24576000372568765
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,512,8,4,128,0,1,float16,fp8,0,0.22254933913548788
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,512,8,4,128,0,1,fp8,fp8,0,0.22476800282796225
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,512,8,8,128,0,1,float16,float16,0,0.11571199695269267
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,512,8,8,128,0,1,float16,fp8,0,0.10717866818110149
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,512,8,8,128,0,1,fp8,fp8,0,0.06946133573849995
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,512,8,1,128,0,1,float16,float16,0,0.0981333355108897
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,512,8,1,128,0,1,float16,fp8,0,0.09847467144330342
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,512,8,1,128,0,1,fp8,fp8,0,0.06724266707897186
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,512,8,2,128,0,1,float16,float16,0,0.10086400310198466
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,512,8,2,128,0,1,float16,fp8,0,0.09727999567985535
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,512,8,2,128,0,1,fp8,fp8,0,0.067071999112765
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,512,8,4,128,0,1,float16,float16,0,0.10410666465759277
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,512,8,4,128,0,1,float16,fp8,0,0.10240000486373901
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,512,8,4,128,0,1,fp8,fp8,0,0.06877866884072621
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,512,8,8,128,0,1,float16,float16,0,0.06331733365853627
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,512,8,8,128,0,1,float16,fp8,0,0.06229333579540253
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,512,8,8,128,0,1,fp8,fp8,0,0.04215466479460398
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,512,8,1,128,0,1,float16,float16,0,0.0580266664425532
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,512,8,1,128,0,1,fp8,fp8,0,0.04130133241415024
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,512,8,1,128,0,1,float16,fp8,0,0.05836800237496694
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,512,8,2,128,0,1,float16,float16,0,0.05717333157857259
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,512,8,2,128,0,1,float16,fp8,0,0.062122667829195656
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,512,8,2,128,0,1,fp8,fp8,0,0.040618665516376495
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,512,8,4,128,0,1,float16,float16,0,0.060415998101234436
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,512,8,4,128,0,1,fp8,fp8,0,0.04181333382924398
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,512,8,4,128,0,1,float16,fp8,0,0.0602453351020813
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,512,8,8,128,0,1,float16,float16,0,0.03566933423280716
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,512,8,8,128,0,1,float16,fp8,0,0.03532800078392029
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,512,8,8,128,0,1,fp8,fp8,0,0.025941332181294758
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,512,8,1,128,0,1,float16,float16,0,0.0341333324710528
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,512,8,1,128,0,1,float16,fp8,0,0.034474665919939675
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,512,8,2,128,0,1,float16,float16,0,0.034474665919939675
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,512,8,1,128,0,1,fp8,fp8,0,0.025429333249727886
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,512,8,2,128,0,1,float16,fp8,0,0.034815999368826546
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,512,8,2,128,0,1,fp8,fp8,0,0.025429333249727886
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,512,8,4,128,0,1,float16,float16,0,0.03498666733503342
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,512,8,4,128,0,1,fp8,fp8,0,0.025258667767047882
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,512,8,4,128,0,1,float16,fp8,0,0.03498666733503342
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,512,8,8,128,0,1,float16,float16,0,0.027477333943049114
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,512,8,8,128,0,1,float16,fp8,0,0.027136000494162243
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,512,8,8,128,0,1,fp8,fp8,0,0.019968000551064808
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,512,8,1,128,0,1,float16,float16,0,0.0266239990790685
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,512,8,1,128,0,1,float16,fp8,0,0.0264533335963885
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,512,8,1,128,0,1,fp8,fp8,0,0.0194560003777345
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,512,8,2,128,0,1,float16,float16,0,0.02679466704527537
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,512,8,2,128,0,1,float16,fp8,0,0.026965332527955372
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,512,8,2,128,0,1,fp8,fp8,0,0.019285333653291065
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,512,8,4,128,0,1,float16,float16,0,0.02679466704527537
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,512,8,4,128,0,1,float16,fp8,0,0.02679466704527537
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,512,8,4,128,0,1,fp8,fp8,0,0.0194560003777345
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,512,8,8,128,0,1,float16,float16,0,0.024746666351954143
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,512,8,8,128,0,1,float16,fp8,0,0.025087999800841015
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,512,8,8,128,0,1,fp8,fp8,0,0.01791999985774358
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,512,8,1,128,0,1,float16,float16,0,0.02491733431816101
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,512,8,1,128,0,1,float16,fp8,0,0.024746666351954143
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,512,8,1,128,0,1,fp8,fp8,0,0.017749333133300144
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,512,8,2,128,0,1,float16,float16,0,0.024746666351954143
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,512,8,2,128,0,1,float16,fp8,0,0.025087999800841015
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,512,8,2,128,0,1,fp8,fp8,0,0.01791999985774358
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,512,8,4,128,0,1,float16,float16,0,0.02491733431816101
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,512,8,4,128,0,1,float16,fp8,0,0.025087999800841015
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,512,8,4,128,0,1,fp8,fp8,0,0.018090666582187016
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,256,8,1,128,0,1,float16,float16,0,0.9838933149973551
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,256,8,1,128,0,1,float16,fp8,0,0.9673386414845785
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,256,8,1,128,0,1,fp8,fp8,0,0.6546773513158163
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,256,8,2,128,0,1,float16,float16,0,1.1240106423695881
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,256,8,2,128,0,1,float16,fp8,0,1.0949973265329997
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,256,8,2,128,0,1,fp8,fp8,0,0.7517866293589274
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,256,8,4,128,0,1,float16,float16,0,1.4504960378011067
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,256,8,4,128,0,1,float16,fp8,0,1.3806932767232258
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,256,8,4,128,0,1,fp8,fp8,0,1.025877316792806
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,256,8,8,128,0,1,float16,float16,0,1.0296320120493572
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,256,8,8,128,0,1,float16,fp8,0,0.9710933367411295
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,256,8,8,128,0,1,fp8,fp8,0,0.7350613276163737
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,256,8,1,128,0,1,float16,float16,0,0.44236799081166583
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,256,8,1,128,0,1,float16,fp8,0,0.4261546532313029
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,256,8,1,128,0,1,fp8,fp8,0,0.3012266755104065
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,256,8,2,128,0,1,float16,float16,0,0.5589333375295004
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,256,8,2,128,0,1,float16,fp8,0,0.541866660118103
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,256,8,2,128,0,1,fp8,fp8,0,0.3573760191599528
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,256,8,4,128,0,1,float16,float16,0,0.7224319775899252
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,256,8,4,128,0,1,float16,fp8,0,0.6915413538614908
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,256,8,4,128,0,1,fp8,fp8,0,0.47035733858744305
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,256,8,8,128,0,1,float16,float16,0,0.49561599890391034
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,256,8,8,128,0,1,float16,fp8,0,0.47359999020894367
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,256,8,8,128,0,1,fp8,fp8,0,0.31539199749628705
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,256,8,1,128,0,1,float16,float16,0,0.13567999998728433
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,256,8,1,128,0,1,float16,fp8,0,0.12100266416867574
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,256,8,1,128,0,1,fp8,fp8,0,0.08891733487447102
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,256,8,2,128,0,1,float16,fp8,0,0.1389226714769999
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,256,8,4,128,0,1,float16,float16,0,0.1991680065790812
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,256,8,2,128,0,1,float16,float16,0,0.1384106675783793
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,256,8,2,128,0,1,fp8,fp8,0,0.08942932883898418
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,256,8,4,128,0,1,float16,fp8,0,0.169813334941864
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,256,8,4,128,0,1,fp8,fp8,0,0.19831466674804688
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,256,8,8,128,0,1,float16,float16,0,0.09130666653315227
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,256,8,8,128,0,1,float16,fp8,0,0.07970133423805237
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,256,8,1,128,0,1,float16,float16,0,0.06690133114655812
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,256,8,8,128,0,1,fp8,fp8,0,0.05376000205675761
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,256,8,1,128,0,1,float16,fp8,0,0.06758399804433186
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,256,8,1,128,0,1,fp8,fp8,0,0.051029334465662636
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,256,8,2,128,0,1,float16,float16,0,0.06843733290831248
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,256,8,2,128,0,1,float16,fp8,0,0.06860800087451935
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,256,8,2,128,0,1,fp8,fp8,0,0.05085866649945577
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,256,8,4,128,0,1,float16,fp8,0,0.06997333467006683
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,256,8,4,128,0,1,float16,float16,0,0.07321600119272868
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,256,8,4,128,0,1,fp8,fp8,0,0.05273599922657013
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,256,8,8,128,0,1,float16,float16,0,0.044031997521718345
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,256,8,8,128,0,1,float16,fp8,0,0.042837331692377724
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,256,8,8,128,0,1,fp8,fp8,0,0.032255999743938446
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,256,8,1,128,0,1,float16,float16,0,0.040106666584809623
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,256,8,1,128,0,1,float16,fp8,0,0.040448000033696495
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,256,8,1,128,0,1,fp8,fp8,0,0.031061333914597828
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,256,8,2,128,0,1,float16,float16,0,0.040789333482583366
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,256,8,2,128,0,1,float16,fp8,0,0.040618665516376495
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,256,8,2,128,0,1,fp8,fp8,0,0.031231999397277832
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,256,8,4,128,0,1,float16,float16,0,0.04164266586303711
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,256,8,4,128,0,1,float16,fp8,0,0.04095999896526337
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,256,8,4,128,0,1,fp8,fp8,0,0.032085334261258446
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,256,8,8,128,0,1,float16,float16,0,0.02611200014750163
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,256,8,8,128,0,1,float16,fp8,0,0.025600001215934753
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,256,8,1,128,0,1,float16,float16,0,0.02491733431816101
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,256,8,8,128,0,1,fp8,fp8,0,0.019626667102177937
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,256,8,1,128,0,1,float16,fp8,0,0.025087999800841015
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,256,8,1,128,0,1,fp8,fp8,0,0.019285333653291065
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,256,8,2,128,0,1,float16,float16,0,0.02457600086927414
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,256,8,2,128,0,1,fp8,fp8,0,0.019285333653291065
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,256,8,2,128,0,1,float16,fp8,0,0.02457600086927414
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,256,8,4,128,0,1,float16,float16,0,0.02491733431816101
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,256,8,4,128,0,1,float16,fp8,0,0.025258667767047882
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,256,8,4,128,0,1,fp8,fp8,0,0.019285333653291065
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,256,8,8,128,0,1,float16,float16,0,0.019626667102177937
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,256,8,8,128,0,1,float16,fp8,0,0.019285333653291065
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,256,8,8,128,0,1,fp8,fp8,0,0.015189333508412043
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,256,8,1,128,0,1,float16,float16,0,0.018602666755517323
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,256,8,1,128,0,1,float16,fp8,0,0.018602666755517323
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,256,8,1,128,0,1,fp8,fp8,0,0.014677333335081736
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,256,8,2,128,0,1,float16,float16,0,0.01877333347996076
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,256,8,2,128,0,1,float16,fp8,0,0.018602666755517323
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,256,8,4,128,0,1,float16,float16,0,0.018944000204404194
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,256,8,2,128,0,1,fp8,fp8,0,0.014848000059525171
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,256,8,4,128,0,1,float16,fp8,0,0.018944000204404194
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,256,8,4,128,0,1,fp8,fp8,0,0.015018666783968607
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,256,8,8,128,0,1,float16,float16,0,0.0170666662355264
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,256,8,8,128,0,1,float16,fp8,0,0.016895999511082966
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,256,8,8,128,0,1,fp8,fp8,0,0.013482666263977686
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,256,8,1,128,0,1,float16,float16,0,0.01672533278663953
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,256,8,1,128,0,1,float16,fp8,0,0.016895999511082966
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,256,8,1,128,0,1,fp8,fp8,0,0.013141332815090815
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,256,8,2,128,0,1,float16,fp8,0,0.016895999511082966
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,256,8,2,128,0,1,float16,float16,0,0.016895999511082966
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,256,8,2,128,0,1,fp8,fp8,0,0.01331199953953425
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,256,8,4,128,0,1,float16,float16,0,0.016895999511082966
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,256,8,4,128,0,1,float16,fp8,0,0.0170666662355264
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,256,8,4,128,0,1,fp8,fp8,0,0.013653332988421122
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,256,8,8,128,0,1,float16,float16,0,0.015872000406185787
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,256,8,8,128,0,1,fp8,fp8,0,0.012629333883523941
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,256,8,8,128,0,1,float16,fp8,0,0.016042667130629223
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,256,8,1,128,0,1,float16,float16,0,0.016042667130629223
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,256,8,1,128,0,1,float16,fp8,0,0.016042667130629223
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,256,8,1,128,0,1,fp8,fp8,0,0.012458667159080505
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,256,8,2,128,0,1,float16,float16,0,0.016042667130629223
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,256,8,4,128,0,1,float16,float16,0,0.01621333385507266
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,256,8,2,128,0,1,float16,fp8,0,0.016384000579516094
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,256,8,2,128,0,1,fp8,fp8,0,0.012970666090647379
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,256,8,4,128,0,1,float16,fp8,0,0.016384000579516094
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,256,8,4,128,0,1,fp8,fp8,0,0.012800000607967377
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,128,8,1,128,0,1,float16,float16,0,0.44202665487925213
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,128,8,1,128,0,1,float16,fp8,0,0.4194986820220947
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,128,8,1,128,0,1,fp8,fp8,0,0.27613866329193115
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,128,8,2,128,0,1,float16,float16,0,0.5631999969482422
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,128,8,2,128,0,1,fp8,fp8,0,0.33501867453257245
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,128,8,2,128,0,1,float16,fp8,0,0.5410133202870687
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,128,8,4,128,0,1,float16,float16,0,0.733184019724528
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,128,8,4,128,0,1,float16,fp8,0,0.6941013336181641
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,128,8,4,128,0,1,fp8,fp8,0,0.48520533243815106
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,128,8,8,128,0,1,float16,float16,0,0.4944213231404622
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,128,8,8,128,0,1,float16,fp8,0,0.460970679918925
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,128,8,8,128,0,1,fp8,fp8,0,0.29764266808827716
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,128,8,1,128,0,1,float16,float16,0,0.09779199957847595
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,128,8,1,128,0,1,float16,fp8,0,0.09727999567985535
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,128,8,2,128,0,1,float16,float16,0,0.11383466919263203
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,128,8,1,128,0,1,fp8,fp8,0,0.07526400188604991
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,128,8,2,128,0,1,float16,fp8,0,0.10769066214561462
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,128,8,2,128,0,1,fp8,fp8,0,0.08106666803359985
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,128,8,4,128,0,1,float16,float16,0,0.19541333119074503
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,128,8,4,128,0,1,float16,fp8,0,0.15411200126012167
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,128,8,4,128,0,1,fp8,fp8,0,0.1776640017827352
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,128,8,8,128,0,1,float16,fp8,0,0.06007466713587443
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,128,8,8,128,0,1,float16,float16,0,0.0747519979874293
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,128,8,8,128,0,1,fp8,fp8,0,0.045909335215886436
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,128,8,1,128,0,1,float16,float16,0,0.05222400029500326
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,128,8,1,128,0,1,float16,fp8,0,0.051882664362589516
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,128,8,1,128,0,1,fp8,fp8,0,0.043178667624791466
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,128,8,2,128,0,1,float16,float16,0,0.052906667192777
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,128,8,2,128,0,1,float16,fp8,0,0.05239466826121012
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,128,8,2,128,0,1,fp8,fp8,0,0.043007999658584595
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,128,8,4,128,0,1,float16,float16,0,0.05649066468079885
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,128,8,4,128,0,1,float16,fp8,0,0.055125330885251365
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,128,8,4,128,0,1,fp8,fp8,0,0.045226668318112694
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,128,8,8,128,0,1,float16,float16,0,0.03498666733503342
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,128,8,8,128,0,1,float16,fp8,0,0.0341333324710528
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,128,8,8,128,0,1,fp8,fp8,0,0.027136000494162243
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,128,8,1,128,0,1,float16,float16,0,0.0314026673634847
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,128,8,1,128,0,1,float16,fp8,0,0.031744000812371574
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,128,8,1,128,0,1,fp8,fp8,0,0.025941332181294758
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,128,8,2,128,0,1,float16,fp8,0,0.0314026673634847
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,128,8,2,128,0,1,float16,float16,0,0.031744000812371574
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,128,8,2,128,0,1,fp8,fp8,0,0.025429333249727886
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,128,8,4,128,0,1,float16,float16,0,0.03259733319282532
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,128,8,4,128,0,1,float16,fp8,0,0.03276800115903219
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,128,8,4,128,0,1,fp8,fp8,0,0.02679466704527537
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,128,8,8,128,0,1,float16,float16,0,0.021503999829292297
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,128,8,8,128,0,1,fp8,fp8,0,0.017407999684413273
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,128,8,1,128,0,1,float16,fp8,0,0.020138667275508244
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,128,8,8,128,0,1,float16,fp8,0,0.021333334346612293
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,128,8,1,128,0,1,float16,float16,0,0.020138667275508244
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,128,8,1,128,0,1,fp8,fp8,0,0.016384000579516094
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,128,8,2,128,0,1,float16,float16,0,0.020309332758188248
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,128,8,2,128,0,1,float16,fp8,0,0.020138667275508244
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,128,8,2,128,0,1,fp8,fp8,0,0.016384000579516094
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,128,8,4,128,0,1,float16,fp8,0,0.020309332758188248
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,128,8,4,128,0,1,float16,float16,0,0.02065066620707512
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,128,8,8,128,0,1,float16,float16,0,0.015530666957298914
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,128,8,4,128,0,1,fp8,fp8,0,0.016895999511082966
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,128,8,8,128,0,1,float16,fp8,0,0.015189333508412043
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,128,8,8,128,0,1,fp8,fp8,0,0.01331199953953425
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,128,8,1,128,0,1,float16,float16,0,0.0145066666106383
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,128,8,1,128,0,1,float16,fp8,0,0.0145066666106383
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,128,8,1,128,0,1,fp8,fp8,0,0.012458667159080505
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,128,8,2,128,0,1,float16,float16,0,0.014677333335081736
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,128,8,2,128,0,1,float16,fp8,0,0.0145066666106383
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,128,8,2,128,0,1,fp8,fp8,0,0.012458667159080505
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,128,8,4,128,0,1,float16,fp8,0,0.015018666783968607
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,128,8,4,128,0,1,float16,float16,0,0.015018666783968607
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,128,8,8,128,0,1,float16,fp8,0,0.013141332815090815
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,128,8,4,128,0,1,fp8,fp8,0,0.012800000607967377
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,128,8,8,128,0,1,float16,float16,0,0.012970666090647379
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,128,8,8,128,0,1,fp8,fp8,0,0.011264000087976456
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,128,8,1,128,0,1,float16,float16,0,0.012800000607967377
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,128,8,1,128,0,1,float16,fp8,0,0.012800000607967377
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,128,8,1,128,0,1,fp8,fp8,0,0.01109333336353302
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,128,8,2,128,0,1,float16,float16,0,0.012800000607967377
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,128,8,2,128,0,1,float16,fp8,0,0.012800000607967377
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,128,8,4,128,0,1,float16,float16,0,0.013141332815090815
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,128,8,2,128,0,1,fp8,fp8,0,0.011264000087976456
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,128,8,4,128,0,1,float16,fp8,0,0.013141332815090815
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,128,8,4,128,0,1,fp8,fp8,0,0.011264000087976456
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,128,8,8,128,0,1,float16,float16,0,0.011776000261306763
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,128,8,8,128,0,1,float16,fp8,0,0.012117333710193634
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,128,8,8,128,0,1,fp8,fp8,0,0.010410666465759277
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,128,8,1,128,0,1,float16,float16,0,0.011946666985750198
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,128,8,1,128,0,1,float16,fp8,0,0.01228800043463707
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,128,8,1,128,0,1,fp8,fp8,0,0.010581333190202713
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,128,8,2,128,0,1,float16,float16,0,0.012117333710193634
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,128,8,2,128,0,1,float16,fp8,0,0.012458667159080505
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,128,8,2,128,0,1,fp8,fp8,0,0.010581333190202713
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,128,8,4,128,0,1,float16,float16,0,0.012117333710193634
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,128,8,4,128,0,1,float16,fp8,0,0.01228800043463707
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,128,8,4,128,0,1,fp8,fp8,0,0.010410666465759277
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,128,8,8,128,0,1,float16,float16,0,0.011605333536863327
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,128,8,8,128,0,1,float16,fp8,0,0.011946666985750198
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,128,8,8,128,0,1,fp8,fp8,0,0.010239999741315842
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,128,8,1,128,0,1,float16,float16,0,0.011776000261306763
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,128,8,1,128,0,1,float16,fp8,0,0.01228800043463707
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,128,8,1,128,0,1,fp8,fp8,0,0.010581333190202713
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,128,8,2,128,0,1,float16,float16,0,0.011946666985750198
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,128,8,2,128,0,1,float16,fp8,0,0.012117333710193634
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,128,8,2,128,0,1,fp8,fp8,0,0.010410666465759277
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,128,8,4,128,0,1,float16,float16,0,0.011946666985750198
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,128,8,4,128,0,1,float16,fp8,0,0.012117333710193634
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,128,8,4,128,0,1,fp8,fp8,0,0.010410666465759277
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,64,8,1,128,0,1,float16,float16,0,0.0885759989420573
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,64,8,1,128,0,1,float16,fp8,0,0.08516266942024231
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,64,8,2,128,0,1,float16,float16,0,0.10444800059000652
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,64,8,1,128,0,1,fp8,fp8,0,0.06980266670385997
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,64,8,2,128,0,1,float16,fp8,0,0.0942080020904541
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,64,8,2,128,0,1,fp8,fp8,0,0.07236266632874806
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,64,8,4,128,0,1,float16,float16,0,0.18466132879257202
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,64,8,4,128,0,1,float16,fp8,0,0.15069866180419922
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,64,8,4,128,0,1,fp8,fp8,0,0.17390932639439902
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,64,8,8,128,0,1,float16,fp8,0,0.052906667192777
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,64,8,8,128,0,1,float16,float16,0,0.07202133536338806
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,64,8,8,128,0,1,fp8,fp8,0,0.04147200038035711
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,64,8,1,128,0,1,float16,float16,0,0.04607999821503957
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,64,8,1,128,0,1,float16,fp8,0,0.04625066618124644
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,64,8,1,128,0,1,fp8,fp8,0,0.03908266623814901
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,64,8,2,128,0,1,float16,float16,0,0.04642133414745331
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,64,8,2,128,0,1,float16,fp8,0,0.04642133414745331
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,64,8,2,128,0,1,fp8,fp8,0,0.03857066730658213
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,64,8,4,128,0,1,float16,float16,0,0.04983466863632202
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,64,8,4,128,0,1,float16,fp8,0,0.04863999783992767
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,64,8,8,128,0,1,float16,fp8,0,0.029696000119050343
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,64,8,4,128,0,1,fp8,fp8,0,0.04130133241415024
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,64,8,8,128,0,1,float16,float16,0,0.0314026673634847
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,64,8,8,128,0,1,fp8,fp8,0,0.02457600086927414
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,64,8,1,128,0,1,float16,float16,0,0.027818667391935985
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,64,8,1,128,0,1,float16,fp8,0,0.027477333943049114
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,64,8,1,128,0,1,fp8,fp8,0,0.023210667073726654
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,64,8,2,128,0,1,float16,float16,0,0.027306665976842243
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,64,8,2,128,0,1,float16,fp8,0,0.027647999425729115
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,64,8,2,128,0,1,fp8,fp8,0,0.022869333624839783
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,64,8,4,128,0,1,float16,float16,0,0.0288426677385966
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,64,8,4,128,0,1,float16,fp8,0,0.02935466667016347
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,64,8,4,128,0,1,fp8,fp8,0,0.024234667420387268
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,64,8,8,128,0,1,float16,float16,0,0.018944000204404194
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,64,8,8,128,0,1,float16,fp8,0,0.018432000031073887
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,64,8,8,128,0,1,fp8,fp8,0,0.01570133368174235
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,64,8,1,128,0,1,float16,float16,0,0.0170666662355264
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,64,8,1,128,0,1,float16,fp8,0,0.017237332959969837
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,64,8,1,128,0,1,fp8,fp8,0,0.015018666783968607
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,64,8,2,128,0,1,float16,float16,0,0.017407999684413273
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,64,8,2,128,0,1,float16,fp8,0,0.017407999684413273
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,64,8,2,128,0,1,fp8,fp8,0,0.014848000059525171
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,64,8,4,128,0,1,float16,float16,0,0.01791999985774358
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,64,8,4,128,0,1,float16,fp8,0,0.017749333133300144
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,64,8,4,128,0,1,fp8,fp8,0,0.015189333508412043
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,64,8,8,128,0,1,float16,float16,0,0.01331199953953425
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,64,8,8,128,0,1,float16,fp8,0,0.013482666263977686
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,64,8,8,128,0,1,fp8,fp8,0,0.012117333710193634
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,64,8,1,128,0,1,float16,float16,0,0.012629333883523941
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,64,8,1,128,0,1,float16,fp8,0,0.012629333883523941
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,64,8,1,128,0,1,fp8,fp8,0,0.011264000087976456
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,64,8,2,128,0,1,float16,float16,0,0.012629333883523941
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,64,8,2,128,0,1,float16,fp8,0,0.012629333883523941
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,64,8,2,128,0,1,fp8,fp8,0,0.011264000087976456
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,64,8,4,128,0,1,float16,float16,0,0.012970666090647379
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,64,8,4,128,0,1,float16,fp8,0,0.012970666090647379
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,64,8,4,128,0,1,fp8,fp8,0,0.011605333536863327
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,64,8,8,128,0,1,float16,float16,0,0.01109333336353302
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,64,8,8,128,0,1,float16,fp8,0,0.011264000087976456
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,64,8,8,128,0,1,fp8,fp8,0,0.010069333637754122
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,64,8,1,128,0,1,float16,float16,0,0.010922666639089584
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,64,8,1,128,0,1,float16,fp8,0,0.010922666639089584
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,64,8,1,128,0,1,fp8,fp8,0,0.009898666913310686
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,64,8,2,128,0,1,fp8,fp8,0,0.009898666913310686
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,64,8,2,128,0,1,float16,float16,0,0.010922666639089584
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,64,8,2,128,0,1,float16,fp8,0,0.01109333336353302
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,64,8,4,128,0,1,float16,float16,0,0.01109333336353302
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,64,8,4,128,0,1,float16,fp8,0,0.011264000087976456
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,64,8,4,128,0,1,fp8,fp8,0,0.010069333637754122
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,64,8,8,128,0,1,float16,float16,0,0.009898666913310686
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,64,8,8,128,0,1,float16,fp8,0,0.010239999741315842
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,64,8,8,128,0,1,fp8,fp8,0,0.009216000015536943
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,64,8,1,128,0,1,float16,float16,0,0.010069333637754122
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,64,8,1,128,0,1,float16,fp8,0,0.010239999741315842
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,64,8,1,128,0,1,fp8,fp8,0,0.00938666673998038
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,64,8,2,128,0,1,float16,float16,0,0.010069333637754122
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,64,8,2,128,0,1,float16,fp8,0,0.010239999741315842
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,64,8,2,128,0,1,fp8,fp8,0,0.00938666673998038
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,64,8,4,128,0,1,float16,float16,0,0.010069333637754122
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,64,8,4,128,0,1,float16,fp8,0,0.010239999741315842
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,64,8,4,128,0,1,fp8,fp8,0,0.00938666673998038
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,64,8,8,128,0,1,float16,float16,0,0.00972800018886725
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,64,8,8,128,0,1,float16,fp8,0,0.010069333637754122
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,64,8,8,128,0,1,fp8,fp8,0,0.00938666673998038
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,64,8,1,128,0,1,float16,float16,0,0.009898666913310686
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,64,8,1,128,0,1,float16,fp8,0,0.010239999741315842
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,64,8,1,128,0,1,fp8,fp8,0,0.00938666673998038
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,64,8,2,128,0,1,float16,float16,0,0.009898666913310686
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,64,8,2,128,0,1,float16,fp8,0,0.010239999741315842
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,64,8,2,128,0,1,fp8,fp8,0,0.00938666673998038
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,64,8,4,128,0,1,float16,float16,0,0.009898666913310686
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,64,8,4,128,0,1,float16,fp8,0,0.010239999741315842
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,64,8,4,128,0,1,fp8,fp8,0,0.009557333464423815
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,64,8,8,128,0,1,float16,float16,0,0.009557333464423815
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,64,8,8,128,0,1,float16,fp8,0,0.009898666913310686
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,64,8,8,128,0,1,fp8,fp8,0,0.008874666566650072
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,64,8,1,128,0,1,float16,float16,0,0.009898666913310686
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,64,8,1,128,0,1,fp8,fp8,0,0.009216000015536943
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,64,8,1,128,0,1,float16,fp8,0,0.010239999741315842
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,64,8,2,128,0,1,float16,float16,0,0.00972800018886725
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,64,8,2,128,0,1,float16,fp8,0,0.010069333637754122
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,64,8,2,128,0,1,fp8,fp8,0,0.009216000015536943
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,64,8,4,128,0,1,float16,float16,0,0.009898666913310686
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,64,8,4,128,0,1,float16,fp8,0,0.010069333637754122
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,64,8,4,128,0,1,fp8,fp8,0,0.009045333291093508
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,32,8,1,128,0,1,float16,float16,0,0.04966400067011515
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,32,8,1,128,0,1,float16,fp8,0,0.04949333270390829
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,32,8,1,128,0,1,fp8,fp8,0,0.044031997521718345
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,32,8,2,128,0,1,float16,float16,0,0.04949333270390829
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,32,8,2,128,0,1,float16,fp8,0,0.04898133377234141
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,32,8,2,128,0,1,fp8,fp8,0,0.044031997521718345
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,32,8,4,128,0,1,float16,float16,0,0.05256533126036326
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,32,8,4,128,0,1,float16,fp8,0,0.051541333397229515
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,32,8,4,128,0,1,fp8,fp8,0,0.045909335215886436
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,32,8,8,128,0,1,float16,float16,0,0.031914666295051575
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,32,8,8,128,0,1,float16,fp8,0,0.030720000465710957
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,32,8,8,128,0,1,fp8,fp8,0,0.025770666698614757
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,32,8,1,128,0,1,float16,float16,0,0.0288426677385966
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,32,8,1,128,0,1,float16,fp8,0,0.02935466667016347
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,32,8,1,128,0,1,fp8,fp8,0,0.025600001215934753
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,32,8,2,128,0,1,float16,fp8,0,0.0290133332212766
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,32,8,2,128,0,1,float16,float16,0,0.02935466667016347
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,32,8,2,128,0,1,fp8,fp8,0,0.025087999800841015
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,32,8,4,128,0,1,float16,float16,0,0.030207999050617218
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,32,8,4,128,0,1,float16,fp8,0,0.029866665601730347
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,32,8,4,128,0,1,fp8,fp8,0,0.02628266563018163
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,32,8,8,128,0,1,float16,float16,0,0.020138667275508244
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,32,8,8,128,0,1,float16,fp8,0,0.019285333653291065
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,32,8,8,128,0,1,fp8,fp8,0,0.016895999511082966
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,32,8,1,128,0,1,float16,fp8,0,0.018602666755517323
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,32,8,1,128,0,1,float16,float16,0,0.01877333347996076
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,32,8,1,128,0,1,fp8,fp8,0,0.016042667130629223
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,32,8,2,128,0,1,float16,float16,0,0.018602666755517323
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,32,8,2,128,0,1,float16,fp8,0,0.018602666755517323
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,32,8,2,128,0,1,fp8,fp8,0,0.016554666062196095
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,32,8,4,128,0,1,float16,float16,0,0.019285333653291065
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,32,8,4,128,0,1,float16,fp8,0,0.01911466692884763
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,32,8,4,128,0,1,fp8,fp8,0,0.016554666062196095
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,32,8,8,128,0,1,float16,float16,0,0.01331199953953425
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,32,8,8,128,0,1,float16,fp8,0,0.012800000607967377
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,32,8,8,128,0,1,fp8,fp8,0,0.011776000261306763
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,32,8,1,128,0,1,float16,float16,0,0.012117333710193634
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,32,8,1,128,0,1,fp8,fp8,0,0.011434666812419891
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,32,8,1,128,0,1,float16,fp8,0,0.012458667159080505
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,32,8,2,128,0,1,float16,float16,0,0.012458667159080505
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,32,8,2,128,0,1,float16,fp8,0,0.012458667159080505
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,32,8,2,128,0,1,fp8,fp8,0,0.011605333536863327
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,32,8,4,128,0,1,float16,float16,0,0.012800000607967377
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,32,8,4,128,0,1,float16,fp8,0,0.012458667159080505
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,32,8,8,128,0,1,float16,float16,0,0.010069333637754122
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,32,8,4,128,0,1,fp8,fp8,0,0.011605333536863327
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,32,8,8,128,0,1,float16,fp8,0,0.010239999741315842
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,32,8,8,128,0,1,fp8,fp8,0,0.009216000015536943
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,32,8,1,128,0,1,float16,float16,0,0.00972800018886725
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,32,8,1,128,0,1,float16,fp8,0,0.009898666913310686
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,32,8,2,128,0,1,float16,float16,0,0.00972800018886725
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,32,8,1,128,0,1,fp8,fp8,0,0.009045333291093508
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,32,8,2,128,0,1,float16,fp8,0,0.009898666913310686
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,32,8,2,128,0,1,fp8,fp8,0,0.00938666673998038
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,32,8,4,128,0,1,float16,float16,0,0.010069333637754122
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,32,8,4,128,0,1,float16,fp8,0,0.010239999741315842
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,32,8,4,128,0,1,fp8,fp8,0,0.00938666673998038
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,32,8,8,128,0,1,float16,float16,0,0.008874666566650072
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,32,8,8,128,0,1,float16,fp8,0,0.009045333291093508
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,32,8,8,128,0,1,fp8,fp8,0,0.0085333331177632
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,32,8,1,128,0,1,float16,float16,0,0.008874666566650072
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,32,8,1,128,0,1,float16,fp8,0,0.009216000015536943
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,32,8,1,128,0,1,fp8,fp8,0,0.0085333331177632
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,32,8,2,128,0,1,float16,float16,0,0.009045333291093508
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,32,8,2,128,0,1,float16,fp8,0,0.009216000015536943
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,32,8,2,128,0,1,fp8,fp8,0,0.0085333331177632
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,32,8,4,128,0,1,float16,float16,0,0.008874666566650072
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,32,8,4,128,0,1,float16,fp8,0,0.009216000015536943
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,32,8,4,128,0,1,fp8,fp8,0,0.008703999842206636
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,32,8,8,128,0,1,float16,float16,0,0.0085333331177632
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,32,8,8,128,0,1,float16,fp8,0,0.008703999842206636
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,32,8,8,128,0,1,fp8,fp8,0,0.009045333291093508
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,32,8,1,128,0,1,float16,float16,0,0.0085333331177632
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,32,8,1,128,0,1,float16,fp8,0,0.008874666566650072
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,32,8,1,128,0,1,fp8,fp8,0,0.009359999870260557
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,32,8,2,128,0,1,float16,float16,0,0.0085333331177632
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,32,8,2,128,0,1,float16,fp8,0,0.009045333291093508
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,32,8,2,128,0,1,fp8,fp8,0,0.008362666393319765
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,32,8,4,128,0,1,float16,float16,0,0.008703999842206636
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,32,8,4,128,0,1,float16,fp8,0,0.009045333291093508
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,32,8,4,128,0,1,fp8,fp8,0,0.008362666393319765
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,32,8,8,128,0,1,float16,fp8,0,0.0085333331177632
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,32,8,8,128,0,1,float16,float16,0,0.008192000289758047
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,32,8,8,128,0,1,fp8,fp8,0,0.009216000015536943
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,32,8,1,128,0,1,float16,float16,0,0.0085333331177632
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,32,8,1,128,0,1,float16,fp8,0,0.008874666566650072
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,32,8,1,128,0,1,fp8,fp8,0,0.008703999842206636
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,32,8,2,128,0,1,float16,float16,0,0.0085333331177632
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,32,8,2,128,0,1,float16,fp8,0,0.008874666566650072
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,32,8,2,128,0,1,fp8,fp8,0,0.008362666393319765
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,32,8,4,128,0,1,float16,float16,0,0.008703999842206636
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,32,8,4,128,0,1,fp8,fp8,0,0.008192000289758047
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,32,8,4,128,0,1,float16,fp8,0,0.008874666566650072
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,32,8,8,128,0,1,float16,float16,0,0.009216000015536943
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,32,8,8,128,0,1,float16,fp8,0,0.008703999842206636
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,32,8,8,128,0,1,fp8,fp8,0,0.009216000015536943
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,32,8,1,128,0,1,float16,fp8,0,0.008703999842206636
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,32,8,1,128,0,1,float16,float16,0,0.008703999842206636
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,32,8,2,128,0,1,float16,float16,0,0.0085333331177632
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,32,8,1,128,0,1,fp8,fp8,0,0.008703999842206636
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,32,8,2,128,0,1,float16,fp8,0,0.008874666566650072
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,32,8,2,128,0,1,fp8,fp8,0,0.0085333331177632
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,32,8,4,128,0,1,float16,float16,0,0.0085333331177632
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,32,8,4,128,0,1,float16,fp8,0,0.009045333291093508
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,32,8,4,128,0,1,fp8,fp8,0,0.009216000015536943
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,16,8,1,128,0,1,float16,float16,0,0.03822933385769526
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,16,8,1,128,0,1,float16,fp8,0,0.038058665891488395
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,16,8,1,128,0,1,fp8,fp8,0,0.03276800115903219
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,16,8,2,128,0,1,float16,float16,0,0.038058665891488395
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,16,8,2,128,0,1,float16,fp8,0,0.038058665891488395
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,16,8,2,128,0,1,fp8,fp8,0,0.03259733319282532
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,16,8,4,128,0,1,float16,float16,0,0.039594667653242745
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,16,8,4,128,0,1,float16,fp8,0,0.03925333420435587
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,16,8,4,128,0,1,fp8,fp8,0,0.03362133353948593
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,16,8,8,128,0,1,float16,float16,0,0.023381332556406658
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,16,8,8,128,0,1,float16,fp8,0,0.023039999107519787
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,16,8,8,128,0,1,fp8,fp8,0,0.0194560003777345
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,16,8,1,128,0,1,float16,float16,0,0.022869333624839783
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,16,8,1,128,0,1,float16,fp8,0,0.022698665658632915
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,16,8,1,128,0,1,fp8,fp8,0,0.01911466692884763
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,16,8,2,128,0,1,float16,float16,0,0.022869333624839783
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,16,8,2,128,0,1,fp8,fp8,0,0.01911466692884763
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,16,8,2,128,0,1,float16,fp8,0,0.022698665658632915
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,16,8,4,128,0,1,float16,float16,0,0.02372266600529353
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,16,8,4,128,0,1,float16,fp8,0,0.023039999107519787
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,16,8,4,128,0,1,fp8,fp8,0,0.019797333826621372
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,16,8,8,128,0,1,float16,float16,0,0.015189333508412043
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,16,8,8,128,0,1,float16,fp8,0,0.014677333335081736
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,16,8,8,128,0,1,fp8,fp8,0,0.013141332815090815
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,16,8,1,128,0,1,float16,float16,0,0.014677333335081736
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,16,8,1,128,0,1,float16,fp8,0,0.0145066666106383
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,16,8,2,128,0,1,float16,float16,0,0.014677333335081736
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,16,8,1,128,0,1,fp8,fp8,0,0.012800000607967377
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,16,8,2,128,0,1,float16,fp8,0,0.0145066666106383
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,16,8,2,128,0,1,fp8,fp8,0,0.012800000607967377
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,16,8,4,128,0,1,float16,float16,0,0.015018666783968607
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,16,8,4,128,0,1,float16,fp8,0,0.014848000059525171
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,16,8,4,128,0,1,fp8,fp8,0,0.013141332815090815
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,16,8,8,128,0,1,float16,float16,0,0.010410666465759277
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,16,8,8,128,0,1,float16,fp8,0,0.010751999914646149
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,16,8,8,128,0,1,fp8,fp8,0,0.009557333464423815
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,16,8,1,128,0,1,float16,float16,0,0.010239999741315842
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,16,8,1,128,0,1,float16,fp8,0,0.010239999741315842
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,16,8,1,128,0,1,fp8,fp8,0,0.010069333637754122
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,16,8,2,128,0,1,float16,float16,0,0.010069333637754122
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,16,8,2,128,0,1,float16,fp8,0,0.010239999741315842
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,16,8,2,128,0,1,fp8,fp8,0,0.00972800018886725
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,16,8,4,128,0,1,float16,float16,0,0.010239999741315842
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,16,8,4,128,0,1,float16,fp8,0,0.010751999914646149
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,16,8,4,128,0,1,fp8,fp8,0,0.010410666465759277
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,16,8,8,128,0,1,float16,float16,0,0.008874666566650072
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,16,8,8,128,0,1,float16,fp8,0,0.00938666673998038
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,16,8,8,128,0,1,fp8,fp8,0,0.008874666566650072
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,16,8,1,128,0,1,float16,float16,0,0.008703999842206636
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,16,8,1,128,0,1,float16,fp8,0,0.009045333291093508
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,16,8,1,128,0,1,fp8,fp8,0,0.009045333291093508
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,16,8,2,128,0,1,float16,float16,0,0.008874666566650072
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,16,8,2,128,0,1,float16,fp8,0,0.00938666673998038
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,16,8,2,128,0,1,fp8,fp8,0,0.008703999842206636
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,16,8,4,128,0,1,float16,float16,0,0.009045333291093508
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,16,8,4,128,0,1,float16,fp8,0,0.00938666673998038
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,16,8,4,128,0,1,fp8,fp8,0,0.008703999842206636
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,16,8,8,128,0,1,float16,float16,0,0.008192000289758047
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,16,8,8,128,0,1,float16,fp8,0,0.0085333331177632
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,16,8,8,128,0,1,fp8,fp8,0,0.009045333291093508
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,16,8,1,128,0,1,float16,float16,0,0.008703999842206636
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,16,8,1,128,0,1,float16,fp8,0,0.008874666566650072
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,16,8,1,128,0,1,fp8,fp8,0,0.0085333331177632
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,16,8,2,128,0,1,float16,fp8,0,0.008703999842206636
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,16,8,2,128,0,1,float16,float16,0,0.0085333331177632
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,16,8,2,128,0,1,fp8,fp8,0,0.008362666393319765
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,16,8,4,128,0,1,float16,float16,0,0.0085333331177632
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,16,8,4,128,0,1,float16,fp8,0,0.008703999842206636
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,16,8,4,128,0,1,fp8,fp8,0,0.008362666393319765
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,16,8,8,128,0,1,float16,float16,0,0.008192000289758047
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,16,8,8,128,0,1,float16,fp8,0,0.008362666393319765
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,16,8,8,128,0,1,fp8,fp8,0,0.007850666840871176
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,16,8,1,128,0,1,float16,float16,0,0.008362666393319765
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,16,8,1,128,0,1,float16,fp8,0,0.008703999842206636
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,16,8,1,128,0,1,fp8,fp8,0,0.008362666393319765
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,16,8,2,128,0,1,float16,float16,0,0.0085333331177632
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,16,8,2,128,0,1,float16,fp8,0,0.008703999842206636
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,16,8,2,128,0,1,fp8,fp8,0,0.008874666566650072
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,16,8,4,128,0,1,float16,float16,0,0.008874666566650072
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,16,8,4,128,0,1,float16,fp8,0,0.0085333331177632
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,16,8,4,128,0,1,fp8,fp8,0,0.0085333331177632
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,16,8,8,128,0,1,float16,float16,0,0.008192000289758047
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,16,8,8,128,0,1,float16,fp8,0,0.008362666393319765
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,16,8,8,128,0,1,fp8,fp8,0,0.007850666840871176
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,16,8,1,128,0,1,float16,float16,0,0.00973866693675518
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,16,8,1,128,0,1,fp8,fp8,0,0.008703999842206636
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,16,8,1,128,0,1,float16,fp8,0,0.008703999842206636
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,16,8,2,128,0,1,float16,float16,0,0.008192000289758047
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,16,8,2,128,0,1,float16,fp8,0,0.009045333291093508
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,16,8,2,128,0,1,fp8,fp8,0,0.008192000289758047
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,16,8,4,128,0,1,float16,float16,0,0.008362666393319765
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,16,8,4,128,0,1,fp8,fp8,0,0.0085333331177632
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,16,8,4,128,0,1,float16,fp8,0,0.008703999842206636
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,16,8,8,128,0,1,float16,float16,0,0.009216000015536943
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,16,8,8,128,0,1,float16,fp8,0,0.008362666393319765
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,16,8,8,128,0,1,fp8,fp8,0,0.008192000289758047
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,16,8,1,128,0,1,float16,float16,0,0.008362666393319765
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,16,8,1,128,0,1,float16,fp8,0,0.008703999842206636
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,16,8,1,128,0,1,fp8,fp8,0,0.0085333331177632
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,16,8,2,128,0,1,float16,float16,0,0.008362666393319765
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,16,8,2,128,0,1,float16,fp8,0,0.008703999842206636
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,16,8,2,128,0,1,fp8,fp8,0,0.008874666566650072
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,16,8,4,128,0,1,float16,float16,0,0.008362666393319765
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,16,8,4,128,0,1,float16,fp8,0,0.009701333319147428
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,16,8,4,128,0,1,fp8,fp8,0,0.009173333023985228
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,16384,4,1,128,0,1,fp8,fp8,0,8.316928227742514
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,16384,4,4,128,0,1,float16,float16,0,6.971221288045247
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,16384,4,1,128,0,1,float16,float16,0,14.14297612508138
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,16384,4,1,128,0,1,float16,fp8,0,13.972309112548828
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,16384,4,2,128,0,1,fp8,fp8,0,8.463359832763672
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,16384,4,4,128,0,1,float16,fp8,0,7.0190080006917315
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,16384,4,2,128,0,1,float16,fp8,0,13.70999526977539
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,16384,4,2,128,0,1,float16,float16,0,14.08733876546224
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,16384,4,4,128,0,1,fp8,fp8,0,4.11409060160319
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,16384,4,1,128,0,1,fp8,fp8,0,3.9770453770955405
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,16384,4,1,128,0,1,float16,float16,0,6.723413467407227
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,16384,4,2,128,0,1,fp8,fp8,0,4.034048080444336
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,16384,4,4,128,0,1,float16,float16,0,3.5143680572509766
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,16384,4,2,128,0,1,float16,float16,0,6.798506418863933
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,16384,4,2,128,0,1,float16,fp8,0,6.703786849975586
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,16384,4,1,128,0,1,float16,fp8,0,6.981973648071289
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,16384,4,4,128,0,1,float16,fp8,0,3.454805374145508
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,16384,4,4,128,0,1,fp8,fp8,0,2.1787306467692056
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,16384,4,1,128,0,1,float16,float16,0,3.2179199854532876
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,16384,4,1,128,0,1,fp8,fp8,0,1.931605339050293
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,16384,4,1,128,0,1,float16,fp8,0,3.1660372416178384
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,16384,4,2,128,0,1,float16,float16,0,3.252394676208496
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,16384,4,2,128,0,1,float16,fp8,0,3.20034122467041
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,16384,4,2,128,0,1,fp8,fp8,0,1.9838293393452961
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,16384,4,4,128,0,1,float16,float16,0,1.7530879974365234
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,16384,4,4,128,0,1,float16,fp8,0,1.7909760475158691
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,16384,4,4,128,0,1,fp8,fp8,0,1.077077309290568
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,16384,4,1,128,0,1,float16,fp8,0,1.7245866457621257
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,16384,4,1,128,0,1,float16,float16,0,1.7850027084350586
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,16384,4,1,128,0,1,fp8,fp8,0,1.0736640294392903
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,16384,4,2,128,0,1,float16,float16,0,1.7486507097880046
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,16384,4,2,128,0,1,float16,fp8,0,1.7735679944356282
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,16384,4,2,128,0,1,fp8,fp8,0,1.0736640294392903
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,12288,4,1,128,0,1,fp8,fp8,0,4.604074796040853
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,12288,4,1,128,0,1,float16,float16,0,7.663103739420573
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,12288,4,1,128,0,1,float16,fp8,0,7.959381103515625
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,12288,4,2,128,0,1,fp8,fp8,0,4.735317230224609
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,12288,4,2,128,0,1,float16,fp8,0,8.104618708292643
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,12288,4,2,128,0,1,float16,float16,0,7.716351826985677
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,12288,4,4,128,0,1,float16,float16,0,4.011861483256022
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,12288,4,4,128,0,1,float16,fp8,0,3.940863927205404
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,12288,4,4,128,0,1,fp8,fp8,0,2.4436052640279136
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,12288,4,1,128,0,1,fp8,fp8,0,2.178218682607015
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,12288,4,1,128,0,1,float16,float16,0,3.5891199111938477
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,12288,4,1,128,0,1,float16,fp8,0,3.630250612894694
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,12288,4,2,128,0,1,float16,float16,0,3.694250742594401
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,12288,4,2,128,0,1,float16,fp8,0,3.71507199605306
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,12288,4,2,128,0,1,fp8,fp8,0,2.337792078653971
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,12288,4,4,128,0,1,float16,float16,0,1.9568640391031902
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,12288,4,4,128,0,1,fp8,fp8,0,1.242965300877889
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,12288,4,4,128,0,1,float16,fp8,0,1.8993493715922039
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,12288,4,1,128,0,1,float16,float16,0,1.8906453450520833
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,12288,4,1,128,0,1,float16,fp8,0,1.8333013852437336
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,12288,4,1,128,0,1,fp8,fp8,0,1.1183786392211914
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,12288,4,2,128,0,1,float16,float16,0,1.8809173901875813
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,12288,4,2,128,0,1,fp8,fp8,0,1.1446613470713298
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,12288,4,2,128,0,1,float16,fp8,0,1.8862080574035645
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,12288,4,4,128,0,1,float16,float16,0,1.065130631128947
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,12288,4,4,128,0,1,float16,fp8,0,1.05949862798055
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,12288,4,4,128,0,1,fp8,fp8,0,0.683690627415975
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,12288,4,1,128,0,1,float16,float16,0,1.0484053293863933
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,12288,4,1,128,0,1,float16,fp8,0,1.050112009048462
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,12288,4,1,128,0,1,fp8,fp8,0,0.6729386647542318
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,12288,4,2,128,0,1,float16,float16,0,1.0513066450754802
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,12288,4,2,128,0,1,float16,fp8,0,1.0685439904530842
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,12288,4,2,128,0,1,fp8,fp8,0,0.6667946974436442
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,10240,4,1,128,0,1,fp8,fp8,0,3.1981226603190103
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,10240,4,1,128,0,1,float16,float16,0,5.282986640930176
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,10240,4,1,128,0,1,float16,fp8,0,5.365930557250977
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,10240,4,2,128,0,1,float16,float16,0,5.430784225463867
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,10240,4,2,128,0,1,float16,fp8,0,5.574997584025065
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,10240,4,2,128,0,1,fp8,fp8,0,3.3402878443400064
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,10240,4,4,128,0,1,float16,fp8,0,2.8020054499308267
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,10240,4,4,128,0,1,float16,float16,0,2.8750505447387695
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,10240,4,4,128,0,1,fp8,fp8,0,1.8005332946777344
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,10240,4,1,128,0,1,float16,float16,0,2.476031939188639
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,10240,4,1,128,0,1,float16,fp8,0,2.518869400024414
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,10240,4,1,128,0,1,fp8,fp8,0,1.5192747116088867
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,10240,4,2,128,0,1,float16,float16,0,2.696021397908529
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,10240,4,2,128,0,1,float16,fp8,0,2.5460054079691568
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,10240,4,2,128,0,1,fp8,fp8,0,1.6187733014424641
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,10240,4,4,128,0,1,float16,float16,0,1.3914453188578289
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,10240,4,4,128,0,1,float16,fp8,0,1.3428053855895996
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,10240,4,4,128,0,1,fp8,fp8,0,0.8919040362040201
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,10240,4,1,128,0,1,float16,float16,0,1.3636266390482585
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,10240,4,1,128,0,1,float16,fp8,0,1.3216426372528076
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,10240,4,1,128,0,1,fp8,fp8,0,0.8280746936798096
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,10240,4,2,128,0,1,float16,float16,0,1.337685267130534
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,10240,4,2,128,0,1,float16,fp8,0,1.334272066752116
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,10240,4,2,128,0,1,fp8,fp8,0,0.8306346734364828
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,10240,4,4,128,0,1,float16,float16,0,0.7937706311543783
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,10240,4,4,128,0,1,float16,fp8,0,0.7918933232625326
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,10240,4,4,128,0,1,fp8,fp8,0,0.5133653481801351
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,10240,4,1,128,0,1,float16,float16,0,0.7869439919789633
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,10240,4,1,128,0,1,float16,fp8,0,0.7905279795328776
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,10240,4,1,128,0,1,fp8,fp8,0,0.5048319896062216
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,10240,4,2,128,0,1,float16,float16,0,0.7814826965332031
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,10240,4,2,128,0,1,fp8,fp8,0,0.5056853294372559
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,10240,4,2,128,0,1,float16,fp8,0,0.7905279795328776
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,8192,4,1,128,0,1,float16,float16,0,7.230122884114583
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,8192,4,1,128,0,1,float16,fp8,0,6.935893376668294
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,8192,4,1,128,0,1,fp8,fp8,0,4.376746813456218
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,8192,4,2,128,0,1,float16,float16,0,7.554901123046875
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,8192,4,2,128,0,1,fp8,fp8,0,4.520618756612142
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,8192,4,2,128,0,1,float16,fp8,0,7.399253209431966
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,8192,4,4,128,0,1,float16,float16,0,3.7782185872395835
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,8192,4,4,128,0,1,float16,fp8,0,3.7988694508870444
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,8192,4,4,128,0,1,fp8,fp8,0,2.44923734664917
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,8192,4,1,128,0,1,float16,fp8,0,3.2935253779093423
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,8192,4,1,128,0,1,float16,float16,0,3.431424140930176
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,8192,4,1,128,0,1,fp8,fp8,0,2.212010701497396
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,8192,4,2,128,0,1,float16,float16,0,3.4785280227661133
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,8192,4,2,128,0,1,fp8,fp8,0,2.228223959604899
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,8192,4,2,128,0,1,float16,fp8,0,3.4155521392822266
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,8192,4,4,128,0,1,float16,float16,0,1.90446933110555
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,8192,4,4,128,0,1,float16,fp8,0,1.8334719340006511
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,8192,4,4,128,0,1,fp8,fp8,0,1.2390399773915608
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,8192,4,1,128,0,1,float16,float16,0,1.6592213312784831
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,8192,4,1,128,0,1,fp8,fp8,0,0.999936024347941
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,8192,4,1,128,0,1,float16,fp8,0,1.673898696899414
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,8192,4,2,128,0,1,float16,float16,0,1.661952018737793
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,8192,4,2,128,0,1,float16,fp8,0,1.6568320592244465
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,8192,4,4,128,0,1,float16,float16,0,0.9094826380411783
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,8192,4,2,128,0,1,fp8,fp8,0,1.0709333419799805
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,8192,4,4,128,0,1,float16,fp8,0,0.9079466660817465
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,8192,4,4,128,0,1,fp8,fp8,0,0.5771946509679159
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,8192,4,1,128,0,1,float16,float16,0,0.889685312906901
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,8192,4,1,128,0,1,float16,fp8,0,0.9188693364461263
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,8192,4,1,128,0,1,fp8,fp8,0,0.5543253421783447
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,8192,4,2,128,0,1,float16,float16,0,0.9041919708251953
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,8192,4,2,128,0,1,float16,fp8,0,0.8840533097585043
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,8192,4,2,128,0,1,fp8,fp8,0,0.5698560078938802
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,8192,4,4,128,0,1,float16,float16,0,0.5541546742121378
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,8192,4,4,128,0,1,float16,fp8,0,0.5558613141377767
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,8192,4,4,128,0,1,fp8,fp8,0,0.3601066668828328
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,8192,4,1,128,0,1,float16,float16,0,0.5558613141377767
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,8192,4,1,128,0,1,float16,fp8,0,0.5502293507258097
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,8192,4,1,128,0,1,fp8,fp8,0,0.36266668637593585
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,8192,4,2,128,0,1,float16,float16,0,0.5488640069961548
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,8192,4,2,128,0,1,fp8,fp8,0,0.36403199036916095
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,8192,4,2,128,0,1,float16,fp8,0,0.5544960101445516
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,6144,4,1,128,0,1,float16,float16,0,4.054527918497722
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,6144,4,1,128,0,1,float16,fp8,0,3.9412053426106772
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,6144,4,1,128,0,1,fp8,fp8,0,2.711381276448568
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,6144,4,2,128,0,1,float16,float16,0,4.1325225830078125
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,6144,4,2,128,0,1,float16,fp8,0,4.118528048197429
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,6144,4,2,128,0,1,fp8,fp8,0,2.709845225016276
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,6144,4,4,128,0,1,float16,float16,0,2.2599679629007974
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,6144,4,4,128,0,1,float16,fp8,0,2.25655460357666
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,6144,4,4,128,0,1,fp8,fp8,0,1.5477760632832844
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,6144,4,1,128,0,1,float16,float16,0,1.911296049753825
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,6144,4,1,128,0,1,fp8,fp8,0,1.2156586647033691
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,6144,4,1,128,0,1,float16,fp8,0,1.9310933748881023
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,6144,4,2,128,0,1,float16,float16,0,2.018474737803141
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,6144,4,2,128,0,1,float16,fp8,0,1.979050636291504
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,6144,4,2,128,0,1,fp8,fp8,0,1.3392213185628254
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,6144,4,4,128,0,1,float16,float16,0,1.0987520217895508
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,6144,4,4,128,0,1,float16,fp8,0,1.047381321589152
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,6144,4,4,128,0,1,fp8,fp8,0,0.7316479682922363
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,6144,4,1,128,0,1,float16,float16,0,0.964949369430542
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,6144,4,1,128,0,1,float16,fp8,0,0.9929386774698893
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,6144,4,1,128,0,1,fp8,fp8,0,0.6029653151830038
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,6144,4,2,128,0,1,float16,float16,0,0.9712639649709066
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,6144,4,2,128,0,1,float16,fp8,0,0.981333335240682
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,6144,4,4,128,0,1,float16,float16,0,0.566271980603536
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,6144,4,2,128,0,1,fp8,fp8,0,0.6236159801483154
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,6144,4,4,128,0,1,float16,fp8,0,0.5705386797587076
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,6144,4,4,128,0,1,fp8,fp8,0,0.3534506559371948
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,6144,4,1,128,0,1,float16,float16,0,0.5517653226852417
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,6144,4,1,128,0,1,float16,fp8,0,0.5469866593678793
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,6144,4,1,128,0,1,fp8,fp8,0,0.34594134489695233
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,6144,4,2,128,0,1,float16,float16,0,0.5505706469217936
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,6144,4,2,128,0,1,float16,fp8,0,0.5434026718139648
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,6144,4,2,128,0,1,fp8,fp8,0,0.35310932000478107
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,6144,4,4,128,0,1,float16,float16,0,0.34645334879557294
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,6144,4,4,128,0,1,float16,fp8,0,0.34594134489695233
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,6144,4,4,128,0,1,fp8,fp8,0,0.23500800132751465
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,6144,4,1,128,0,1,float16,float16,0,0.3490133285522461
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,6144,4,1,128,0,1,float16,fp8,0,0.3421866496404012
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,6144,4,1,128,0,1,fp8,fp8,0,0.23347200949986777
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,6144,4,2,128,0,1,float16,float16,0,0.34867199261983234
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,6144,4,2,128,0,1,fp8,fp8,0,0.2336426575978597
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,6144,4,2,128,0,1,float16,fp8,0,0.34645334879557294
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,4096,4,1,128,0,1,float16,float16,0,3.9710718790690103
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,4096,4,1,128,0,1,fp8,fp8,0,2.7296425501505532
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,4096,4,1,128,0,1,float16,fp8,0,4.056917190551758
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,4096,4,2,128,0,1,float16,float16,0,4.141909281412761
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,4096,4,2,128,0,1,float16,fp8,0,4.102826754252116
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,4096,4,4,128,0,1,float16,float16,0,2.263040065765381
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,4096,4,2,128,0,1,fp8,fp8,0,2.899456024169922
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,4096,4,4,128,0,1,float16,fp8,0,2.2399999300638833
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,4096,4,4,128,0,1,fp8,fp8,0,1.5752533276875813
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,4096,4,1,128,0,1,float16,float16,0,1.8662400245666504
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,4096,4,1,128,0,1,float16,fp8,0,1.885525385538737
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,4096,4,1,128,0,1,fp8,fp8,0,1.2511573632558186
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,4096,4,2,128,0,1,float16,float16,0,1.9880960782368977
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,4096,4,2,128,0,1,float16,fp8,0,1.9611306190490723
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,4096,4,2,128,0,1,fp8,fp8,0,1.4115840593973796
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,4096,4,4,128,0,1,float16,float16,0,1.1182080109914143
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,4096,4,4,128,0,1,float16,fp8,0,1.065130631128947
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,4096,4,4,128,0,1,fp8,fp8,0,0.7837013403574625
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,4096,4,1,128,0,1,float16,float16,0,0.8997546831766764
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,4096,4,1,128,0,1,float16,fp8,0,0.8644266923268636
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,4096,4,1,128,0,1,fp8,fp8,0,0.5534720023473104
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,4096,4,2,128,0,1,float16,float16,0,0.912384033203125
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,4096,4,2,128,0,1,float16,fp8,0,0.9110186894734701
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,4096,4,4,128,0,1,float16,float16,0,0.5019306739171346
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,4096,4,2,128,0,1,fp8,fp8,0,0.6249813238779703
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,4096,4,4,128,0,1,float16,fp8,0,0.508074680964152
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,4096,4,4,128,0,1,fp8,fp8,0,0.32204800844192505
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,4096,4,1,128,0,1,float16,float16,0,0.47086934248606366
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,4096,4,1,128,0,1,float16,fp8,0,0.4747946659723918
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,4096,4,1,128,0,1,fp8,fp8,0,0.303274671236674
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,4096,4,2,128,0,1,float16,float16,0,0.5002239942550659
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,4096,4,2,128,0,1,float16,fp8,0,0.4833279848098755
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,4096,4,2,128,0,1,fp8,fp8,0,0.30719999472300213
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,4096,4,4,128,0,1,float16,float16,0,0.3036160071690877
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,4096,4,4,128,0,1,fp8,fp8,0,0.19473065932591757
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,4096,4,4,128,0,1,float16,fp8,0,0.3056640028953552
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,4096,4,1,128,0,1,float16,float16,0,0.2990079919497172
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,4096,4,1,128,0,1,float16,fp8,0,0.2969599962234497
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,4096,4,2,128,0,1,float16,float16,0,0.2916693290074666
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,4096,4,1,128,0,1,fp8,fp8,0,0.20172800620396933
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,4096,4,2,128,0,1,fp8,fp8,0,0.19950934251149496
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,4096,4,2,128,0,1,float16,fp8,0,0.2969599962234497
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,4096,4,4,128,0,1,float16,float16,0,0.18449066082636514
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,4096,4,4,128,0,1,float16,fp8,0,0.18858667214711508
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,4096,4,4,128,0,1,fp8,fp8,0,0.1225386659304301
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,4096,4,1,128,0,1,float16,float16,0,0.18756266434987387
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,4096,4,1,128,0,1,float16,fp8,0,0.18619734048843384
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,4096,4,1,128,0,1,fp8,fp8,0,0.12288000186284383
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,4096,4,2,128,0,1,float16,float16,0,0.1884160041809082
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,4096,4,2,128,0,1,float16,fp8,0,0.18653867642084757
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,4096,4,2,128,0,1,fp8,fp8,0,0.12014933427174886
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,3072,4,1,128,0,1,float16,float16,0,2.3987199465433755
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,3072,4,1,128,0,1,float16,fp8,0,2.312533378601074
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,3072,4,1,128,0,1,fp8,fp8,0,1.6720213890075684
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,3072,4,2,128,0,1,float16,float16,0,2.500607967376709
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,3072,4,2,128,0,1,float16,fp8,0,2.5011199315389
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,3072,4,2,128,0,1,fp8,fp8,0,1.803434689839681
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,3072,4,4,128,0,1,float16,fp8,0,1.3619200388590496
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,3072,4,4,128,0,1,float16,float16,0,1.4248959223429363
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,3072,4,4,128,0,1,fp8,fp8,0,1.0224640369415283
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,3072,4,1,128,0,1,float16,fp8,0,1.0642773310343425
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,3072,4,1,128,0,1,float16,float16,0,1.0862932999928792
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,3072,4,1,128,0,1,fp8,fp8,0,0.73471999168396
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,3072,4,2,128,0,1,float16,float16,0,1.2264106273651123
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,3072,4,2,128,0,1,float16,fp8,0,1.180842638015747
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,3072,4,2,128,0,1,fp8,fp8,0,0.8388266563415527
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,3072,4,4,128,0,1,float16,float16,0,0.65228799978892
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,3072,4,4,128,0,1,float16,fp8,0,0.6055253346761068
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,3072,4,4,128,0,1,fp8,fp8,0,0.47086934248606366
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,3072,4,1,128,0,1,float16,float16,0,0.5389653444290161
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,3072,4,1,128,0,1,float16,fp8,0,0.529749313990275
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,3072,4,1,128,0,1,fp8,fp8,0,0.3333119948705037
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,3072,4,2,128,0,1,float16,float16,0,0.5442560116449991
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,3072,4,2,128,0,1,float16,fp8,0,0.5338453451792399
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,3072,4,2,128,0,1,fp8,fp8,0,0.3452586730321248
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,3072,4,4,128,0,1,float16,float16,0,0.3118079900741577
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,3072,4,4,128,0,1,float16,fp8,0,0.3051519989967346
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,3072,4,4,128,0,1,fp8,fp8,0,0.19524266322453818
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,3072,4,1,128,0,1,float16,float16,0,0.29149866104125977
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,3072,4,1,128,0,1,float16,fp8,0,0.29149866104125977
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,3072,4,1,128,0,1,fp8,fp8,0,0.19814399878184
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,3072,4,2,128,0,1,float16,float16,0,0.29320534070332843
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,3072,4,2,128,0,1,float16,fp8,0,0.2950826684633891
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,3072,4,2,128,0,1,fp8,fp8,0,0.19814399878184
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,3072,4,4,128,0,1,float16,float16,0,0.18295466899871826
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,3072,4,4,128,0,1,float16,fp8,0,0.18466132879257202
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,3072,4,4,128,0,1,fp8,fp8,0,0.1262933313846588
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,3072,4,1,128,0,1,float16,float16,0,0.18414932489395142
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,3072,4,1,128,0,1,float16,fp8,0,0.18261333306630453
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,3072,4,1,128,0,1,fp8,fp8,0,0.11963733037312825
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,3072,4,2,128,0,1,float16,float16,0,0.18500266472498575
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,3072,4,2,128,0,1,float16,fp8,0,0.18705066045125326
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,3072,4,2,128,0,1,fp8,fp8,0,0.12475732962290446
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,3072,4,4,128,0,1,float16,float16,0,0.1365333298842112
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,3072,4,4,128,0,1,float16,fp8,0,0.13687466581662497
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,3072,4,4,128,0,1,fp8,fp8,0,0.09181867043177287
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,3072,4,1,128,0,1,float16,float16,0,0.13806933164596558
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,3072,4,1,128,0,1,float16,fp8,0,0.13636266191800436
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,3072,4,1,128,0,1,fp8,fp8,0,0.09130666653315227
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,3072,4,2,128,0,1,float16,float16,0,0.13772799571355185
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,3072,4,2,128,0,1,float16,fp8,0,0.13704533378283182
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,3072,4,2,128,0,1,fp8,fp8,0,0.09181867043177287
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,2048,4,1,128,0,1,float16,float16,0,2.482858657836914
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,2048,4,1,128,0,1,fp8,fp8,0,1.757695992787679
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,2048,4,1,128,0,1,float16,fp8,0,2.496682643890381
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,2048,4,2,128,0,1,float16,float16,0,2.6168319384256997
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,2048,4,4,128,0,1,float16,float16,0,1.5129599571228027
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,2048,4,4,128,0,1,float16,fp8,0,1.4718292554219563
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,2048,4,2,128,0,1,fp8,fp8,0,1.900544007619222
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,2048,4,2,128,0,1,float16,fp8,0,2.617173353830973
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,2048,4,4,128,0,1,fp8,fp8,0,1.085098663965861
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,2048,4,1,128,0,1,float16,float16,0,1.1455146471659343
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,2048,4,1,128,0,1,float16,fp8,0,1.137664000193278
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,2048,4,1,128,0,1,fp8,fp8,0,0.8190293312072754
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,2048,4,2,128,0,1,float16,float16,0,1.2571307023366292
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,2048,4,2,128,0,1,float16,fp8,0,1.2313600381215413
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,2048,4,2,128,0,1,fp8,fp8,0,0.92740265528361
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,2048,4,4,128,0,1,float16,float16,0,0.7255040009816488
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,2048,4,4,128,0,1,float16,fp8,0,0.6930773258209229
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,2048,4,4,128,0,1,fp8,fp8,0,0.5481813351313273
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,2048,4,1,128,0,1,float16,float16,0,0.508074680964152
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,2048,4,1,128,0,1,float16,fp8,0,0.48878931999206543
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,2048,4,1,128,0,1,fp8,fp8,0,0.31624533732732135
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,2048,4,2,128,0,1,float16,float16,0,0.5452800194422404
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,2048,4,2,128,0,1,float16,fp8,0,0.5306026538213094
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,2048,4,4,128,0,1,float16,float16,0,0.28125866254170734
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,2048,4,2,128,0,1,fp8,fp8,0,0.3968000014623006
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,2048,4,4,128,0,1,float16,fp8,0,0.2851840058962504
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,2048,4,1,128,0,1,float16,float16,0,0.2585600018501282
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,2048,4,4,128,0,1,fp8,fp8,0,0.18500266472498575
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,2048,4,1,128,0,1,float16,fp8,0,0.2635093331336975
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,2048,4,1,128,0,1,fp8,fp8,0,0.17117865880330405
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,2048,4,2,128,0,1,float16,fp8,0,0.2701653242111206
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,2048,4,2,128,0,1,float16,float16,0,0.2769920031229655
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,2048,4,2,128,0,1,fp8,fp8,0,0.16674133141835532
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,2048,4,4,128,0,1,float16,float16,0,0.1713493267695109
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,2048,4,4,128,0,1,float16,fp8,0,0.16383999586105347
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,2048,4,4,128,0,1,fp8,fp8,0,0.10990933577219646
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,2048,4,1,128,0,1,float16,float16,0,0.16639999548594156
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,2048,4,1,128,0,1,float16,fp8,0,0.1646933356920878
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,2048,4,1,128,0,1,fp8,fp8,0,0.1083733340104421
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,2048,4,2,128,0,1,float16,float16,0,0.16537599762280783
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,2048,4,2,128,0,1,float16,fp8,0,0.1629866659641266
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,2048,4,4,128,0,1,float16,float16,0,0.09471999605496724
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,2048,4,2,128,0,1,fp8,fp8,0,0.10939733187357585
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,2048,4,4,128,0,1,float16,fp8,0,0.10001066327095032
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,2048,4,4,128,0,1,fp8,fp8,0,0.06109866499900818
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,2048,4,1,128,0,1,float16,float16,0,0.09540266791979472
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,2048,4,1,128,0,1,float16,fp8,0,0.09727999567985535
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,2048,4,1,128,0,1,fp8,fp8,0,0.06007466713587443
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,2048,4,2,128,0,1,float16,float16,0,0.09454933802286784
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,2048,4,2,128,0,1,float16,fp8,0,0.09608532985051473
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,2048,4,2,128,0,1,fp8,fp8,0,0.060415998101234436
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,2048,4,4,128,0,1,float16,float16,0,0.07611733178297679
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,2048,4,4,128,0,1,float16,fp8,0,0.07611733178297679
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,2048,4,4,128,0,1,fp8,fp8,0,0.04966400067011515
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,2048,4,1,128,0,1,float16,float16,0,0.07645866771539052
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,2048,4,1,128,0,1,float16,fp8,0,0.0766293356815974
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,2048,4,1,128,0,1,fp8,fp8,0,0.04966400067011515
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,2048,4,2,128,0,1,float16,float16,0,0.07679999868075053
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,2048,4,2,128,0,1,float16,fp8,0,0.0769706666469574
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,2048,4,2,128,0,1,fp8,fp8,0,0.04949333270390829
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,1536,4,1,128,0,1,float16,float16,0,1.5295146306355794
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,1536,4,1,128,0,1,fp8,fp8,0,1.0876586437225342
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,1536,4,1,128,0,1,float16,fp8,0,1.5045973459879558
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,1536,4,2,128,0,1,float16,float16,0,1.6851627031962078
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,1536,4,2,128,0,1,float16,fp8,0,1.6343040466308594
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,1536,4,4,128,0,1,float16,float16,0,0.9751893679300944
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,1536,4,2,128,0,1,fp8,fp8,0,1.1499520142873128
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,1536,4,4,128,0,1,float16,fp8,0,0.9390079975128174
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,1536,4,4,128,0,1,fp8,fp8,0,0.7270399729410807
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,1536,4,1,128,0,1,float16,float16,0,0.6655999819437662
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,1536,4,1,128,0,1,float16,fp8,0,0.6539946794509888
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,1536,4,1,128,0,1,fp8,fp8,0,0.49186134338378906
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,1536,4,2,128,0,1,fp8,fp8,0,0.5684906641642252
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,1536,4,2,128,0,1,float16,float16,0,0.79257599512736
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,1536,4,2,128,0,1,float16,fp8,0,0.7688533465067545
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,1536,4,4,128,0,1,float16,float16,0,0.4283733367919922
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,1536,4,4,128,0,1,float16,fp8,0,0.3717120091120402
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,1536,4,4,128,0,1,fp8,fp8,0,0.3529386520385742
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,1536,4,1,128,0,1,float16,float16,0,0.2996906638145447
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,1536,4,1,128,0,1,float16,fp8,0,0.30344533920288086
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,1536,4,1,128,0,1,fp8,fp8,0,0.18175999323527017
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,1536,4,2,128,0,1,float16,fp8,0,0.30771199862162274
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,1536,4,2,128,0,1,fp8,fp8,0,0.20753065745035806
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,1536,4,2,128,0,1,float16,float16,0,0.314026673634847
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,1536,4,4,128,0,1,float16,float16,0,0.17988266547520956
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,1536,4,4,128,0,1,float16,fp8,0,0.1771519978841146
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,1536,4,4,128,0,1,fp8,fp8,0,0.1129813293615977
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,1536,4,1,128,0,1,float16,float16,0,0.1867093245188395
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,1536,4,1,128,0,1,float16,fp8,0,0.16844799121220908
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,1536,4,1,128,0,1,fp8,fp8,0,0.11264000336329143
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,1536,4,2,128,0,1,float16,float16,0,0.17459199825922647
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,1536,4,2,128,0,1,float16,fp8,0,0.16861865917841592
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,1536,4,2,128,0,1,fp8,fp8,0,0.11264000336329143
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,1536,4,4,128,0,1,float16,float16,0,0.11110400160153706
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,1536,4,4,128,0,1,float16,fp8,0,0.11195733149846394
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,1536,4,4,128,0,1,fp8,fp8,0,0.07884799937407176
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,1536,4,1,128,0,1,float16,float16,0,0.10734933614730835
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,1536,4,1,128,0,1,float16,fp8,0,0.11366400122642517
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,1536,4,1,128,0,1,fp8,fp8,0,0.07628799974918365
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,1536,4,2,128,0,1,fp8,fp8,0,0.07526400188604991
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,1536,4,2,128,0,1,float16,fp8,0,0.11212799946467082
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,1536,4,2,128,0,1,float16,float16,0,0.10683733224868774
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,1536,4,4,128,0,1,float16,float16,0,0.07150933146476746
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,1536,4,4,128,0,1,float16,fp8,0,0.0721919983625412
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,1536,4,4,128,0,1,fp8,fp8,0,0.05034666756788889
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,1536,4,1,128,0,1,float16,float16,0,0.07202133536338806
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,1536,4,1,128,0,1,float16,fp8,0,0.0721919983625412
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,1536,4,1,128,0,1,fp8,fp8,0,0.04949333270390829
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,1536,4,2,128,0,1,float16,float16,0,0.07150933146476746
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,1536,4,2,128,0,1,float16,fp8,0,0.07150933146476746
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,1536,4,2,128,0,1,fp8,fp8,0,0.04949333270390829
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,1536,4,4,128,0,1,float16,float16,0,0.05870933334032694
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,1536,4,4,128,0,1,float16,fp8,0,0.05905066430568695
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,1536,4,4,128,0,1,fp8,fp8,0,0.03908266623814901
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,1536,4,1,128,0,1,float16,fp8,0,0.05922133227189382
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,1536,4,1,128,0,1,float16,float16,0,0.05905066430568695
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,1536,4,1,128,0,1,fp8,fp8,0,0.038912000755469
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,1536,4,2,128,0,1,float16,float16,0,0.05922133227189382
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,1536,4,2,128,0,1,float16,fp8,0,0.059562668204307556
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,1536,4,2,128,0,1,fp8,fp8,0,0.038912000755469
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,1024,4,1,128,0,1,float16,float16,0,1.5539199511210124
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,1024,4,1,128,0,1,float16,fp8,0,1.5414613087972004
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,1024,4,1,128,0,1,fp8,fp8,0,1.0525013605753581
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,1024,4,2,128,0,1,float16,fp8,0,1.7134933471679688
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,1024,4,2,128,0,1,float16,float16,0,1.7908053398132324
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,1024,4,4,128,0,1,float16,float16,0,1.1048959891001384
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,1024,4,2,128,0,1,fp8,fp8,0,1.24945068359375
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,1024,4,4,128,0,1,float16,fp8,0,1.0535253683725994
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,1024,4,4,128,0,1,fp8,fp8,0,0.7884799639383951
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,1024,4,1,128,0,1,float16,fp8,0,0.7608319918314616
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,1024,4,1,128,0,1,float16,float16,0,0.7835306326548258
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,1024,4,1,128,0,1,fp8,fp8,0,0.5225813388824463
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,1024,4,2,128,0,1,float16,float16,0,0.8917333285013834
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,1024,4,2,128,0,1,float16,fp8,0,0.846677303314209
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,1024,4,2,128,0,1,fp8,fp8,0,0.6133759816487631
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,1024,4,4,128,0,1,float16,float16,0,0.5490346749623617
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,1024,4,4,128,0,1,float16,fp8,0,0.5067093372344971
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,1024,4,4,128,0,1,fp8,fp8,0,0.38809601465861004
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,1024,4,1,128,0,1,float16,float16,0,0.2921813329060872
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,1024,4,1,128,0,1,float16,fp8,0,0.29098665714263916
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,1024,4,1,128,0,1,fp8,fp8,0,0.20241065820058188
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,1024,4,2,128,0,1,float16,float16,0,0.34303998947143555
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,1024,4,2,128,0,1,float16,fp8,0,0.32443734010060626
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,1024,4,2,128,0,1,fp8,fp8,0,0.28962133328119916
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,1024,4,4,128,0,1,float16,float16,0,0.17390932639439902
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,1024,4,4,128,0,1,float16,fp8,0,0.16588800152142844
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,1024,4,4,128,0,1,fp8,fp8,0,0.10615467031796773
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,1024,4,1,128,0,1,float16,float16,0,0.1585493286450704
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,1024,4,1,128,0,1,float16,fp8,0,0.15940266847610474
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,1024,4,1,128,0,1,fp8,fp8,0,0.10222933689753215
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,1024,4,2,128,0,1,float16,float16,0,0.15940266847610474
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,1024,4,2,128,0,1,float16,fp8,0,0.16247466206550598
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,1024,4,2,128,0,1,fp8,fp8,0,0.0993280013402303
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,1024,4,4,128,0,1,float16,float16,0,0.10240000486373901
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,1024,4,4,128,0,1,float16,fp8,0,0.10393599669138591
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,1024,4,4,128,0,1,fp8,fp8,0,0.07202133536338806
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,1024,4,1,128,0,1,float16,float16,0,0.10052266716957092
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,1024,4,1,128,0,1,float16,fp8,0,0.0993280013402303
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,1024,4,1,128,0,1,fp8,fp8,0,0.06092800199985504
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,1024,4,2,128,0,1,float16,float16,0,0.10257066289583842
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,1024,4,2,128,0,1,fp8,fp8,0,0.0602453351020813
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,1024,4,2,128,0,1,float16,fp8,0,0.09830400347709656
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,1024,4,4,128,0,1,float16,float16,0,0.0554666668176651
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,1024,4,4,128,0,1,float16,fp8,0,0.05495466788609823
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,1024,4,4,128,0,1,fp8,fp8,0,0.03857066730658213
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,1024,4,1,128,0,1,float16,float16,0,0.053930665055910744
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,1024,4,1,128,0,1,float16,fp8,0,0.05478399991989136
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,1024,4,1,128,0,1,fp8,fp8,0,0.038058665891488395
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,1024,4,2,128,0,1,float16,float16,0,0.05444266895453135
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,1024,4,2,128,0,1,float16,fp8,0,0.05529599885145823
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,1024,4,4,128,0,1,float16,float16,0,0.04351999859015147
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,1024,4,2,128,0,1,fp8,fp8,0,0.038058665891488395
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,1024,4,4,128,0,1,float16,fp8,0,0.04334933559099833
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,1024,4,4,128,0,1,fp8,fp8,0,0.02918400118748347
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,1024,4,1,128,0,1,float16,float16,0,0.04266666869322459
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,1024,4,1,128,0,1,float16,fp8,0,0.04249600072701772
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,1024,4,1,128,0,1,fp8,fp8,0,0.02867199977238973
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,1024,4,2,128,0,1,float16,float16,0,0.043178667624791466
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,1024,4,2,128,0,1,float16,fp8,0,0.043007999658584595
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,1024,4,2,128,0,1,fp8,fp8,0,0.0288426677385966
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,1024,4,4,128,0,1,float16,fp8,0,0.04095999896526337
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,1024,4,4,128,0,1,float16,float16,0,0.040789333482583366
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,1024,4,4,128,0,1,fp8,fp8,0,0.027136000494162243
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,1024,4,1,128,0,1,float16,float16,0,0.040618665516376495
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,1024,4,1,128,0,1,float16,fp8,0,0.040448000033696495
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,1024,4,1,128,0,1,fp8,fp8,0,0.026965332527955372
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,1024,4,2,128,0,1,float16,float16,0,0.044031997521718345
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,1024,4,2,128,0,1,float16,fp8,0,0.04095999896526337
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,1024,4,2,128,0,1,fp8,fp8,0,0.027136000494162243
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,512,4,1,128,0,1,float16,float16,0,1.1948373317718506
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,512,4,1,128,0,1,float16,fp8,0,1.160703976949056
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,512,4,1,128,0,1,fp8,fp8,0,0.8323413530985514
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,512,4,2,128,0,1,float16,float16,0,1.4387200673421223
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,512,4,2,128,0,1,float16,fp8,0,1.3760852813720703
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,512,4,4,128,0,1,float16,float16,0,1.0422613620758057
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,512,4,2,128,0,1,fp8,fp8,0,1.047381321589152
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,512,4,4,128,0,1,float16,fp8,0,0.9861120382944742
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,512,4,4,128,0,1,fp8,fp8,0,0.7406933307647705
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,512,4,1,128,0,1,float16,float16,0,0.5898240009943644
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,512,4,1,128,0,1,float16,fp8,0,0.5705386797587076
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,512,4,1,128,0,1,fp8,fp8,0,0.4092586835225423
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,512,4,2,128,0,1,float16,fp8,0,0.6934186617533366
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,512,4,2,128,0,1,fp8,fp8,0,0.5108053286870321
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,512,4,2,128,0,1,float16,float16,0,0.7205546696980795
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,512,4,4,128,0,1,float16,float16,0,0.5024426778157552
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,512,4,4,128,0,1,float16,fp8,0,0.4575573205947876
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,512,4,1,128,0,1,float16,float16,0,0.1996799906094869
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,512,4,4,128,0,1,fp8,fp8,0,0.3357013463973999
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,512,4,1,128,0,1,float16,fp8,0,0.19387733936309814
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,512,4,1,128,0,1,fp8,fp8,0,0.13431466619173685
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,512,4,2,128,0,1,float16,float16,0,0.25514666239420575
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,512,4,2,128,0,1,float16,fp8,0,0.21947733561197916
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,512,4,2,128,0,1,fp8,fp8,0,0.22835199038187662
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,512,4,4,128,0,1,float16,float16,0,0.11827199657758077
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,512,4,4,128,0,1,float16,fp8,0,0.10717866818110149
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,512,4,1,128,0,1,float16,float16,0,0.10035199920336406
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,512,4,4,128,0,1,fp8,fp8,0,0.07014399766921997
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,512,4,1,128,0,1,fp8,fp8,0,0.06758399804433186
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,512,4,1,128,0,1,float16,fp8,0,0.09966933727264404
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,512,4,2,128,0,1,float16,float16,0,0.10154666503270467
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,512,4,2,128,0,1,float16,fp8,0,0.10103467106819153
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,512,4,2,128,0,1,fp8,fp8,0,0.06690133114655812
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,512,4,4,128,0,1,float16,fp8,0,0.06178133189678192
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,512,4,4,128,0,1,float16,float16,0,0.0628053347269694
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,512,4,4,128,0,1,fp8,fp8,0,0.04249600072701772
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,512,4,1,128,0,1,float16,float16,0,0.056832000613212585
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,512,4,1,128,0,1,float16,fp8,0,0.05734399954477946
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,512,4,1,128,0,1,fp8,fp8,0,0.04147200038035711
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,512,4,2,128,0,1,float16,float16,0,0.059562668204307556
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,512,4,2,128,0,1,float16,fp8,0,0.059562668204307556
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,512,4,2,128,0,1,fp8,fp8,0,0.04095999896526337
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,512,4,4,128,0,1,float16,float16,0,0.03583999971548716
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,512,4,4,128,0,1,float16,fp8,0,0.03532800078392029
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,512,4,4,128,0,1,fp8,fp8,0,0.025770666698614757
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,512,4,1,128,0,1,float16,float16,0,0.0341333324710528
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,512,4,1,128,0,1,float16,fp8,0,0.034645333886146545
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,512,4,1,128,0,1,fp8,fp8,0,0.025087999800841015
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,512,4,2,128,0,1,float16,float16,0,0.034474665919939675
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,512,4,2,128,0,1,float16,fp8,0,0.034474665919939675
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,512,4,2,128,0,1,fp8,fp8,0,0.025087999800841015
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,512,4,4,128,0,1,float16,fp8,0,0.027306665976842243
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,512,4,4,128,0,1,float16,float16,0,0.027477333943049114
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,512,4,1,128,0,1,float16,float16,0,0.0266239990790685
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,512,4,4,128,0,1,fp8,fp8,0,0.020138667275508244
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,512,4,1,128,0,1,float16,fp8,0,0.0266239990790685
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,512,4,1,128,0,1,fp8,fp8,0,0.0194560003777345
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,512,4,2,128,0,1,float16,float16,0,0.02679466704527537
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,512,4,2,128,0,1,float16,fp8,0,0.02679466704527537
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,512,4,2,128,0,1,fp8,fp8,0,0.019285333653291065
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,512,4,4,128,0,1,float16,float16,0,0.02491733431816101
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,512,4,4,128,0,1,fp8,fp8,0,0.018090666582187016
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,512,4,4,128,0,1,float16,fp8,0,0.025087999800841015
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,512,4,1,128,0,1,float16,float16,0,0.02491733431816101
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,512,4,1,128,0,1,float16,fp8,0,0.024746666351954143
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,512,4,1,128,0,1,fp8,fp8,0,0.017749333133300144
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,512,4,2,128,0,1,float16,float16,0,0.02491733431816101
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,512,4,2,128,0,1,float16,fp8,0,0.02491733431816101
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,512,4,2,128,0,1,fp8,fp8,0,0.018090666582187016
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,512,4,4,128,0,1,float16,fp8,0,0.0240639994541804
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,512,4,4,128,0,1,float16,float16,0,0.02372266600529353
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,512,4,4,128,0,1,fp8,fp8,0,0.017237332959969837
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,512,4,1,128,0,1,float16,fp8,0,0.023893333971500397
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,512,4,1,128,0,1,float16,float16,0,0.023893333971500397
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,512,4,1,128,0,1,fp8,fp8,0,0.0170666662355264
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,512,4,2,128,0,1,float16,float16,0,0.023893333971500397
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,512,4,2,128,0,1,fp8,fp8,0,0.017407999684413273
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,512,4,2,128,0,1,float16,fp8,0,0.024234667420387268
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,256,4,1,128,0,1,float16,float16,0,0.5672959884007772
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,256,4,1,128,0,1,float16,fp8,0,0.5454506476720175
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,256,4,1,128,0,1,fp8,fp8,0,0.35788798332214355
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,256,4,2,128,0,1,float16,float16,0,0.7236266930898031
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,256,4,2,128,0,1,float16,fp8,0,0.6925653616587321
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,256,4,2,128,0,1,fp8,fp8,0,0.4884479840596517
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,256,4,4,128,0,1,float16,float16,0,0.4978346824645996
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,256,4,4,128,0,1,float16,fp8,0,0.4585813283920288
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,256,4,4,128,0,1,fp8,fp8,0,0.3054933349291484
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,256,4,1,128,0,1,float16,float16,0,0.13619200388590494
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,256,4,1,128,0,1,float16,fp8,0,0.13226667046546936
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,256,4,1,128,0,1,fp8,fp8,0,0.09113599856694539
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,256,4,2,128,0,1,float16,fp8,0,0.16861865917841592
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,256,4,2,128,0,1,float16,float16,0,0.20411733786265054
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,256,4,2,128,0,1,fp8,fp8,0,0.19780266284942627
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,256,4,4,128,0,1,float16,float16,0,0.09062400460243225
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,256,4,4,128,0,1,float16,fp8,0,0.08277333279450734
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,256,4,4,128,0,1,fp8,fp8,0,0.055125330885251365
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,256,4,1,128,0,1,float16,float16,0,0.0679253339767456
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,256,4,1,128,0,1,float16,fp8,0,0.0682666649421056
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,256,4,1,128,0,1,fp8,fp8,0,0.05085866649945577
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,256,4,2,128,0,1,float16,fp8,0,0.07133866846561432
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,256,4,2,128,0,1,float16,float16,0,0.08106666803359985
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,256,4,2,128,0,1,fp8,fp8,0,0.05120000243186951
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,256,4,4,128,0,1,float16,float16,0,0.04386133452256521
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,256,4,4,128,0,1,float16,fp8,0,0.04386133452256521
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,256,4,4,128,0,1,fp8,fp8,0,0.032255999743938446
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,256,4,1,128,0,1,float16,float16,0,0.040106666584809623
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,256,4,1,128,0,1,float16,fp8,0,0.040789333482583366
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,256,4,1,128,0,1,fp8,fp8,0,0.0315733328461647
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,256,4,2,128,0,1,float16,float16,0,0.041984001795450844
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,256,4,2,128,0,1,float16,fp8,0,0.04130133241415024
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,256,4,2,128,0,1,fp8,fp8,0,0.03089066594839096
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,256,4,4,128,0,1,float16,float16,0,0.025941332181294758
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,256,4,4,128,0,1,float16,fp8,0,0.025600001215934753
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,256,4,4,128,0,1,fp8,fp8,0,0.019626667102177937
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,256,4,1,128,0,1,float16,float16,0,0.024234667420387268
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,256,4,1,128,0,1,float16,fp8,0,0.025087999800841015
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,256,4,1,128,0,1,fp8,fp8,0,0.01911466692884763
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,256,4,2,128,0,1,float16,float16,0,0.025258667767047882
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,256,4,2,128,0,1,float16,fp8,0,0.02491733431816101
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,256,4,2,128,0,1,fp8,fp8,0,0.019285333653291065
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,256,4,4,128,0,1,float16,float16,0,0.0194560003777345
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,256,4,4,128,0,1,float16,fp8,0,0.01911466692884763
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,256,4,4,128,0,1,fp8,fp8,0,0.015530666957298914
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,256,4,1,128,0,1,float16,float16,0,0.01877333347996076
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,256,4,1,128,0,1,float16,fp8,0,0.018432000031073887
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,256,4,2,128,0,1,float16,float16,0,0.018944000204404194
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,256,4,1,128,0,1,fp8,fp8,0,0.014677333335081736
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,256,4,2,128,0,1,float16,fp8,0,0.018944000204404194
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,256,4,2,128,0,1,fp8,fp8,0,0.014848000059525171
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,256,4,4,128,0,1,float16,float16,0,0.016895999511082966
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,256,4,4,128,0,1,float16,fp8,0,0.0170666662355264
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,256,4,4,128,0,1,fp8,fp8,0,0.013482666263977686
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,256,4,1,128,0,1,float16,float16,0,0.01672533278663953
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,256,4,1,128,0,1,float16,fp8,0,0.0170666662355264
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,256,4,1,128,0,1,fp8,fp8,0,0.013141332815090815
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,256,4,2,128,0,1,float16,fp8,0,0.016895999511082966
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,256,4,2,128,0,1,float16,float16,0,0.016895999511082966
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,256,4,2,128,0,1,fp8,fp8,0,0.013482666263977686
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,256,4,4,128,0,1,float16,float16,0,0.01570133368174235
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,256,4,4,128,0,1,float16,fp8,0,0.01621333385507266
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,256,4,4,128,0,1,fp8,fp8,0,0.012629333883523941
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,256,4,1,128,0,1,float16,float16,0,0.016042667130629223
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,256,4,1,128,0,1,fp8,fp8,0,0.012458667159080505
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,256,4,1,128,0,1,float16,fp8,0,0.01621333385507266
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,256,4,2,128,0,1,float16,float16,0,0.016042667130629223
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,256,4,2,128,0,1,float16,fp8,0,0.016384000579516094
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,256,4,2,128,0,1,fp8,fp8,0,0.012800000607967377
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,256,4,4,128,0,1,float16,float16,0,0.01570133368174235
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,256,4,4,128,0,1,float16,fp8,0,0.015872000406185787
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,256,4,4,128,0,1,fp8,fp8,0,0.012458667159080505
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,256,4,1,128,0,1,float16,float16,0,0.01570133368174235
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,256,4,1,128,0,1,fp8,fp8,0,0.012629333883523941
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,256,4,1,128,0,1,float16,fp8,0,0.01621333385507266
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,256,4,2,128,0,1,float16,float16,0,0.015872000406185787
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,256,4,2,128,0,1,float16,fp8,0,0.016042667130629223
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,256,4,2,128,0,1,fp8,fp8,0,0.012800000607967377
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,128,4,1,128,0,1,float16,float16,0,0.11793067057927449
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,128,4,1,128,0,1,float16,fp8,0,0.10734933614730835
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,128,4,1,128,0,1,fp8,fp8,0,0.08021333316961925
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,128,4,2,128,0,1,float16,float16,0,0.1950719952583313
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,128,4,2,128,0,1,float16,fp8,0,0.1539413332939148
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,128,4,4,128,0,1,float16,fp8,0,0.06058666606744131
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,128,4,4,128,0,1,float16,float16,0,0.07594666878382365
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,128,4,2,128,0,1,fp8,fp8,0,0.18943999210993448
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,128,4,4,128,0,1,fp8,fp8,0,0.045738667249679565
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,128,4,1,128,0,1,float16,float16,0,0.05341866612434387
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,128,4,1,128,0,1,float16,fp8,0,0.053077335158983864
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,128,4,1,128,0,1,fp8,fp8,0,0.04351999859015147
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,128,4,2,128,0,1,float16,float16,0,0.05563733478387197
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,128,4,2,128,0,1,float16,fp8,0,0.05495466788609823
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,128,4,2,128,0,1,fp8,fp8,0,0.04334933559099833
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,128,4,1,128,0,1,float16,float16,0,0.031744000812371574
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,128,4,4,128,0,1,float16,float16,0,0.03498666733503342
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,128,4,4,128,0,1,fp8,fp8,0,0.026965332527955372
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,128,4,4,128,0,1,float16,fp8,0,0.0341333324710528
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,128,4,1,128,0,1,float16,fp8,0,0.031914666295051575
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,128,4,1,128,0,1,fp8,fp8,0,0.025770666698614757
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,128,4,2,128,0,1,float16,float16,0,0.03345066557327906
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,128,4,2,128,0,1,float16,fp8,0,0.03276800115903219
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,128,4,2,128,0,1,fp8,fp8,0,0.025600001215934753
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,128,4,4,128,0,1,float16,fp8,0,0.021333334346612293
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,128,4,4,128,0,1,fp8,fp8,0,0.017407999684413273
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,128,4,1,128,0,1,float16,float16,0,0.019968000551064808
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,128,4,4,128,0,1,float16,float16,0,0.021503999829292297
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,128,4,1,128,0,1,float16,fp8,0,0.020309332758188248
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,128,4,1,128,0,1,fp8,fp8,0,0.016554666062196095
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,128,4,2,128,0,1,float16,float16,0,0.020479999482631683
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,128,4,2,128,0,1,float16,fp8,0,0.02065066620707512
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,128,4,2,128,0,1,fp8,fp8,0,0.016554666062196095
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,128,4,4,128,0,1,float16,float16,0,0.015530666957298914
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,128,4,4,128,0,1,float16,fp8,0,0.015189333508412043
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,128,4,1,128,0,1,float16,float16,0,0.014677333335081736
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,128,4,4,128,0,1,fp8,fp8,0,0.012970666090647379
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,128,4,1,128,0,1,float16,fp8,0,0.014677333335081736
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,128,4,1,128,0,1,fp8,fp8,0,0.012458667159080505
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,128,4,2,128,0,1,float16,float16,0,0.015018666783968607
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,128,4,2,128,0,1,float16,fp8,0,0.015018666783968607
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,128,4,2,128,0,1,fp8,fp8,0,0.012458667159080505
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,128,4,4,128,0,1,float16,float16,0,0.012970666090647379
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,128,4,4,128,0,1,float16,fp8,0,0.013141332815090815
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,128,4,4,128,0,1,fp8,fp8,0,0.011264000087976456
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,128,4,1,128,0,1,float16,float16,0,0.012970666090647379
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,128,4,1,128,0,1,float16,fp8,0,0.012800000607967377
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,128,4,1,128,0,1,fp8,fp8,0,0.010922666639089584
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,128,4,2,128,0,1,float16,float16,0,0.013141332815090815
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,128,4,2,128,0,1,float16,fp8,0,0.013141332815090815
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,128,4,2,128,0,1,fp8,fp8,0,0.01109333336353302
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,128,4,4,128,0,1,float16,float16,0,0.011776000261306763
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,128,4,4,128,0,1,float16,fp8,0,0.012117333710193634
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,128,4,4,128,0,1,fp8,fp8,0,0.010410666465759277
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,128,4,1,128,0,1,float16,float16,0,0.012117333710193634
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,128,4,1,128,0,1,float16,fp8,0,0.012117333710193634
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,128,4,1,128,0,1,fp8,fp8,0,0.010410666465759277
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,128,4,2,128,0,1,float16,float16,0,0.012117333710193634
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,128,4,2,128,0,1,float16,fp8,0,0.012458667159080505
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,128,4,2,128,0,1,fp8,fp8,0,0.010581333190202713
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,128,4,4,128,0,1,float16,fp8,0,0.011946666985750198
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,128,4,4,128,0,1,float16,float16,0,0.011605333536863327
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,128,4,4,128,0,1,fp8,fp8,0,0.010069333637754122
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,128,4,1,128,0,1,float16,float16,0,0.011776000261306763
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,128,4,1,128,0,1,float16,fp8,0,0.012117333710193634
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,128,4,1,128,0,1,fp8,fp8,0,0.010239999741315842
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,128,4,2,128,0,1,float16,float16,0,0.011946666985750198
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,128,4,2,128,0,1,float16,fp8,0,0.012117333710193634
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,128,4,2,128,0,1,fp8,fp8,0,0.010410666465759277
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,128,4,4,128,0,1,float16,fp8,0,0.011776000261306763
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,128,4,4,128,0,1,fp8,fp8,0,0.010069333637754122
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,128,4,4,128,0,1,float16,float16,0,0.011434666812419891
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,128,4,1,128,0,1,float16,float16,0,0.011776000261306763
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,128,4,1,128,0,1,float16,fp8,0,0.011946666985750198
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,128,4,1,128,0,1,fp8,fp8,0,0.010410666465759277
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,128,4,2,128,0,1,float16,float16,0,0.011946666985750198
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,128,4,2,128,0,1,float16,fp8,0,0.01228800043463707
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,128,4,2,128,0,1,fp8,fp8,0,0.010239999741315842
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,64,4,1,128,0,1,float16,float16,0,0.04625066618124644
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,64,4,1,128,0,1,float16,fp8,0,0.04693333307902018
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,64,4,1,128,0,1,fp8,fp8,0,0.039594667653242745
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,64,4,2,128,0,1,float16,float16,0,0.04983466863632202
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,64,4,2,128,0,1,float16,fp8,0,0.048298666874567665
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,64,4,2,128,0,1,fp8,fp8,0,0.040106666584809623
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,64,4,4,128,0,1,float16,float16,0,0.031061333914597828
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,64,4,4,128,0,1,float16,fp8,0,0.029866665601730347
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,64,4,4,128,0,1,fp8,fp8,0,0.0240639994541804
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,64,4,1,128,0,1,float16,fp8,0,0.027647999425729115
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,64,4,1,128,0,1,float16,float16,0,0.027647999425729115
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,64,4,2,128,0,1,float16,float16,0,0.02867199977238973
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,64,4,1,128,0,1,fp8,fp8,0,0.023210667073726654
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,64,4,2,128,0,1,float16,fp8,0,0.028501334289709728
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,64,4,2,128,0,1,fp8,fp8,0,0.023039999107519787
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,64,4,4,128,0,1,float16,fp8,0,0.018432000031073887
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,64,4,4,128,0,1,float16,float16,0,0.018944000204404194
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,64,4,4,128,0,1,fp8,fp8,0,0.01570133368174235
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,64,4,1,128,0,1,float16,fp8,0,0.017407999684413273
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,64,4,1,128,0,1,float16,float16,0,0.017237332959969837
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,64,4,1,128,0,1,fp8,fp8,0,0.015018666783968607
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,64,4,2,128,0,1,float16,float16,0,0.017749333133300144
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,64,4,2,128,0,1,float16,fp8,0,0.017749333133300144
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,64,4,2,128,0,1,fp8,fp8,0,0.015018666783968607
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,64,4,4,128,0,1,float16,float16,0,0.013482666263977686
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,64,4,4,128,0,1,float16,fp8,0,0.013141332815090815
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,64,4,4,128,0,1,fp8,fp8,0,0.012117333710193634
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,64,4,1,128,0,1,float16,fp8,0,0.012629333883523941
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,64,4,1,128,0,1,float16,float16,0,0.012458667159080505
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,64,4,1,128,0,1,fp8,fp8,0,0.011264000087976456
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,64,4,2,128,0,1,float16,float16,0,0.012970666090647379
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,64,4,2,128,0,1,float16,fp8,0,0.012970666090647379
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,64,4,2,128,0,1,fp8,fp8,0,0.011264000087976456
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,64,4,4,128,0,1,float16,float16,0,0.01109333336353302
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,64,4,4,128,0,1,float16,fp8,0,0.011264000087976456
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,64,4,4,128,0,1,fp8,fp8,0,0.010239999741315842
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,64,4,1,128,0,1,float16,fp8,0,0.01109333336353302
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,64,4,1,128,0,1,float16,float16,0,0.010922666639089584
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,64,4,1,128,0,1,fp8,fp8,0,0.009898666913310686
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,64,4,2,128,0,1,float16,float16,0,0.010922666639089584
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,64,4,2,128,0,1,float16,fp8,0,0.01109333336353302
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,64,4,2,128,0,1,fp8,fp8,0,0.010069333637754122
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,64,4,4,128,0,1,float16,float16,0,0.010069333637754122
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,64,4,4,128,0,1,float16,fp8,0,0.010239999741315842
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,64,4,4,128,0,1,fp8,fp8,0,0.009216000015536943
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,64,4,1,128,0,1,float16,fp8,0,0.010239999741315842
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,64,4,1,128,0,1,float16,float16,0,0.010069333637754122
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,64,4,1,128,0,1,fp8,fp8,0,0.009045333291093508
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,64,4,2,128,0,1,float16,float16,0,0.010069333637754122
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,64,4,2,128,0,1,float16,fp8,0,0.010410666465759277
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,64,4,2,128,0,1,fp8,fp8,0,0.00938666673998038
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,64,4,4,128,0,1,float16,float16,0,0.00972800018886725
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,64,4,4,128,0,1,float16,fp8,0,0.010069333637754122
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,64,4,4,128,0,1,fp8,fp8,0,0.009045333291093508
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,64,4,1,128,0,1,float16,fp8,0,0.010069333637754122
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,64,4,1,128,0,1,float16,float16,0,0.009898666913310686
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,64,4,1,128,0,1,fp8,fp8,0,0.009045333291093508
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,64,4,2,128,0,1,float16,float16,0,0.009898666913310686
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,64,4,2,128,0,1,float16,fp8,0,0.010239999741315842
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,64,4,2,128,0,1,fp8,fp8,0,0.009216000015536943
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,64,4,4,128,0,1,float16,fp8,0,0.009898666913310686
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,64,4,4,128,0,1,float16,float16,0,0.00972800018886725
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,64,4,4,128,0,1,fp8,fp8,0,0.008874666566650072
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,64,4,1,128,0,1,float16,float16,0,0.00972800018886725
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,64,4,1,128,0,1,float16,fp8,0,0.010069333637754122
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,64,4,1,128,0,1,fp8,fp8,0,0.009216000015536943
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,64,4,2,128,0,1,float16,float16,0,0.009898666913310686
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,64,4,2,128,0,1,float16,fp8,0,0.010069333637754122
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,64,4,2,128,0,1,fp8,fp8,0,0.009045333291093508
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,64,4,4,128,0,1,float16,float16,0,0.009557333464423815
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,64,4,4,128,0,1,float16,fp8,0,0.00972800018886725
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,64,4,1,128,0,1,float16,float16,0,0.00972800018886725
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,64,4,4,128,0,1,fp8,fp8,0,0.010069333637754122
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,64,4,1,128,0,1,float16,fp8,0,0.010069333637754122
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,64,4,1,128,0,1,fp8,fp8,0,0.009216000015536943
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,64,4,2,128,0,1,float16,float16,0,0.00972800018886725
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,64,4,2,128,0,1,float16,fp8,0,0.010239999741315842
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,64,4,2,128,0,1,fp8,fp8,0,0.009216000015536943
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,32,4,1,128,0,1,float16,float16,0,0.029525332152843475
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,32,4,1,128,0,1,float16,fp8,0,0.029525332152843475
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,32,4,1,128,0,1,fp8,fp8,0,0.025941332181294758
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,32,4,2,128,0,1,float16,float16,0,0.030378667016824085
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,32,4,2,128,0,1,float16,fp8,0,0.030037333567937214
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,32,4,2,128,0,1,fp8,fp8,0,0.02611200014750163
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,32,4,4,128,0,1,float16,float16,0,0.019968000551064808
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,32,4,4,128,0,1,float16,fp8,0,0.0194560003777345
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,32,4,4,128,0,1,fp8,fp8,0,0.01672533278663953
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,32,4,1,128,0,1,float16,float16,0,0.01877333347996076
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,32,4,1,128,0,1,float16,fp8,0,0.01877333347996076
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,32,4,1,128,0,1,fp8,fp8,0,0.015872000406185787
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,32,4,2,128,0,1,float16,float16,0,0.0194560003777345
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,32,4,2,128,0,1,fp8,fp8,0,0.016042667130629223
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,32,4,2,128,0,1,float16,fp8,0,0.019285333653291065
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,32,4,4,128,0,1,float16,float16,0,0.013482666263977686
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,32,4,4,128,0,1,float16,fp8,0,0.012970666090647379
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,32,4,4,128,0,1,fp8,fp8,0,0.011605333536863327
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,32,4,1,128,0,1,float16,fp8,0,0.01228800043463707
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,32,4,1,128,0,1,float16,float16,0,0.012458667159080505
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,32,4,1,128,0,1,fp8,fp8,0,0.011605333536863327
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,32,4,2,128,0,1,float16,float16,0,0.012629333883523941
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,32,4,4,128,0,1,float16,float16,0,0.010069333637754122
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,32,4,2,128,0,1,float16,fp8,0,0.012800000607967377
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,32,4,2,128,0,1,fp8,fp8,0,0.011264000087976456
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,32,4,4,128,0,1,float16,fp8,0,0.010239999741315842
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,32,4,4,128,0,1,fp8,fp8,0,0.009216000015536943
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,32,4,1,128,0,1,float16,float16,0,0.009898666913310686
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,32,4,1,128,0,1,float16,fp8,0,0.010069333637754122
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,32,4,2,128,0,1,float16,float16,0,0.00972800018886725
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,32,4,2,128,0,1,float16,fp8,0,0.010239999741315842
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,32,4,2,128,0,1,fp8,fp8,0,0.00938666673998038
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,32,4,1,128,0,1,fp8,fp8,0,0.009216000015536943
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,32,4,4,128,0,1,float16,float16,0,0.009557333464423815
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,32,4,4,128,0,1,float16,fp8,0,0.009045333291093508
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,32,4,1,128,0,1,float16,float16,0,0.008874666566650072
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,32,4,4,128,0,1,fp8,fp8,0,0.00938666673998038
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,32,4,1,128,0,1,float16,fp8,0,0.009216000015536943
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,32,4,1,128,0,1,fp8,fp8,0,0.0085333331177632
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,32,4,2,128,0,1,float16,fp8,0,0.009216000015536943
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,32,4,2,128,0,1,float16,float16,0,0.009045333291093508
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,32,4,2,128,0,1,fp8,fp8,0,0.0085333331177632
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,32,4,4,128,0,1,float16,float16,0,0.008362666393319765
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,32,4,4,128,0,1,float16,fp8,0,0.008703999842206636
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,32,4,4,128,0,1,fp8,fp8,0,0.0085333331177632
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,32,4,1,128,0,1,float16,float16,0,0.0085333331177632
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,32,4,1,128,0,1,fp8,fp8,0,0.008703999842206636
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,32,4,1,128,0,1,float16,fp8,0,0.008874666566650072
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,32,4,2,128,0,1,float16,float16,0,0.008703999842206636
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,32,4,2,128,0,1,float16,fp8,0,0.008703999842206636
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,32,4,2,128,0,1,fp8,fp8,0,0.008362666393319765
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,32,4,4,128,0,1,float16,float16,0,0.008362666393319765
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,32,4,4,128,0,1,fp8,fp8,0,0.008021333565314611
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,32,4,4,128,0,1,float16,fp8,0,0.008874666566650072
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,32,4,1,128,0,1,float16,float16,0,0.009045333291093508
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,32,4,1,128,0,1,float16,fp8,0,0.008874666566650072
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,32,4,1,128,0,1,fp8,fp8,0,0.008703999842206636
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,32,4,2,128,0,1,float16,float16,0,0.0085333331177632
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,32,4,2,128,0,1,float16,fp8,0,0.008874666566650072
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,32,4,2,128,0,1,fp8,fp8,0,0.0085333331177632
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,32,4,4,128,0,1,float16,float16,0,0.008362666393319765
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,32,4,4,128,0,1,fp8,fp8,0,0.008874666566650072
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,32,4,1,128,0,1,float16,float16,0,0.0085333331177632
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,32,4,4,128,0,1,float16,fp8,0,0.008703999842206636
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,32,4,1,128,0,1,float16,fp8,0,0.008874666566650072
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,32,4,1,128,0,1,fp8,fp8,0,0.0085333331177632
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,32,4,2,128,0,1,float16,float16,0,0.0085333331177632
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,32,4,2,128,0,1,float16,fp8,0,0.008874666566650072
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,32,4,2,128,0,1,fp8,fp8,0,0.008192000289758047
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,32,4,4,128,0,1,float16,float16,0,0.008703999842206636
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,32,4,4,128,0,1,float16,fp8,0,0.0085333331177632
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,32,4,1,128,0,1,float16,float16,0,0.0085333331177632
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,32,4,4,128,0,1,fp8,fp8,0,0.009045333291093508
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,32,4,1,128,0,1,float16,fp8,0,0.00972800018886725
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,32,4,1,128,0,1,fp8,fp8,0,0.008362666393319765
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,32,4,2,128,0,1,float16,float16,0,0.008703999842206636
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,32,4,2,128,0,1,float16,fp8,0,0.008874666566650072
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,32,4,2,128,0,1,fp8,fp8,0,0.00938666673998038
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,16,4,1,128,0,1,float16,float16,0,0.023552000522613525
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,16,4,1,128,0,1,float16,fp8,0,0.023381332556406658
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,16,4,1,128,0,1,fp8,fp8,0,0.019797333826621372
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,16,4,2,128,0,1,float16,float16,0,0.024234667420387268
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,16,4,2,128,0,1,float16,fp8,0,0.02372266600529353
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,16,4,4,128,0,1,float16,float16,0,0.015360000232855478
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,256,16,4,2,128,0,1,fp8,fp8,0,0.020138667275508244
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,16,4,4,128,0,1,float16,fp8,0,0.015189333508412043
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,16,4,4,128,0,1,fp8,fp8,0,0.01331199953953425
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,16,4,1,128,0,1,float16,float16,0,0.015189333508412043
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,16,4,1,128,0,1,float16,fp8,0,0.014677333335081736
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,16,4,1,128,0,1,fp8,fp8,0,0.013141332815090815
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,16,4,2,128,0,1,float16,float16,0,0.015360000232855478
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,16,4,2,128,0,1,float16,fp8,0,0.014848000059525171
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,128,16,4,2,128,0,1,fp8,fp8,0,0.013141332815090815
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,16,4,4,128,0,1,float16,float16,0,0.010581333190202713
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,16,4,4,128,0,1,float16,fp8,0,0.010581333190202713
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,16,4,4,128,0,1,fp8,fp8,0,0.00972800018886725
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,16,4,1,128,0,1,float16,float16,0,0.010239999741315842
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,16,4,1,128,0,1,float16,fp8,0,0.010410666465759277
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,16,4,1,128,0,1,fp8,fp8,0,0.010069333637754122
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,16,4,2,128,0,1,float16,float16,0,0.010069333637754122
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,16,4,2,128,0,1,float16,fp8,0,0.010751999914646149
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,64,16,4,2,128,0,1,fp8,fp8,0,0.010239999741315842
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,16,4,4,128,0,1,float16,float16,0,0.008703999842206636
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,16,4,4,128,0,1,float16,fp8,0,0.034304000437259674
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,16,4,1,128,0,1,float16,float16,0,0.009045333291093508
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,16,4,4,128,0,1,fp8,fp8,0,0.008703999842206636
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,16,4,1,128,0,1,float16,fp8,0,0.00938666673998038
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,16,4,1,128,0,1,fp8,fp8,0,0.0085333331177632
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,16,4,2,128,0,1,float16,float16,0,0.008874666566650072
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,16,4,2,128,0,1,float16,fp8,0,0.00938666673998038
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,32,16,4,2,128,0,1,fp8,fp8,0,0.008703999842206636
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,16,4,4,128,0,1,float16,float16,0,0.0085333331177632
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,16,4,4,128,0,1,float16,fp8,0,0.008703999842206636
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,16,4,4,128,0,1,fp8,fp8,0,0.008874666566650072
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,16,4,1,128,0,1,float16,fp8,0,0.008874666566650072
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,16,4,1,128,0,1,float16,float16,0,0.009557333464423815
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,16,4,1,128,0,1,fp8,fp8,0,0.008362666393319765
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,16,4,2,128,0,1,float16,float16,0,0.008703999842206636
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,16,4,2,128,0,1,float16,fp8,0,0.008874666566650072
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,16,16,4,2,128,0,1,fp8,fp8,0,0.008703999842206636
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,16,4,4,128,0,1,float16,float16,0,0.0085333331177632
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,16,4,4,128,0,1,fp8,fp8,0,0.008021333565314611
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,16,4,1,128,0,1,float16,float16,0,0.008362666393319765
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,16,4,4,128,0,1,float16,fp8,0,0.008703999842206636
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,16,4,1,128,0,1,float16,fp8,0,0.008874666566650072
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,16,4,1,128,0,1,fp8,fp8,0,0.008874666566650072
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,16,4,2,128,0,1,float16,float16,0,0.008362666393319765
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,16,4,2,128,0,1,float16,fp8,0,0.008703999842206636
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,8,16,4,2,128,0,1,fp8,fp8,0,0.008362666393319765
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,16,4,4,128,0,1,float16,float16,0,0.008021333565314611
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,16,4,4,128,0,1,float16,fp8,0,0.008362666393319765
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,16,4,4,128,0,1,fp8,fp8,0,0.008874666566650072
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,16,4,1,128,0,1,float16,float16,0,0.008362666393319765
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,16,4,1,128,0,1,float16,fp8,0,0.009216000015536943
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,16,4,1,128,0,1,fp8,fp8,0,0.0085333331177632
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,16,4,2,128,0,1,float16,float16,0,0.008362666393319765
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,16,4,2,128,0,1,float16,fp8,0,0.008703999842206636
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,4,16,4,2,128,0,1,fp8,fp8,0,0.008021333565314611
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,16,4,4,128,0,1,float16,float16,0,0.0085333331177632
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,16,4,4,128,0,1,fp8,fp8,0,0.008021333565314611
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,16,4,4,128,0,1,float16,fp8,0,0.008703999842206636
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,16,4,1,128,0,1,float16,float16,0,0.008874666566650072
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,16,4,1,128,0,1,float16,fp8,0,0.008703999842206636
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,16,4,1,128,0,1,fp8,fp8,0,0.008192000289758047
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,16,4,2,128,0,1,float16,float16,0,0.008362666393319765
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,16,4,2,128,0,1,float16,fp8,0,0.008703999842206636
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,2,16,4,2,128,0,1,fp8,fp8,0,0.0085333331177632
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,16,4,4,128,0,1,float16,float16,0,0.008874666566650072
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,16,4,4,128,0,1,fp8,fp8,0,0.009045333291093508
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,16,4,4,128,0,1,float16,fp8,0,0.009370666618148485
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,16,4,1,128,0,1,float16,fp8,0,0.008703999842206636
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,16,4,2,128,0,1,float16,float16,0,0.008362666393319765
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,16,4,1,128,0,1,float16,float16,0,0.009898666913310686
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,16,4,1,128,0,1,fp8,fp8,0,0.009216000015536943
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,16,4,2,128,0,1,float16,fp8,0,0.008874666566650072
TRTLLM,1.0.0,NVIDIA L40S,context_attention,torch_flow,1,16,4,2,128,0,1,fp8,fp8,0,0.008362666393319765
